Diffstat (limited to 'roles'): 39 files changed, 1143 insertions, 45 deletions
diff --git a/roles/docker/tasks/systemcontainer_crio.yml b/roles/docker/tasks/systemcontainer_crio.yml index 787f51f94..24ca0d9f8 100644 --- a/roles/docker/tasks/systemcontainer_crio.yml +++ b/roles/docker/tasks/systemcontainer_crio.yml @@ -3,6 +3,15 @@  - set_fact:      l_insecure_crio_registries: "{{ '\"{}\"'.format('\", \"'.join(openshift.docker.insecure_registries)) }}"    when: openshift.docker.insecure_registries +- set_fact: +    l_crio_registries: "{{ openshift.docker.additional_registries + ['docker.io'] }}" +  when: openshift.docker.additional_registries +- set_fact: +    l_crio_registries: "{{ ['docker.io'] }}" +  when: not openshift.docker.additional_registries +- set_fact: +    l_additional_crio_registries: "{{ '\"{}\"'.format('\", \"'.join(l_crio_registries)) }}" +  when: openshift.docker.additional_registries  - name: Ensure container-selinux is installed    package: @@ -88,10 +97,16 @@          l_crio_image_prepend: "docker.io/gscrivano"          l_crio_image_name: "crio-o-fedora" -    - name: Use Centos based image when distribution is Red Hat or CentOS +    - name: Use Centos based image when distribution is CentOS        set_fact:          l_crio_image_name: "cri-o-centos" -      when: ansible_distribution in ['RedHat', 'CentOS'] +      when: ansible_distribution == "CentOS" + +    - name: Use RHEL based image when distribution is Red Hat +      set_fact: +        l_crio_image_prepend: "registry.access.redhat.com" +        l_crio_image_name: "cri-o" +      when: ansible_distribution == "RedHat"      # For https://github.com/openshift/openshift-ansible/pull/4049#discussion_r114478504      - name: Use a testing registry if requested diff --git a/roles/docker/templates/crio.conf.j2 b/roles/docker/templates/crio.conf.j2 index 5b31932b1..b4ee84fd0 100644 --- a/roles/docker/templates/crio.conf.j2 +++ b/roles/docker/templates/crio.conf.j2 @@ -120,6 +120,11 @@ insecure_registries = [  {{ l_insecure_crio_registries|default("") }}  ] +# registries is used to specify a comma separated list of registries to be used +# when pulling an unqualified image (e.g. fedora:rawhide). +registries = [ +{{ l_additional_crio_registries|default("") }} +]  # The "crio.network" table contains settings pertaining to the  # management of CNI plugins.  
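The `set_fact` chain at the top of systemcontainer_crio.yml builds the body of the TOML `registries = [ ... ]` array by quoting each registry name and joining with `", "`. A minimal Python sketch of that expression, using a hypothetical `additional_registries` value:

```python
# Sketch of the l_crio_registries / l_additional_crio_registries facts above;
# "registry.example.com:5000" is a hypothetical inventory value.
additional_registries = ["registry.example.com:5000"]

# Additional registries are searched first, with docker.io as the fallback.
l_crio_registries = (additional_registries + ["docker.io"]
                     if additional_registries else ["docker.io"])

# '"{}"'.format('", "'.join(...)) quotes every entry and comma-separates them,
# yielding exactly the body crio.conf.j2 drops inside registries = [ ... ].
l_additional_crio_registries = '"{}"'.format('", "'.join(l_crio_registries))
print(l_additional_crio_registries)  # "registry.example.com:5000", "docker.io"
```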
[crio.network] diff --git a/roles/etcd/templates/etcd.conf.j2 b/roles/etcd/templates/etcd.conf.j2 index ce362c743..2c2803aee 100644 --- a/roles/etcd/templates/etcd.conf.j2 +++ b/roles/etcd/templates/etcd.conf.j2 @@ -11,7 +11,8 @@  ETCD_NAME={{ etcd_hostname }}  ETCD_LISTEN_PEER_URLS={{ etcd_listen_peer_urls }}  ETCD_DATA_DIR={{ etcd_data_dir }} -#ETCD_SNAPSHOT_COUNTER=10000 +#ETCD_WAL_DIR="" +#ETCD_SNAPSHOT_COUNT=10000  ETCD_HEARTBEAT_INTERVAL=500  ETCD_ELECTION_TIMEOUT=2500  ETCD_LISTEN_CLIENT_URLS={{ etcd_listen_client_urls }} @@ -41,24 +42,43 @@ ETCD_INITIAL_CLUSTER_TOKEN={{ etcd_initial_cluster_token }}  #ETCD_DISCOVERY_PROXY=  {% endif %}  ETCD_ADVERTISE_CLIENT_URLS={{ etcd_advertise_client_urls }} +#ETCD_STRICT_RECONFIG_CHECK="false" +#ETCD_AUTO_COMPACTION_RETENTION="0" +#ETCD_ENABLE_V2="true"  #[proxy]  #ETCD_PROXY=off +#ETCD_PROXY_FAILURE_WAIT="5000" +#ETCD_PROXY_REFRESH_INTERVAL="30000" +#ETCD_PROXY_DIAL_TIMEOUT="1000" +#ETCD_PROXY_WRITE_TIMEOUT="5000" +#ETCD_PROXY_READ_TIMEOUT="0"  #[security]  {% if etcd_url_scheme == 'https' -%} -ETCD_CA_FILE={{ etcd_ca_file }} +ETCD_TRUSTED_CA_FILE={{ etcd_ca_file }} +ETCD_CLIENT_CERT_AUTH="true"  ETCD_CERT_FILE={{ etcd_cert_file }}  ETCD_KEY_FILE={{ etcd_key_file }}  {% endif -%} +#ETCD_AUTO_TLS="false"  {% if etcd_peer_url_scheme == 'https' -%} -ETCD_PEER_CA_FILE={{ etcd_peer_ca_file }} +ETCD_PEER_TRUSTED_CA_FILE={{ etcd_peer_ca_file }} +ETCD_PEER_CLIENT_CERT_AUTH="true"  ETCD_PEER_CERT_FILE={{ etcd_peer_cert_file }}  ETCD_PEER_KEY_FILE={{ etcd_peer_key_file }}  {% endif -%} +#ETCD_PEER_AUTO_TLS="false"  #[logging]  ETCD_DEBUG="{{ etcd_debug | default(false) | bool | string }}"  {% if etcd_log_package_levels is defined %}  ETCD_LOG_PACKAGE_LEVELS="{{ etcd_log_package_levels }}"  {% endif %} + +#[profiling] +#ETCD_ENABLE_PPROF="false" +#ETCD_METRICS="basic" +# +#[auth] +#ETCD_AUTH_TOKEN="simple" diff --git a/roles/flannel_register/defaults/main.yaml b/roles/flannel_register/defaults/main.yaml index ddf8230ec..71c8f38c3 100644 --- a/roles/flannel_register/defaults/main.yaml +++ b/roles/flannel_register/defaults/main.yaml @@ -1,7 +1,6 @@  --- -flannel_network: "{{ openshift.common.portal_net | default('172.30.0.0/16', true) }}" -flannel_min_network: 172.30.5.0 -flannel_subnet_len: 24 +flannel_network: "{{ openshift.master.sdn_cluster_network_cidr }}" +flannel_subnet_len: "{{ 32 - openshift.master.sdn_host_subnet_length }}"  flannel_etcd_key: /openshift.com/network  etcd_hosts: "{{ etcd_urls }}"  etcd_conf_dir: "{{ openshift.common.config_base }}/master" diff --git a/roles/flannel_register/templates/flannel-config.json b/roles/flannel_register/templates/flannel-config.json index 89ce4c30b..bba3729fa 100644 --- a/roles/flannel_register/templates/flannel-config.json +++ b/roles/flannel_register/templates/flannel-config.json @@ -1,7 +1,6 @@  {      "Network": "{{ flannel_network }}",      "SubnetLen": {{ flannel_subnet_len }}, -    "SubnetMin": "{{ flannel_min_network }}",      "Backend": {          "Type": "host-gw"       } diff --git a/roles/lib_openshift/library/oc_atomic_container.py b/roles/lib_openshift/library/oc_atomic_container.py index 955c6313e..79bd08f4e 100644 --- a/roles/lib_openshift/library/oc_atomic_container.py +++ b/roles/lib_openshift/library/oc_atomic_container.py @@ -83,7 +83,7 @@ def _install(module, container, image, values_list):      if rc != 0:          return rc, out, err, False      else: -        changed = "Extracting" in out +        changed = "Extracting" in out or "Copying blob" in out          return rc, out, err, 
changed  def _uninstall(module, name): @@ -127,7 +127,7 @@ def do_update(module, container, old_image, image, values_list):      if rc != 0:          module.fail_json(rc=rc, msg=err)      else: -        changed = "Extracting" in out +        changed = "Extracting" in out or "Copying blob" in out          module.exit_json(msg=out, changed=changed) diff --git a/roles/lib_openshift/src/ansible/oc_atomic_container.py b/roles/lib_openshift/src/ansible/oc_atomic_container.py index 7b81760df..454d7c4b2 100644 --- a/roles/lib_openshift/src/ansible/oc_atomic_container.py +++ b/roles/lib_openshift/src/ansible/oc_atomic_container.py @@ -19,7 +19,7 @@ def _install(module, container, image, values_list):      if rc != 0:          return rc, out, err, False      else: -        changed = "Extracting" in out +        changed = "Extracting" in out or "Copying blob" in out          return rc, out, err, changed  def _uninstall(module, name): @@ -63,7 +63,7 @@ def do_update(module, container, old_image, image, values_list):      if rc != 0:          module.fail_json(rc=rc, msg=err)      else: -        changed = "Extracting" in out +        changed = "Extracting" in out or "Copying blob" in out          module.exit_json(msg=out, changed=changed) diff --git a/roles/openshift_cfme/defaults/main.yml b/roles/openshift_cfme/defaults/main.yml index 8aa57e75a..b82c2e602 100644 --- a/roles/openshift_cfme/defaults/main.yml +++ b/roles/openshift_cfme/defaults/main.yml @@ -27,9 +27,6 @@ openshift_cfme_pv_data:  # Tuning parameter to use more than 5 images at once from an ImageStream  openshift_cfme_maxImagesBulkImportedPerRepository: 100 -# Hostname/IP of the NFS server. Currently defaults to first master -openshift_cfme_nfs_server: "{{ groups.nfs.0 }}" -openshift_cfme_nfs_directory: "/exports"  # TODO: Refactor '_install_app' variable. This is just for testing but  # maybe in the future it should control the entire yes/no for CFME.  # diff --git a/roles/openshift_cfme/tasks/nfs.yml b/roles/openshift_cfme/tasks/nfs.yml index 8db45492e..ca04628a8 100644 --- a/roles/openshift_cfme/tasks/nfs.yml +++ b/roles/openshift_cfme/tasks/nfs.yml @@ -1,6 +1,13 @@  ---  # Tasks to statically provision NFS volumes  # Include if not using dynamic volume provisioning + +- name: Set openshift_cfme_nfs_server fact +  when: openshift_cfme_nfs_server is not defined +  set_fact: +    # Hostname/IP of the NFS server. 
Currently defaults to first master +    openshift_cfme_nfs_server: "{{ oo_nfs_to_config.0 }}" +  - name: Ensure the /exports/ directory exists    file:      path: /exports/ diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py index c8769b511..db3c0b654 100644 --- a/roles/openshift_health_checker/library/aos_version.py +++ b/roles/openshift_health_checker/library/aos_version.py @@ -26,15 +26,13 @@ from ansible.module_utils.six import string_types  YUM_IMPORT_EXCEPTION = None  DNF_IMPORT_EXCEPTION = None -PKG_MGR = None  try:      import yum  # pylint: disable=import-error -    PKG_MGR = "yum"  except ImportError as err:      YUM_IMPORT_EXCEPTION = err +  try:      import dnf  # pylint: disable=import-error -    PKG_MGR = "dnf"  except ImportError as err:      DNF_IMPORT_EXCEPTION = err @@ -51,14 +49,19 @@ def main():      module = AnsibleModule(          argument_spec=dict(              package_list=dict(type="list", required=True), +            package_mgr=dict(type="str", required=True),          ),          supports_check_mode=True      ) -    if YUM_IMPORT_EXCEPTION and DNF_IMPORT_EXCEPTION: +    # determine the package manager to use +    package_mgr = module.params['package_mgr'] +    if package_mgr not in ('yum', 'dnf'): +        module.fail_json(msg="package_mgr must be one of: yum, dnf") +    pkg_mgr_exception = dict(yum=YUM_IMPORT_EXCEPTION, dnf=DNF_IMPORT_EXCEPTION)[package_mgr] +    if pkg_mgr_exception:          module.fail_json( -            msg="aos_version module could not import yum or dnf: %s %s" % -            (YUM_IMPORT_EXCEPTION, DNF_IMPORT_EXCEPTION) +            msg="aos_version module could not import {}: {}".format(package_mgr, pkg_mgr_exception)          )      # determine the packages we will look for @@ -78,7 +81,7 @@ def main():      # get the list of packages available and complain if anything is wrong      try: -        pkgs = _retrieve_available_packages(expected_pkg_names) +        pkgs = _retrieve_available_packages(package_mgr, expected_pkg_names)          if versioned_pkgs:              _check_precise_version_found(pkgs, _to_dict(versioned_pkgs))              _check_higher_version_found(pkgs, _to_dict(versioned_pkgs)) @@ -93,7 +96,7 @@ def _to_dict(pkg_list):      return {pkg["name"]: pkg for pkg in pkg_list} -def _retrieve_available_packages(expected_pkgs): +def _retrieve_available_packages(pkg_mgr, expected_pkgs):      # The openshift excluder prevents unintended updates to openshift      # packages by setting yum excludes on those packages. See:      # https://wiki.centos.org/SpecialInterestGroup/PaaS/OpenShift-Origin-Control-Updates @@ -103,14 +106,15 @@ def _retrieve_available_packages(expected_pkgs):      # be excluded. So, for our purposes here, disable excludes to see      # what will really be available during an install or upgrade. 
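The `package_mgr` parameter added above replaces the import-time `PKG_MGR` guess, so a missing `dnf` binding no longer fails a yum host (and vice versa). A condensed sketch of that fail-fast selection, stripped of the Ansible module plumbing:

```python
# Sketch of the validation added to main(); yum_exc / dnf_exc stand in for the
# YUM_IMPORT_EXCEPTION / DNF_IMPORT_EXCEPTION captured at import time.
def select_pkg_mgr(package_mgr, yum_exc, dnf_exc):
    if package_mgr not in ("yum", "dnf"):
        raise ValueError("package_mgr must be one of: yum, dnf")
    # Only the import failure for the requested manager is fatal.
    exc = {"yum": yum_exc, "dnf": dnf_exc}[package_mgr]
    if exc is not None:
        raise RuntimeError("aos_version module could not import {}: {}"
                           .format(package_mgr, exc))
    return package_mgr
```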
-    if PKG_MGR == "yum": +    if pkg_mgr == "yum":          # search for package versions available for openshift pkgs          yb = yum.YumBase()  # pylint: disable=invalid-name          yb.conf.disable_excludes = ['all']          try: -            pkgs = yb.pkgSack.returnPackages(patterns=expected_pkgs) +            pkgs = yb.rpmdb.returnPackages(patterns=expected_pkgs) +            pkgs += yb.pkgSack.returnPackages(patterns=expected_pkgs)          except yum.Errors.PackageSackError as excinfo:              # you only hit this if *none* of the packages are available              raise AosVersionException('\n'.join([ @@ -118,7 +122,7 @@ def _retrieve_available_packages(expected_pkgs):                  'Check your subscription and repo settings.',                  str(excinfo),              ])) -    elif PKG_MGR == "dnf": +    elif pkg_mgr == "dnf":          dbase = dnf.Base()  # pyling: disable=invalid-name          dbase.conf.disable_excludes = ['all'] @@ -127,8 +131,11 @@ def _retrieve_available_packages(expected_pkgs):          dquery = dbase.sack.query()          aquery = dquery.available() +        iquery = dquery.installed() -        pkgs = list(aquery.filter(name=expected_pkgs)) +        available_pkgs = list(aquery.filter(name=expected_pkgs)) +        installed_pkgs = list(iquery.filter(name=expected_pkgs)) +        pkgs = available_pkgs + installed_pkgs          if not pkgs:              # pkgs list is empty, raise because no expected packages found diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 857a80c74..866c74d7c 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -32,6 +32,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):      # we use python-docker-py to check local docker for images, and skopeo      # to look for images available remotely without waiting to pull them.      
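The package query changes above matter on hosts that are already up to date: previously only repo-available packages were consulted, so an installed package with no newer version in the repositories looked like a miss. A sketch of the merged lookup, with stub query objects standing in for the yum/dnf APIs:

```python
# Stub illustration of pkgs = available + installed from the diff above; the
# real code filters dbase.sack.query().available() and .installed() (dnf) or
# unions yb.rpmdb with yb.pkgSack (yum).
class FakeQuery:
    def __init__(self, pkgs):
        self._pkgs = pkgs

    def filter(self, name):
        return [p for p in self._pkgs if p["name"] in name]

available = FakeQuery([])  # fully updated host: nothing newer in the repos
installed = FakeQuery([{"name": "origin", "version": "3.6.0"}])

expected = ["origin", "origin-node"]
pkgs = available.filter(name=expected) + installed.filter(name=expected)
assert pkgs  # non-empty: the version check no longer fails on current hosts
```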
dependencies = ["python-docker-py", "skopeo"] +    skopeo_img_check_command = "timeout 10 skopeo inspect --tls-verify=false"      def is_active(self):          """Skip hosts with unsupported deployment types.""" @@ -67,8 +68,10 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):                  "failed": True,                  "msg": (                      "One or more required Docker images are not available:\n    {}\n" -                    "Configured registries: {}" -                ).format(",\n    ".join(sorted(unavailable_images)), ", ".join(registries)), +                    "Configured registries: {}\n" +                    "Checked by: {}" +                ).format(",\n    ".join(sorted(unavailable_images)), ", ".join(registries), +                         self.skopeo_img_check_command),              }          return {} @@ -169,8 +172,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):          for registry in registries:              args = { -                "_raw_params": "timeout 10 skopeo inspect --tls-verify=false " -                               "docker://{}/{}".format(registry, image) +                "_raw_params": self.skopeo_img_check_command + " docker://{}/{}".format(registry, image)              }              result = self.execute_module("command", args)              if result.get("rc", 0) == 0 and not result.get("failed"): diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 8b780114f..0b795b6c4 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -46,6 +46,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):          check_multi_minor_release = deployment_type in ['openshift-enterprise']          args = { +            "package_mgr": self.get_var("ansible_pkg_mgr"),              "package_list": [                  {                      "name": "openvswitch", diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py index 6054d3f3e..e871f39f0 100644 --- a/roles/openshift_health_checker/test/package_version_test.py +++ b/roles/openshift_health_checker/test/package_version_test.py @@ -5,6 +5,7 @@ from openshift_checks.package_version import PackageVersion, OpenShiftCheckExcep  def task_vars_for(openshift_release, deployment_type):      return dict( +        ansible_pkg_mgr='yum',          openshift=dict(common=dict(service_type=deployment_type)),          openshift_release=openshift_release,          openshift_image_tag='v' + openshift_release, @@ -27,6 +28,7 @@ def test_openshift_version_not_supported():  def test_invalid_openshift_release_format():      task_vars = dict( +        ansible_pkg_mgr='yum',          openshift=dict(common=dict(service_type='origin')),          openshift_image_tag='v0',          openshift_deployment_type='origin', diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml index 3e424da12..d73c290ff 100644 --- a/roles/openshift_hosted/tasks/registry/registry.yml +++ b/roles/openshift_hosted/tasks/registry/registry.yml @@ -61,6 +61,14 @@      openshift_hosted_registry_env_vars: "{{ openshift_hosted_registry_env_vars | combine({'OPENSHIFT_DEFAULT_REGISTRY':'docker-registry.default.svc:5000'}) }}"    when: openshift_push_via_dns | default(false) | bool +- name: 
Update registry proxy settings for dc/docker-registry +  set_fact: +    openshift_hosted_registry_env_vars: "{{ {'HTTPS_PROXY': (openshift.common.https_proxy | default('')), +                                             'HTTP_PROXY':  (openshift.common.http_proxy  | default('')), +                                             'NO_PROXY':    (openshift.common.no_proxy    | default(''))} +                                           | combine(openshift_hosted_registry_env_vars) }}" +  when: (openshift.common.https_proxy | default(False)) or (openshift.common.http_proxy | default('')) != '' +  - name: Create the registry service account    oc_serviceaccount:      name: "{{ openshift_hosted_registry_serviceaccount }}" diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml index e57ed733e..68ec7233e 100644 --- a/roles/openshift_hosted/tasks/router/router.yml +++ b/roles/openshift_hosted/tasks/router/router.yml @@ -18,6 +18,15 @@      openshift_hosted_router_selector: "{{ openshift.hosted.router.selector | default(None) }}"      openshift_hosted_router_image: "{{ openshift.hosted.router.registryurl }}" +- name: Get the certificate contents for router +  copy: +    backup: True +    dest: "/etc/origin/master/{{ item | basename }}" +    src: "{{ item }}" +  with_items: "{{ openshift_hosted_routers | oo_collect(attribute='certificate') | +                  oo_select_keys_from_list(['keyfile', 'certfile', 'cafile']) }}" +  when: ( not openshift_hosted_router_create_certificate | bool ) or openshift_hosted_router_certificate != {} +  # This is for when we desire a cluster signed cert  # The certificate is generated and placed in master_config_dir/  - block: @@ -43,15 +52,6 @@    # End Block    when: ( openshift_hosted_router_create_certificate | bool ) and openshift_hosted_router_certificate == {} -- name: Get the certificate contents for router -  copy: -    backup: True -    dest: "/etc/origin/master/{{ item | basename }}" -    src: "{{ item }}" -  with_items: "{{ openshift_hosted_routers | oo_collect(attribute='certificate') | -                  oo_select_keys_from_list(['keyfile', 'certfile', 'cafile']) }}" -  when: not openshift_hosted_router_create_certificate | bool -  - name: Create the router service account(s)    oc_serviceaccount:      name: "{{ item.serviceaccount }}" diff --git a/roles/openshift_logging_curator/templates/curator.j2 b/roles/openshift_logging_curator/templates/curator.j2 index 6431f86d9..e74918a40 100644 --- a/roles/openshift_logging_curator/templates/curator.j2 +++ b/roles/openshift_logging_curator/templates/curator.j2 @@ -44,6 +44,8 @@ spec:                cpu: "{{curator_cpu_limit}}"  {% if curator_memory_limit is defined and curator_memory_limit is not none and curator_memory_limit != "" %}                memory: "{{curator_memory_limit}}" +            requests: +              memory: "{{curator_memory_limit}}"  {% endif %}            env:              - diff --git a/roles/openshift_logging_elasticsearch/templates/es.j2 b/roles/openshift_logging_elasticsearch/templates/es.j2 index cbe6b89f2..5f2932541 100644 --- a/roles/openshift_logging_elasticsearch/templates/es.j2 +++ b/roles/openshift_logging_elasticsearch/templates/es.j2 @@ -48,7 +48,7 @@ spec:                cpu: "{{es_cpu_limit}}"  {% endif %}              requests: -              memory: "512Mi" +              memory: "{{es_memory_limit}}"            ports:              -                containerPort: 9200 diff --git 
a/roles/openshift_logging_fluentd/templates/fluentd.j2 b/roles/openshift_logging_fluentd/templates/fluentd.j2 index 88e039e3f..a4afb6618 100644 --- a/roles/openshift_logging_fluentd/templates/fluentd.j2 +++ b/roles/openshift_logging_fluentd/templates/fluentd.j2 @@ -36,6 +36,8 @@ spec:            limits:              cpu: {{ openshift_logging_fluentd_cpu_limit }}              memory: {{ openshift_logging_fluentd_memory_limit }} +          requests: +            memory: {{ openshift_logging_fluentd_memory_limit }}          volumeMounts:          - name: runlogjournal            mountPath: /run/log/journal diff --git a/roles/openshift_logging_kibana/templates/kibana.j2 b/roles/openshift_logging_kibana/templates/kibana.j2 index 512d99d06..da1386d3e 100644 --- a/roles/openshift_logging_kibana/templates/kibana.j2 +++ b/roles/openshift_logging_kibana/templates/kibana.j2 @@ -46,6 +46,8 @@ spec:  {% endif %}  {% if kibana_memory_limit is not none and kibana_memory_limit != "" %}                memory: "{{ kibana_memory_limit }}" +            requests: +              memory: "{{ kibana_memory_limit }}"  {% endif %}  {% endif %}            env: @@ -82,6 +84,8 @@ spec:  {% endif %}  {% if kibana_proxy_memory_limit is not none and kibana_proxy_memory_limit != "" %}                memory: "{{ kibana_proxy_memory_limit }}" +            requests: +              memory: "{{ kibana_proxy_memory_limit }}"  {% endif %}  {% endif %}            ports: diff --git a/roles/openshift_logging_mux/templates/mux.j2 b/roles/openshift_logging_mux/templates/mux.j2 index 70afe5cee..ff18d3270 100644 --- a/roles/openshift_logging_mux/templates/mux.j2 +++ b/roles/openshift_logging_mux/templates/mux.j2 @@ -45,6 +45,8 @@ spec:  {% endif %}  {% if mux_memory_limit is not none %}              memory: "{{mux_memory_limit}}" +          requests: +            memory: "{{mux_memory_limit}}"  {% endif %}  {% endif %}          ports: diff --git a/roles/openshift_master/tasks/systemd_units.yml b/roles/openshift_master/tasks/systemd_units.yml index 9f7f9ea4e..7a918c57e 100644 --- a/roles/openshift_master/tasks/systemd_units.yml +++ b/roles/openshift_master/tasks/systemd_units.yml @@ -7,7 +7,7 @@  # openshift_master_config_dir is set.  - name: Set openshift_master_config_dir if unset    set_fact: -    openshift_master_config_dir: '/var/lib/origin' +    openshift_master_config_dir: '/etc/origin/master'    when: openshift_master_config_dir is not defined  # This play may be consumed outside the role, we need to ensure that diff --git a/roles/openshift_node/templates/node.yaml.v1.j2 b/roles/openshift_node/templates/node.yaml.v1.j2 index b675a2e4a..7049f7189 100644 --- a/roles/openshift_node/templates/node.yaml.v1.j2 +++ b/roles/openshift_node/templates/node.yaml.v1.j2 @@ -21,8 +21,6 @@ kubeletArguments: {{ openshift.node.kubelet_args | default(None) | to_padded_yam    - remote    container-runtime-endpoint:    - /var/run/crio.sock -  experimental-cri: -  - 'true'    image-service-endpoint:    - /var/run/crio.sock    node-labels: diff --git a/roles/openshift_prometheus/README.md b/roles/openshift_prometheus/README.md new file mode 100644 index 000000000..c5a44bffb --- /dev/null +++ b/roles/openshift_prometheus/README.md @@ -0,0 +1,95 @@ +OpenShift Prometheus +==================== + +OpenShift Prometheus Installation + +Requirements +------------ + + +Role Variables +-------------- + +For default values, see [`defaults/main.yaml`](defaults/main.yaml). + +- `openshift_prometheus_state`: present - install/update. absent - uninstall. 
+ +- `openshift_prometheus_namespace`: project (i.e. namespace) where the components will be +  deployed. + +- `openshift_prometheus_replicas`: The number of replicas for prometheus deployment. + +- `openshift_prometheus_node_selector`: Selector for the nodes prometheus will be deployed on. + +- `openshift_prometheus_image_<COMPONENT>`: specify image for the component  + +## Storage related variables +Each prometheus component (prometheus, alertmanager, alert-buffer, oauth-proxy) can set pv claim by setting corresponding role variable: +``` +openshift_prometheus_<COMPONENT>_storage_type: <VALUE> +openshift_prometheus_<COMPONENT>_pvc_(name|size|access_modes|pv_selector): <VALUE> +``` +e.g +``` +openshift_prometheus_storage_type: pvc +openshift_prometheus_alertmanager_pvc_name: alertmanager +openshift_prometheus_alertbuffer_pvc_size: 10G +openshift_prometheus_pvc_access_modes: [ReadWriteOnce] +``` + +## Additional Alert Rules file variable +An external file with alert rules can be added by setting path to additional rules variable:  +``` +openshift_prometheus_additional_rules_file: <PATH>  +``` + +File content should be in prometheus alert rules format. +Following example sets rule to fire an alert when one of the cluster nodes is down: + +``` +groups: +- name: example-rules +  interval: 30s # defaults to global interval +  rules: +  - alert: Node Down +    expr: up{job="kubernetes-nodes"} == 0 +    annotations: +      miqTarget: "ContainerNode" +      severity: "HIGH" +      message: "{{ '{{' }}{{ '$labels.instance' }}{{ '}}' }} is down" +``` + + +## Additional variables to control resource limits +Each prometheus component (prometheus, alertmanager, alert-buffer, oauth-proxy) can specify a cpu and memory limits and requests by setting +the corresponding role variable: +``` +openshift_prometheus_<COMPONENT>_(limits|requests)_(memory|cpu): <VALUE> +``` +e.g +``` +openshift_prometheus_alertmanager_limits_memory: 1Gi +openshift_prometheus_oath_proxy_requests_cpu: 100 +``` + +Dependencies +------------ + +openshift_facts + + +Example Playbook +---------------- + +``` +- name: Configure openshift-prometheus +  hosts: oo_first_master +  roles: +  - role: openshift_prometheus +``` + +License +------- + +Apache License, Version 2.0 + diff --git a/roles/openshift_prometheus/defaults/main.yaml b/roles/openshift_prometheus/defaults/main.yaml new file mode 100644 index 000000000..18d6a1645 --- /dev/null +++ b/roles/openshift_prometheus/defaults/main.yaml @@ -0,0 +1,74 @@ +--- +# defaults file for openshift_prometheus +openshift_prometheus_state: present + +openshift_prometheus_namespace: prometheus + +openshift_prometheus_replicas: 1 +openshift_prometheus_node_selector: {"region":"infra"} + +# images +openshift_prometheus_image_proxy: "openshift/oauth-proxy:v1.0.0" +openshift_prometheus_image_prometheus: "openshift/prometheus:v2.0.0-dev" +openshift_prometheus_image_alertmanager: "openshift/prometheus-alertmanager:dev" +openshift_prometheus_image_alertbuffer: "ilackarms/message-buffer" + +# additional prometheus rules file +openshift_prometheus_additional_rules_file: null + +# All the required exports +openshift_prometheus_pv_exports: +  - prometheus +  - prometheus-alertmanager +  - prometheus-alertbuffer +# PV template files and their created object names +openshift_prometheus_pv_data: +  - pv_name: prometheus +    pv_template: prom-pv-server.yml +    pv_label: Prometheus Server PV +  - pv_name: prometheus-alertmanager +    pv_template: prom-pv-alertmanager.yml +    pv_label: Prometheus Alertmanager 
PV +  - pv_name: prometheus-alertbuffer +    pv_template: prom-pv-alertbuffer.yml +    pv_label: Prometheus Alert Buffer PV + +# Hostname/IP of the NFS server. Currently defaults to first master +openshift_prometheus_nfs_server: "{{ groups.nfs.0 }}" + +# storage +openshift_prometheus_storage_type: pvc +openshift_prometheus_pvc_name: prometheus +openshift_prometheus_pvc_size: 10G +openshift_prometheus_pvc_access_modes: [ReadWriteOnce] +openshift_prometheus_pvc_pv_selector: {} + +openshift_prometheus_alertmanager_storage_type: pvc +openshift_prometheus_alertmanager_pvc_name: prometheus-alertmanager +openshift_prometheus_alertmanager_pvc_size: 10G +openshift_prometheus_alertmanager_pvc_access_modes: [ReadWriteOnce] +openshift_prometheus_alertmanager_pvc_pv_selector: {} + +openshift_prometheus_alertbuffer_storage_type: pvc +openshift_prometheus_alertbuffer_pvc_name: prometheus-alertbuffer +openshift_prometheus_alertbuffer_pvc_size: 10G +openshift_prometheus_alertbuffer_pvc_access_modes: [ReadWriteOnce] +openshift_prometheus_alertbuffer_pvc_pv_selector: {} + +# container resources +openshift_prometheus_cpu_limit: null +openshift_prometheus_memory_limit: null +openshift_prometheus_cpu_requests: null +openshift_prometheus_memory_requests: null +openshift_prometheus_alertmanager_cpu_limit: null +openshift_prometheus_alertmanager_memory_limit: null +openshift_prometheus_alertmanager_cpu_requests: null +openshift_prometheus_alertmanager_memory_requests: null +openshift_prometheus_alertbuffer_cpu_limit: null +openshift_prometheus_alertbuffer_memory_limit: null +openshift_prometheus_alertbuffer_cpu_requests: null +openshift_prometheus_alertbuffer_memory_requests: null +openshift_prometheus_oauth_proxy_cpu_limit: null +openshift_prometheus_oauth_proxy_memory_limit: null +openshift_prometheus_oauth_proxy_cpu_requests: null +openshift_prometheus_oauth_proxy_memory_requests: null diff --git a/roles/openshift_prometheus/files/openshift_prometheus.exports b/roles/openshift_prometheus/files/openshift_prometheus.exports new file mode 100644 index 000000000..3ccedb1fd --- /dev/null +++ b/roles/openshift_prometheus/files/openshift_prometheus.exports @@ -0,0 +1,3 @@ +/exports/prometheus *(rw,no_root_squash,no_wdelay) +/exports/prometheus-alertmanager *(rw,no_root_squash,no_wdelay) +/exports/prometheus-alertbuffer *(rw,no_root_squash,no_wdelay) diff --git a/roles/openshift_prometheus/meta/main.yaml b/roles/openshift_prometheus/meta/main.yaml new file mode 100644 index 000000000..33188bb7e --- /dev/null +++ b/roles/openshift_prometheus/meta/main.yaml @@ -0,0 +1,19 @@ +--- +galaxy_info: +  author: OpenShift Development <dev@lists.openshift.redhat.com> +  description: Deploy OpenShift prometheus integration for the cluster +  company: Red Hat, Inc. +  license: license (Apache) +  min_ansible_version: 2.2 +  platforms: +  - name: EL +    versions: +    - 7 +  - name: Fedora +    versions: +    - all +  categories: +  - openshift +dependencies: +- { role: lib_openshift } +- { role: openshift_facts } diff --git a/roles/openshift_prometheus/tasks/create_pvs.yaml b/roles/openshift_prometheus/tasks/create_pvs.yaml new file mode 100644 index 000000000..4e79da05f --- /dev/null +++ b/roles/openshift_prometheus/tasks/create_pvs.yaml @@ -0,0 +1,36 @@ +--- +# Check for existance and then conditionally: +# - evaluate templates +# - PVs +# +# These tasks idempotently create required Prometheus PV objects. Do not +# call this file directly. This file is intended to be ran as an +# include that has a 'with_items' attached to it. 
Hence the use below +# of variables like "{{ item.pv_label }}" + +- name: "Check if the {{ item.pv_label }} template has been created already" +  oc_obj: +    namespace: "{{ openshift_prometheus_namespace }}" +    state: list +    kind: pv +    name: "{{ item.pv_name }}" +  register: prom_pv_check + +# Skip all of this if the PV already exists +- block: +    - name: "Ensure the {{ item.pv_label }} template is evaluated" +      template: +        src: "{{ item.pv_template }}.j2" +        dest: "{{ tempdir }}/templates/{{ item.pv_template }}" + +    - name: "Ensure {{ item.pv_label }} is created" +      oc_obj: +        namespace: "{{ openshift_prometheus_namespace }}" +        kind: pv +        name: "{{ item.pv_name }}" +        state: present +        delete_after: True +        files: +          - "{{ tempdir }}/templates/{{ item.pv_template }}" +  when: +    - not prom_pv_check.results.results.0 diff --git a/roles/openshift_prometheus/tasks/install_prometheus.yaml b/roles/openshift_prometheus/tasks/install_prometheus.yaml new file mode 100644 index 000000000..93bdda3e8 --- /dev/null +++ b/roles/openshift_prometheus/tasks/install_prometheus.yaml @@ -0,0 +1,241 @@ +--- + +# namespace +- name: Add prometheus project +  oc_project: +    state: "{{ state }}" +    name: "{{ openshift_prometheus_namespace }}" +    node_selector: "{{ openshift_prometheus_node_selector | oo_selector_to_string_list() }}" +    description: Prometheus + +# secrets +- name: Set alert and prometheus secrets +  oc_secret: +    state: "{{ state }}" +    name: "{{ item }}-proxy" +    namespace: "{{ openshift_prometheus_namespace }}" +    contents: +      - path: session_secret +        data: "{{ 43 | oo_random_word }}=" +  with_items: +    - prometheus +    - alerts + +# serviceaccount +- name: create prometheus serviceaccount +  oc_serviceaccount: +    state: "{{ state }}" +    name: prometheus +    namespace: "{{ openshift_prometheus_namespace }}" +    #    TODO add annotations when supproted +    #    annotations: +    #      serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' +    #      serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' + +    secrets: +      - prometheus-secrets +  changed_when: no + +# TODO remove this when annotations are supported by oc_serviceaccount +- name: annotate serviceaccount +  command: > +    {{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} +    serviceaccount prometheus +    serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' +    serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' + + +# create clusterrolebinding for prometheus serviceaccount +- name: Set cluster-reader permissions for prometheus +  oc_adm_policy_user: +    state: "{{ state }}" +    namespace: "{{ openshift_prometheus_namespace }}" +    resource_kind: cluster-role +    resource_name: cluster-reader +    user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:prometheus" + + +###################################################################### +# NFS +# In the case that we are not running on a cloud provider, volumes must be 
statically provisioned + +- include: nfs.yaml +  when: not (openshift_cloudprovider_kind is defined and (openshift_cloudprovider_kind == 'aws' or openshift_cloudprovider_kind == 'gce')) + + +# create prometheus and alerts services +# TODO join into 1 task with loop +- name: Create prometheus service +  oc_service: +    state: "{{ state }}" +    name: "{{ item.name }}" +    namespace: "{{ openshift_prometheus_namespace }}" +    selector: +      app: prometheus +    labels: +      name: "{{ item.name }}" +      #    TODO add annotations when supported +      #    annotations: +      #      service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls" +    ports: +      - port: 443 +        targetPort: 8443 +  with_items: +    - name: prometheus + +- name: Create alerts service +  oc_service: +    state: "{{ state }}" +    name: "{{ item.name }}" +    namespace: "{{ openshift_prometheus_namespace }}" +    selector: +      app: prometheus +    labels: +      name: "{{ item.name }}" +      #    TODO add annotations when supported +      #    annotations: +      #      service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls" +    ports: +      - port: 443 +        targetPort: 9443 +  with_items: +    - name: alerts + + +# Annotate services with secret name +# TODO remove this when annotations are supported by oc_service +- name: annotate prometheus service +  command: > +    {{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} +    service prometheus 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls' + +- name: annotate alerts service +  command: > +    {{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} +    service alerts 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-alerts-tls' + +# create prometheus and alerts routes +- name: create prometheus and alerts routes +  oc_route: +    state: "{{ state }}" +    name: "{{ item.name }}" +    namespace: "{{ openshift_prometheus_namespace }}" +    service_name: "{{ item.name }}" +    tls_termination: reencrypt +  with_items: +    - name: prometheus +    - name: alerts + +# Storage +- name: create prometheus pvc +  oc_pvc: +    namespace: "{{ openshift_prometheus_namespace }}" +    name: "{{ openshift_prometheus_pvc_name }}" +    access_modes: "{{ openshift_prometheus_pvc_access_modes }}" +    volume_capacity: "{{ openshift_prometheus_pvc_size }}" +    selector: "{{ openshift_prometheus_pvc_pv_selector }}" + +- name: create alertmanager pvc +  oc_pvc: +    namespace: "{{ openshift_prometheus_namespace }}" +    name: "{{ openshift_prometheus_alertmanager_pvc_name }}" +    access_modes: "{{ openshift_prometheus_alertmanager_pvc_access_modes }}" +    volume_capacity: "{{ openshift_prometheus_alertmanager_pvc_size }}" +    selector: "{{ openshift_prometheus_alertmanager_pvc_pv_selector }}" + +- name: create alertbuffer pvc +  oc_pvc: +    namespace: "{{ openshift_prometheus_namespace }}" +    name: "{{ openshift_prometheus_alertbuffer_pvc_name }}" +    access_modes: "{{ openshift_prometheus_alertbuffer_pvc_access_modes }}" +    volume_capacity: "{{ openshift_prometheus_alertbuffer_pvc_size }}" +    selector: "{{ openshift_prometheus_alertbuffer_pvc_pv_selector }}" + +# create prometheus deployment +- name: Set prometheus deployment template +  template: +    src: prometheus_deployment.j2 +    dest: "{{ tempdir }}/templates/prometheus.yaml" +  vars: +    namespace: "{{ openshift_prometheus_namespace }}" +   
 prom_replicas: "{{ openshift_prometheus_replicas }}" + +- name: Set prometheus deployment +  oc_obj: +    state: "{{ state }}" +    name: "prometheus" +    namespace: "{{ openshift_prometheus_namespace }}" +    kind: deployment +    files: +      - "{{ tempdir }}/templates/prometheus.yaml" +    delete_after: true + +# prometheus configmap +# Copy the additional rules file if it is defined +- name: Copy additional rules file to host +  copy: +    src: "{{ openshift_prometheus_additional_rules_file }}" +    dest: "{{ tempdir }}/prometheus.additional.rules" +  when: +    - openshift_prometheus_additional_rules_file is defined +    - openshift_prometheus_additional_rules_file is not none +    - openshift_prometheus_additional_rules_file | trim | length > 0 + +- stat: +    path: "{{ tempdir }}/prometheus.additional.rules" +  register: additional_rules_stat + +# The kubernetes version impacts the prometheus scraping endpoint +# so gathering it before constructing the configmap +- name: get oc version +  oc_version: +  register: oc_version + +- set_fact: +    kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}" + +- template: +    src: prometheus.yml.j2 +    dest: "{{ tempdir }}/prometheus.yml" +  changed_when: no + +- template: +    src: prometheus.rules.j2 +    dest: "{{ tempdir }}/prometheus.rules" +  changed_when: no + +# In prometheus configmap create "additional.rules" section if file exists +- name: Set prometheus configmap +  oc_configmap: +    state: "{{ state }}" +    name: "prometheus" +    namespace: "{{ openshift_prometheus_namespace }}" +    from_file: +      prometheus.rules: "{{ tempdir }}/prometheus.rules" +      prometheus.additional.rules: "{{ tempdir }}/prometheus.additional.rules" +      prometheus.yml: "{{ tempdir }}/prometheus.yml" +  when: additional_rules_stat.stat.exists == True + +- name: Set prometheus configmap +  oc_configmap: +    state: "{{ state }}" +    name: "prometheus" +    namespace: "{{ openshift_prometheus_namespace }}" +    from_file: +      prometheus.rules: "{{ tempdir }}/prometheus.rules" +      prometheus.yml: "{{ tempdir }}/prometheus.yml" +  when: additional_rules_stat.stat.exists == False + +# alertmanager configmap +- template: +    src: alertmanager.yml.j2 +    dest: "{{ tempdir }}/alertmanager.yml" +  changed_when: no + +- name: Set alertmanager configmap +  oc_configmap: +    state: "{{ state }}" +    name: "prometheus-alerts" +    namespace: "{{ openshift_prometheus_namespace }}" +    from_file: +      alertmanager.yml: "{{ tempdir }}/alertmanager.yml" diff --git a/roles/openshift_prometheus/tasks/main.yaml b/roles/openshift_prometheus/tasks/main.yaml new file mode 100644 index 000000000..523a64334 --- /dev/null +++ b/roles/openshift_prometheus/tasks/main.yaml @@ -0,0 +1,26 @@ +--- + +- name: Create temp directory for doing work in on target +  command: mktemp -td openshift-prometheus-ansible-XXXXXX +  register: mktemp +  changed_when: False + +- set_fact: +    tempdir: "{{ mktemp.stdout }}" + +- name: Create templates subdirectory +  file: +    state: directory +    path: "{{ tempdir }}/templates" +    mode: 0755 +  changed_when: False + +- include: install_prometheus.yaml +  vars: +    state: "{{ openshift_prometheus_state }}" + +- name: Delete temp directory +  file: +    name: "{{ tempdir }}" +    state: absent +  changed_when: False diff --git a/roles/openshift_prometheus/tasks/nfs.yaml b/roles/openshift_prometheus/tasks/nfs.yaml new file mode 100644 index 000000000..0b45f2cee --- /dev/null +++ 
b/roles/openshift_prometheus/tasks/nfs.yaml @@ -0,0 +1,44 @@ +--- +# Tasks to statically provision NFS volumes +# Include if not using dynamic volume provisioning +- name: Ensure the /exports/ directory exists +  file: +    path: /exports/ +    state: directory +    mode: 0755 +    owner: root +    group: root + +- name: Ensure the prom-pv0X export directories exist +  file: +    path: "/exports/{{ item }}" +    state: directory +    mode: 0777 +    owner: nfsnobody +    group: nfsnobody +  with_items: "{{ openshift_prometheus_pv_exports }}" + +- name: Ensure the NFS exports for Prometheus PVs exist +  copy: +    src: openshift_prometheus.exports +    dest: /etc/exports.d/openshift_prometheus.exports +  register: nfs_exports_updated + +- name: Ensure the NFS export table is refreshed if exports were added +  command: exportfs -ar +  when: +    - nfs_exports_updated.changed + + +###################################################################### +# Create the required Prometheus PVs. Check out these online docs if you +# need a refresher on includes looping with items: +# * http://docs.ansible.com/ansible/playbooks_loops.html#loops-and-includes-in-2-0 +# * http://stackoverflow.com/a/35128533 +# +# TODO: Handle the case where a PV template is updated in +# openshift-ansible and the change needs to be landed on the managed +# cluster. + +- include: create_pvs.yaml +  with_items: "{{ openshift_prometheus_pv_data }}" diff --git a/roles/openshift_prometheus/templates/alertmanager.yml.j2 b/roles/openshift_prometheus/templates/alertmanager.yml.j2 new file mode 100644 index 000000000..6c432a3d0 --- /dev/null +++ b/roles/openshift_prometheus/templates/alertmanager.yml.j2 @@ -0,0 +1,20 @@ +global: + +# The root route on which each incoming alert enters. +route: +  # default route if none match +  receiver: alert-buffer-wh + +  # The labels by which incoming alerts are grouped together. For example, +  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would +  # be batched into a single group. +  # TODO: +  group_by: [] + +  # All the above attributes are inherited by all child routes and can +  # overwritten on each. 
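Everything the route above matches lands on the `alert-buffer-wh` receiver defined just below, which forwards alerts to the alert-buffer sidecar over its webhook. A hedged sketch of the POST Alertmanager issues to that endpoint, with the payload trimmed to the core fields of the webhook format and the URL taken from `webhook_configs`:

```python
# Simulating the webhook delivery to the alert-buffer receiver below.
import json
import urllib.request

payload = {  # minimal Alertmanager webhook payload (version 4 schema)
    "version": "4",
    "status": "firing",
    "receiver": "alert-buffer-wh",
    "alerts": [{
        "labels": {"alertname": "Node Down", "instance": "node1.example.com"},
        "annotations": {"severity": "HIGH"},
    }],
}
req = urllib.request.Request(
    "http://localhost:9099/topics/alerts",  # url from webhook_configs below
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
# urllib.request.urlopen(req)  # uncomment against a running alert-buffer
```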
+ +receivers: +- name: alert-buffer-wh +  webhook_configs: +  - url: http://localhost:9099/topics/alerts diff --git a/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2 new file mode 100644 index 000000000..55a5e19c3 --- /dev/null +++ b/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2 @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: +  name: prometheus-alertbuffer +  labels: +    storage: prometheus-alertbuffer +spec: +  capacity: +    storage: 15Gi +  accessModes: +    - ReadWriteOnce +  nfs: +    path: /exports/prometheus-alertbuffer +    server: {{ openshift_prometheus_nfs_server }} +  persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2 new file mode 100644 index 000000000..4ee518735 --- /dev/null +++ b/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2 @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: +  name: prometheus-alertmanager +  labels: +    storage: prometheus-alertmanager +spec: +  capacity: +    storage: 15Gi +  accessModes: +    - ReadWriteOnce +  nfs: +    path: /exports/prometheus-alertmanager +    server: {{ openshift_prometheus_nfs_server }} +  persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_prometheus/templates/prom-pv-server.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-server.yml.j2 new file mode 100644 index 000000000..933bf0f60 --- /dev/null +++ b/roles/openshift_prometheus/templates/prom-pv-server.yml.j2 @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: +  name: prometheus +  labels: +    storage: prometheus +spec: +  capacity: +    storage: 15Gi +  accessModes: +    - ReadWriteOnce +  nfs: +    path: /exports/prometheus +    server: {{ openshift_prometheus_nfs_server }} +  persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_prometheus/templates/prometheus.rules.j2 b/roles/openshift_prometheus/templates/prometheus.rules.j2 new file mode 100644 index 000000000..e861dc127 --- /dev/null +++ b/roles/openshift_prometheus/templates/prometheus.rules.j2 @@ -0,0 +1,4 @@ +groups: +- name: example-rules +  interval: 30s # defaults to global interval +  rules: diff --git a/roles/openshift_prometheus/templates/prometheus.yml.j2 b/roles/openshift_prometheus/templates/prometheus.yml.j2 new file mode 100644 index 000000000..63430f834 --- /dev/null +++ b/roles/openshift_prometheus/templates/prometheus.yml.j2 @@ -0,0 +1,174 @@ +rule_files: +  - 'prometheus.rules' +{% if openshift_prometheus_additional_rules_file is defined and openshift_prometheus_additional_rules_file is not none %} +  - 'prometheus.additional.rules' +{% endif %} + + + +# A scrape configuration for running Prometheus on a Kubernetes cluster. +# This uses separate scrape configs for cluster components (i.e. API server, node) +# and services to allow each to use different authentication configs. +# +# Kubernetes labels will be added as Prometheus labels on metrics via the +# `labelmap` relabeling action. + +# Scrape config for API servers. +# +# Kubernetes exposes API servers as endpoints to the default/kubernetes +# service so this uses `endpoints` role and uses relabelling to only keep +# the endpoints associated with the default/kubernetes service using the +# default named port `https`. This works for single API server deployments as +# well as HA API server deployments. 
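The `keep` rule described above relies on Prometheus joining the `source_labels` values with `;` (the default separator) before matching the anchored regex. A small sketch of that semantics for the apiserver job that follows:

```python
# Sketch of Prometheus 'keep' relabel semantics: join source label values
# with ';' and keep the target only if the regex matches the whole string.
import re

def keep(target_labels, source_labels, regex):
    joined = ";".join(target_labels.get(label, "") for label in source_labels)
    return re.fullmatch(regex, joined) is not None

target = {
    "__meta_kubernetes_namespace": "default",
    "__meta_kubernetes_service_name": "kubernetes",
    "__meta_kubernetes_endpoint_port_name": "https",
}
print(keep(target,
           ["__meta_kubernetes_namespace",
            "__meta_kubernetes_service_name",
            "__meta_kubernetes_endpoint_port_name"],
           r"default;kubernetes;https"))  # True -> the endpoint is scraped
```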
+scrape_configs: +- job_name: 'kubernetes-apiservers' + +  kubernetes_sd_configs: +  - role: endpoints + +  scheme: https +  tls_config: +    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt +  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + +  # Keep only the default/kubernetes service endpoints for the https port. This +  # will add targets for each API server which Kubernetes adds an endpoint to +  # the default/kubernetes service. +  relabel_configs: +  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +    action: keep +    regex: default;kubernetes;https + +# Scrape config for nodes. +# +# Each node exposes a /metrics endpoint that contains operational metrics for +# the Kubelet and other components. +- job_name: 'kubernetes-nodes' + +  scheme: https +  tls_config: +    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt +  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + +  kubernetes_sd_configs: +  - role: node + +  relabel_configs: +  - action: labelmap +    regex: __meta_kubernetes_node_label_(.+) + +# Scrape config for controllers. +# +# Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for +# the controllers. +# +# TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via +#       endpoints. +- job_name: 'kubernetes-controllers' + +  scheme: https +  tls_config: +    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt +  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + +  kubernetes_sd_configs: +  - role: endpoints + +  # Keep only the default/kubernetes service endpoints for the https port, and then +  # set the port to 8444. This is the default configuration for the controllers on OpenShift +  # masters. +  relabel_configs: +  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +    action: keep +    regex: default;kubernetes;https +  - source_labels: [__address__] +    action: replace +    target_label: __address__ +    regex: (.+)(?::\d+) +    replacement: $1:8444 + +# Scrape config for cAdvisor. +# +# Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that +# reports container metrics for each running pod. Scrape those by default. +- job_name: 'kubernetes-cadvisor' + +  scheme: https +  tls_config: +    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt +  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + +{% if kubernetes_version | float() >= 1.7 | float() %} +  metrics_path: /metrics/cadvisor +{% else %} +  metrics_path: /metrics +{% endif %} + +  kubernetes_sd_configs: +  - role: node + +  relabel_configs: +  - action: labelmap +    regex: __meta_kubernetes_node_label_(.+) + +# Scrape config for service endpoints. +# +# The relabeling allows the actual service scrape endpoint to be configured +# via the following annotations: +# +# * `prometheus.io/scrape`: Only scrape services that have a value of `true` +# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need +# to set this to `https` & most likely set the `tls_config` of the scrape config. +# * `prometheus.io/path`: If the metrics path is not `/metrics` override this. +# * `prometheus.io/port`: If the metrics are exposed on a different port to the +# service then set this appropriately. 
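The `prometheus.io/port` annotation described above is honored by the `__address__` rewrite in the relabel_configs that follow. A sketch of that single rule; note that Prometheus (RE2) writes the replacement as `$1:$2`, while Python spells the same groups `\1`/`\2`:

```python
# Sketch of the __address__ relabel below: [__address__, port annotation] are
# joined with ';' and, when the regex matches, host:port is rewritten to
# host:<annotated port>; on no match the address is left untouched.
import re

def rewrite_address(address, annotated_port):
    joined = "{};{}".format(address, annotated_port)
    match = re.fullmatch(r"(.+)(?::\d+);(\d+)", joined)
    return "{}:{}".format(match.group(1), match.group(2)) if match else address

print(rewrite_address("10.1.2.3:8080", "9100"))  # 10.1.2.3:9100
print(rewrite_address("10.1.2.3", "9100"))       # unchanged: regex needs a port
```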
+- job_name: 'kubernetes-service-endpoints' + +  tls_config: +    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt +    # TODO: this should be per target +    insecure_skip_verify: true + +  kubernetes_sd_configs: +  - role: endpoints + +  relabel_configs: +  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] +    action: keep +    regex: true +  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] +    action: replace +    target_label: __scheme__ +    regex: (https?) +  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] +    action: replace +    target_label: __metrics_path__ +    regex: (.+) +  - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] +    action: replace +    target_label: __address__ +    regex: (.+)(?::\d+);(\d+) +    replacement: $1:$2 +  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username] +    action: replace +    target_label: __basic_auth_username__ +    regex: (.+) +  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password] +    action: replace +    target_label: __basic_auth_password__ +    regex: (.+) +  - action: labelmap +    regex: __meta_kubernetes_service_label_(.+) +  - source_labels: [__meta_kubernetes_namespace] +    action: replace +    target_label: kubernetes_namespace +  - source_labels: [__meta_kubernetes_service_name] +    action: replace +    target_label: kubernetes_name + +alerting: +  alertmanagers: +  - scheme: http +    static_configs: +    - targets: +      - "localhost:9093" diff --git a/roles/openshift_prometheus/templates/prometheus_deployment.j2 b/roles/openshift_prometheus/templates/prometheus_deployment.j2 new file mode 100644 index 000000000..98c117f19 --- /dev/null +++ b/roles/openshift_prometheus/templates/prometheus_deployment.j2 @@ -0,0 +1,240 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: +  name: prometheus +  namespace: {{ namespace }} +  labels: +    app: prometheus +spec: +  replicas: {{ prom_replicas|default(1) }} +  selector: +    provider: openshift +    matchLabels: +      app: prometheus +  template: +    metadata: +      name: prometheus +      labels: +        app: prometheus +    spec: +      serviceAccountName: prometheus +{% if openshift_prometheus_node_selector is iterable and openshift_prometheus_node_selector | length > 0 %} +      nodeSelector: +{% for key, value in openshift_prometheus_node_selector.iteritems() %} +        {{key}}: "{{value}}" +{% endfor %} +{% endif %} +      containers: +      # Deploy Prometheus behind an oauth proxy +      - name: prom-proxy +        image: "{{ openshift_prometheus_image_proxy }}" +        imagePullPolicy: IfNotPresent +        resources: +          requests: +{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %} +            memory: "{{openshift_prometheus_oauth_proxy_memory_requests}}" +{% endif %} +{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %} +            cpu: "{{openshift_prometheus_oauth_proxy_cpu_requests}}" +{% endif %} +          limits: +{% if openshift_prometheus_memory_requests_limit_proxy is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %} +            memory: "{{openshift_prometheus_oauth_proxy_memory_limit}}" +{% endif %} +{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and 
openshift_prometheus_oauth_proxy_cpu_limit is not none %} +            cpu: "{{openshift_prometheus_oauth_proxy_cpu_limit}}" +{% endif %} +        ports: +        - containerPort: 8443 +          name: web +        args: +        - -provider=openshift +        - -https-address=:8443 +        - -http-address= +        - -email-domain=* +        - -upstream=http://localhost:9090 +        - -client-id=system:serviceaccount:{{ namespace }}:prometheus +        - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}' +        - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}' +        - -tls-cert=/etc/tls/private/tls.crt +        - -tls-key=/etc/tls/private/tls.key +        - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token +        - -cookie-secret-file=/etc/proxy/secrets/session_secret +        - -skip-auth-regex=^/metrics +        volumeMounts: +        - mountPath: /etc/tls/private +          name: prometheus-tls +        - mountPath: /etc/proxy/secrets +          name: prometheus-secrets +        - mountPath: /prometheus +          name: prometheus-data + +      - name: prometheus +        args: +        - --storage.tsdb.retention=6h +        - --config.file=/etc/prometheus/prometheus.yml +        - --web.listen-address=localhost:9090 +        image: "{{ openshift_prometheus_image_prometheus }}" +        imagePullPolicy: IfNotPresent +        resources: +          requests: +{% if openshift_prometheus_memory_requests is defined and openshift_prometheus_memory_requests is not none %} +            memory: "{{openshift_prometheus_memory_requests}}" +{% endif %} +{% if openshift_prometheus_cpu_requests is defined and openshift_prometheus_cpu_requests is not none %} +            cpu: "{{openshift_prometheus_cpu_requests}}" +{% endif %} +          limits: +{% if openshift_prometheus_memory_limit is defined and openshift_prometheus_memory_limit is not none %} +            memory: "{{ openshift_prometheus_memory_limit }}" +{% endif %} +{% if openshift_prometheus_cpu_limit is defined and openshift_prometheus_cpu_limit is not none %} +            cpu: "{{openshift_prometheus_cpu_limit}}" +{% endif %} + +        volumeMounts: +        - mountPath: /etc/prometheus +          name: prometheus-config +        - mountPath: /prometheus +          name: prometheus-data + +      # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy +      - name: alerts-proxy +        image: "{{ openshift_prometheus_image_proxy }}" +        imagePullPolicy: IfNotPresent +        resources: +          requests: +{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %} +            memory: "{{openshift_prometheus_oauth_proxy_memory_requests}}" +{% endif %} +{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %} +            cpu: "{{openshift_prometheus_oauth_proxy_cpu_requests}}" +{% endif %} +          limits: +{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %} +            memory: "{{openshift_prometheus_oauth_proxy_memory_limit}}" +{% endif %} +{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %} +            cpu: 
"{{openshift_prometheus_oauth_proxy_cpu_limit}}" +{% endif %} +        ports: +        - containerPort: 9443 +          name: web +        args: +        - -provider=openshift +        - -https-address=:9443 +        - -http-address= +        - -email-domain=* +        - -upstream=http://localhost:9099 +        - -client-id=system:serviceaccount:{{ namespace }}:prometheus +        - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}' +        - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}' +        - -tls-cert=/etc/tls/private/tls.crt +        - -tls-key=/etc/tls/private/tls.key +        - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token +        - -cookie-secret-file=/etc/proxy/secrets/session_secret +        volumeMounts: +        - mountPath: /etc/tls/private +          name: alerts-tls +        - mountPath: /etc/proxy/secrets +          name: alerts-secrets + +      - name: alert-buffer +        args: +        - --storage-path=/alert-buffer/messages.db +        image: "{{ openshift_prometheus_image_alertbuffer }}" +        imagePullPolicy: IfNotPresent +        resources: +          requests: +{% if openshift_prometheus_alertbuffer_memory_requests is defined and openshift_prometheus_alertbuffer_memory_requests is not none %} +            memory: "{{openshift_prometheus_alertbuffer_memory_requests}}" +{% endif %} +{% if openshift_prometheus_alertbuffer_cpu_requests is defined and openshift_prometheus_alertbuffer_cpu_requests is not none %} +            cpu: "{{openshift_prometheus_alertbuffer_cpu_requests}}" +{% endif %} +          limits: +{% if openshift_prometheus_alertbuffer_memory_limit is defined and openshift_prometheus_alertbuffer_memory_limit is not none %} +            memory: "{{openshift_prometheus_alertbuffer_memory_limit}}" +{% endif %} +{% if openshift_prometheus_alertbuffer_cpu_limit is defined and openshift_prometheus_alertbuffer_cpu_limit is not none %} +            cpu: "{{openshift_prometheus_alertbuffer_cpu_limit}}" +{% endif %} +        volumeMounts: +        - mountPath: /alert-buffer +          name: alert-buffer-data +        ports: +        - containerPort: 9099 +          name: alert-buf + +      - name: alertmanager +        args: +        - -config.file=/etc/alertmanager/alertmanager.yml +        image: "{{ openshift_prometheus_image_alertmanager }}" +        imagePullPolicy: IfNotPresent +        resources: +          requests: +{% if openshift_prometheus_alertmanager_memory_requests is defined and openshift_prometheus_alertmanager_memory_requests is not none %} +            memory: "{{openshift_prometheus_alertmanager_memory_requests}}" +{% endif %} +{% if openshift_prometheus_alertmanager_cpu_requests is defined and openshift_prometheus_alertmanager_cpu_requests is not none %} +            cpu: "{{openshift_prometheus_alertmanager_cpu_requests}}" +{% endif %} +          limits: +{% if openshift_prometheus_alertmanager_memory_limit is defined and openshift_prometheus_alertmanager_memory_limit is not none %} +            memory: "{{openshift_prometheus_alertmanager_memory_limit}}" +{% endif %} +{% if openshift_prometheus_alertmanager_cpu_limit is defined and openshift_prometheus_alertmanager_cpu_limit is not none %} +            cpu: "{{openshift_prometheus_alertmanager_cpu_limit}}" +{% endif %} +        ports: +        - containerPort: 9093 +          name: web +    
    volumeMounts: +        - mountPath: /etc/alertmanager +          name: alertmanager-config +        - mountPath: /alertmanager +          name: alertmanager-data + +      restartPolicy: Always +      volumes: +      - name: prometheus-config +        configMap: +          defaultMode: 420 +          name: prometheus +      - name: prometheus-secrets +        secret: +          secretName: prometheus-proxy +      - name: prometheus-tls +        secret: +          secretName: prometheus-tls +      - name: prometheus-data +{% if openshift_prometheus_storage_type == 'pvc' %} +        persistentVolumeClaim: +          claimName: {{ openshift_prometheus_pvc_name }} +{% else %} +        emptydir: {} +{% endif %} +      - name: alertmanager-config +        configMap: +          defaultMode: 420 +          name: prometheus-alerts +      - name: alerts-secrets +        secret: +          secretName: alerts-proxy +      - name: alerts-tls +        secret: +          secretName: prometheus-alerts-tls +      - name: alertmanager-data +{% if openshift_prometheus_alertmanager_storage_type == 'pvc' %} +        persistentVolumeClaim: +          claimName: {{ openshift_prometheus_alertmanager_pvc_name }} +{% else %} +        emptydir: {} +{% endif %} +      - name: alert-buffer-data +{% if openshift_prometheus_alertbuffer_storage_type == 'pvc' %} +        persistentVolumeClaim: +          claimName: {{ openshift_prometheus_alertbuffer_pvc_name }} +{% else %} +        emptydir: {} +{% endif %} diff --git a/roles/openshift_prometheus/tests/inventory b/roles/openshift_prometheus/tests/inventory new file mode 100644 index 000000000..878877b07 --- /dev/null +++ b/roles/openshift_prometheus/tests/inventory @@ -0,0 +1,2 @@ +localhost + diff --git a/roles/openshift_prometheus/tests/test.yaml b/roles/openshift_prometheus/tests/test.yaml new file mode 100644 index 000000000..37baf573c --- /dev/null +++ b/roles/openshift_prometheus/tests/test.yaml @@ -0,0 +1,5 @@ +--- +- hosts: localhost +  remote_user: root +  roles: +    - openshift_prometheus  | 
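Each of the three data volumes at the end of the deployment template picks PVC-backed or ephemeral storage from its `*_storage_type` variable. A sketch of that selection as plain Python (note the Kubernetes API field name is camelCase `emptyDir`):

```python
# Mirrors the per-volume Jinja conditional in prometheus_deployment.j2.
def data_volume(name, storage_type, pvc_name):
    volume = {"name": name}
    if storage_type == "pvc":
        volume["persistentVolumeClaim"] = {"claimName": pvc_name}
    else:
        volume["emptyDir"] = {}  # ephemeral; data is lost on pod restart
    return volume

volumes = [
    data_volume("prometheus-data", "pvc", "prometheus"),
    data_volume("alertmanager-data", "pvc", "prometheus-alertmanager"),
    data_volume("alert-buffer-data", "none", "prometheus-alertbuffer"),
]
```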
