diff options
| author | Mangirdas <m.judeikis@gmail.com> | 2018-01-27 08:05:31 +0000 | 
|---|---|---|
| committer | Mangirdas <m.judeikis@gmail.com> | 2018-01-28 08:38:36 +0000 | 
| commit | 3de29f6d5a3017b57c553c5e2fb63a50994df840 (patch) | |
| tree | 8d19dd62d0375ce989751e612f32486b72e82e9c | |
| parent | a24ccff0423ca25bfcb1a3d9f79470aae5948d66 (diff) | |
| download | openshift-3de29f6d5a3017b57c553c5e2fb63a50994df840.tar.gz openshift-3de29f6d5a3017b57c553c5e2fb63a50994df840.tar.bz2 openshift-3de29f6d5a3017b57c553c5e2fb63a50994df840.tar.xz openshift-3de29f6d5a3017b57c553c5e2fb63a50994df840.zip  | |
Rebase Prometheus example for new scrape endpoints and expose alert manager
| -rw-r--r-- | playbooks/openshift-prometheus/private/uninstall.yml | 8 | ||||
| -rw-r--r-- | playbooks/openshift-prometheus/uninstall.yml | 2 | ||||
| -rw-r--r-- | roles/openshift_prometheus/defaults/main.yaml | 15 | ||||
| -rw-r--r-- | roles/openshift_prometheus/tasks/facts.yaml | 10 | ||||
| -rw-r--r-- | roles/openshift_prometheus/tasks/install_prometheus.yaml | 119 | ||||
| -rw-r--r-- | roles/openshift_prometheus/tasks/main.yaml | 4 | ||||
| -rw-r--r-- | roles/openshift_prometheus/tasks/uninstall.yaml (renamed from roles/openshift_prometheus/tasks/uninstall_prometheus.yaml) | 0 | ||||
| -rw-r--r-- | roles/openshift_prometheus/templates/prometheus.j2 | 92 | ||||
| -rw-r--r-- | roles/openshift_prometheus/templates/prometheus.yml.j2 | 175 | 
9 files changed, 283 insertions, 142 deletions
diff --git a/playbooks/openshift-prometheus/private/uninstall.yml b/playbooks/openshift-prometheus/private/uninstall.yml new file mode 100644 index 000000000..2df39c2a8 --- /dev/null +++ b/playbooks/openshift-prometheus/private/uninstall.yml @@ -0,0 +1,8 @@ +--- +- name: Uninstall Prometheus +  hosts: masters[0] +  tasks: +  - name: Run the Prometheus Uninstall Role Tasks +    include_role: +      name: openshift_prometheus +      tasks_from: uninstall diff --git a/playbooks/openshift-prometheus/uninstall.yml b/playbooks/openshift-prometheus/uninstall.yml new file mode 100644 index 000000000..c92ade786 --- /dev/null +++ b/playbooks/openshift-prometheus/uninstall.yml @@ -0,0 +1,2 @@ +--- +- import_playbook: private/uninstall.yml diff --git a/roles/openshift_prometheus/defaults/main.yaml b/roles/openshift_prometheus/defaults/main.yaml index 1b21c4739..37a05f3f0 100644 --- a/roles/openshift_prometheus/defaults/main.yaml +++ b/roles/openshift_prometheus/defaults/main.yaml @@ -7,9 +7,24 @@ openshift_prometheus_namespace: openshift-metrics  # defaults hosts for routes  openshift_prometheus_hostname: prometheus-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}}  openshift_prometheus_alerts_hostname: alerts-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}} +openshift_prometheus_alertmanager_hostname: alertmanager-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}} +  openshift_prometheus_node_selector: {"region":"infra"} +openshift_prometheus_service_port: 443 +openshift_prometheus_service_targetport: 8443 +openshift_prometheus_service_name: prometheus +openshift_prometheus_alerts_service_targetport: 9443 +openshift_prometheus_alerts_service_name: alerts +openshift_prometheus_alertmanager_service_targetport: 10443 +openshift_prometheus_alertmanager_service_name: alertmanager +openshift_prometheus_serviceaccount_annotations: [] +l_openshift_prometheus_serviceaccount_annotations: +  - serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' +  - serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' +  - serviceaccounts.openshift.io/oauth-redirectreference.alertmanager='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alertmanager"}}' +  # additional prometheus rules file  openshift_prometheus_additional_rules_file: null diff --git a/roles/openshift_prometheus/tasks/facts.yaml b/roles/openshift_prometheus/tasks/facts.yaml new file mode 100644 index 000000000..214089732 --- /dev/null +++ b/roles/openshift_prometheus/tasks/facts.yaml @@ -0,0 +1,10 @@ +--- +# The kubernetes version impacts the prometheus scraping endpoint +# so gathering it before constructing the configmap +- name: get oc version +  oc_version: +  register: oc_version + +- set_fact: +    kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}" +    openshift_prometheus_serviceaccount_annotations: "{{ l_openshift_prometheus_serviceaccount_annotations + openshift_prometheus_serviceaccount_annotations|list }}" diff --git a/roles/openshift_prometheus/tasks/install_prometheus.yaml b/roles/openshift_prometheus/tasks/install_prometheus.yaml index 749df5152..0b565502f 100644 --- a/roles/openshift_prometheus/tasks/install_prometheus.yaml +++ b/roles/openshift_prometheus/tasks/install_prometheus.yaml @@ -1,4 +1,6 @@  --- +# set facts +- include_tasks: facts.yaml  # namespace  - name: Add prometheus project @@ -9,7 +11,7 @@      description: Prometheus  # secrets -- name: Set alert and prometheus secrets +- name: Set alert, alertmanager and prometheus secrets    oc_secret:      state: present      name: "{{ item }}-proxy" @@ -20,30 +22,24 @@    with_items:      - prometheus      - alerts +    - alertmanager  # serviceaccount  - name: create prometheus serviceaccount    oc_serviceaccount:      state: present -    name: prometheus +    name: "{{ openshift_prometheus_service_name }}"      namespace: "{{ openshift_prometheus_namespace }}" -    #    TODO add annotations when supproted -    #    annotations: -    #      serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' -    #      serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' - -    secrets: -      - prometheus-secrets    changed_when: no +  # TODO remove this when annotations are supported by oc_serviceaccount  - name: annotate serviceaccount    command: >      {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} -    serviceaccount prometheus -    serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' -    serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' - +    serviceaccount {{ openshift_prometheus_service_name }} {{ item }} +  with_items: +    "{{ openshift_prometheus_serviceaccount_annotations }}"  # create clusterrolebinding for prometheus serviceaccount  - name: Set cluster-reader permissions for prometheus @@ -52,63 +48,61 @@      namespace: "{{ openshift_prometheus_namespace }}"      resource_kind: cluster-role      resource_name: cluster-reader -    user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:prometheus" +    user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:{{ openshift_prometheus_service_name }}" + -# create prometheus and alerts services -# TODO join into 1 task with loop -- name: Create prometheus service +- name: create services for prometheus    oc_service: -    state: present -    name: "{{ item.name }}" +    name: "{{ openshift_prometheus_service_name }}"      namespace: "{{ openshift_prometheus_namespace }}" -    selector: -      app: prometheus      labels: -      name: "{{ item.name }}" -      #    TODO add annotations when supported -      #    annotations: -      #      service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls" +      name: prometheus +    annotations: +      oprometheus.io/scrape: 'true' +      oprometheus.io/scheme: https +      service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls      ports: -      - port: 443 -        targetPort: 8443 -  with_items: -    - name: prometheus +      - name: prometheus +        port: "{{ openshift_prometheus_service_port }}" +        targetPort: "{{ openshift_prometheus_service_targetport }}" +        protocol: TCP +    selector: +      app: prometheus -- name: Create alerts service +- name: create services for alert buffer    oc_service: -    state: present -    name: "{{ item.name }}" +    name: "{{ openshift_prometheus_alerts_service_name }}"      namespace: "{{ openshift_prometheus_namespace }}" +    labels: +      name: prometheus +    annotations: +      service.alpha.openshift.io/serving-cert-secret-name: alerts-tls +    ports: +      - name: prometheus +        port: "{{ openshift_prometheus_service_port }}" +        targetPort: "{{ openshift_prometheus_alerts_service_targetport }}" +        protocol: TCP      selector:        app: prometheus + +- name: create services for alertmanager +  oc_service: +    name: "{{ openshift_prometheus_alertmanager_service_name }}" +    namespace: "{{ openshift_prometheus_namespace }}"      labels: -      name: "{{ item.name }}" -      #    TODO add annotations when supported -      #    annotations: -      #      service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls" +      name: prometheus +    annotations: +      service.alpha.openshift.io/serving-cert-secret-name: alertmanager-tls      ports: -      - port: 443 -        targetPort: 9443 -  with_items: -    - name: alerts - - -# Annotate services with secret name -# TODO remove this when annotations are supported by oc_service -- name: annotate prometheus service -  command: > -    {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} -    service prometheus -    prometheus.io/scrape='true' -    prometheus.io/scheme=https -    service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls - -- name: annotate alerts service -  command: > -    {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} -    service alerts 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-alerts-tls' +      - name: prometheus +        port: "{{ openshift_prometheus_service_port }}" +        targetPort: "{{ openshift_prometheus_alertmanager_service_targetport }}" +        protocol: TCP +    selector: +      app: prometheus  # create prometheus and alerts routes +# TODO: oc_route module should support insecureEdgeTerminationPolicy: Redirect  - name: create prometheus and alerts routes    oc_route:      state: present @@ -122,6 +116,8 @@        host: "{{ openshift_prometheus_hostname }}"      - name: alerts        host: "{{ openshift_prometheus_alerts_hostname }}" +    - name: alertmanager +      host: "{{ openshift_prometheus_alertmanager_hostname }}"  # Storage  - name: create prometheus pvc @@ -169,15 +165,6 @@      path: "{{ tempdir }}/prometheus.additional.rules"    register: additional_rules_stat -# The kubernetes version impacts the prometheus scraping endpoint -# so gathering it before constructing the configmap -- name: get oc version -  oc_version: -  register: oc_version - -- set_fact: -    kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}" -  - template:      src: prometheus.yml.j2      dest: "{{ tempdir }}/prometheus.yml" @@ -219,7 +206,7 @@  - name: Set alertmanager configmap    oc_configmap:      state: present -    name: "prometheus-alerts" +    name: "alertmanager"      namespace: "{{ openshift_prometheus_namespace }}"      from_file:        alertmanager.yml: "{{ tempdir }}/alertmanager.yml" diff --git a/roles/openshift_prometheus/tasks/main.yaml b/roles/openshift_prometheus/tasks/main.yaml index b859eb111..66d65a3f2 100644 --- a/roles/openshift_prometheus/tasks/main.yaml +++ b/roles/openshift_prometheus/tasks/main.yaml @@ -16,9 +16,11 @@  - name: Create templates subdirectory    file:      state: directory -    path: "{{ tempdir }}/templates" +    path: "{{ tempdir }}/{{ item }}"      mode: 0755    changed_when: False +  with_items: +    - templates  - include_tasks: install_prometheus.yaml    when: openshift_prometheus_state == 'present' diff --git a/roles/openshift_prometheus/tasks/uninstall_prometheus.yaml b/roles/openshift_prometheus/tasks/uninstall.yaml index d746402db..d746402db 100644 --- a/roles/openshift_prometheus/tasks/uninstall_prometheus.yaml +++ b/roles/openshift_prometheus/tasks/uninstall.yaml diff --git a/roles/openshift_prometheus/templates/prometheus.j2 b/roles/openshift_prometheus/templates/prometheus.j2 index d780550b8..c0abd483b 100644 --- a/roles/openshift_prometheus/templates/prometheus.j2 +++ b/roles/openshift_prometheus/templates/prometheus.j2 @@ -19,7 +19,7 @@ spec:        labels:          app: prometheus      spec: -      serviceAccountName: prometheus +      serviceAccountName: "{{ openshift_prometheus_service_name }}"  {% if openshift_prometheus_node_selector is iterable and openshift_prometheus_node_selector | length > 0 %}        nodeSelector:  {% for key, value in openshift_prometheus_node_selector.items() %} @@ -47,15 +47,15 @@ spec:              cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}"  {% endif %}          ports: -        - containerPort: 8443 +        - containerPort: {{ openshift_prometheus_service_targetport }}            name: web          args:          - -provider=openshift -        - -https-address=:8443 +        - -https-address=:{{ openshift_prometheus_service_targetport }}          - -http-address=          - -email-domain=*          - -upstream=http://localhost:9090 -        - -client-id=system:serviceaccount:{{ namespace }}:prometheus +        - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }}          - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'          - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'          - -tls-cert=/etc/tls/private/tls.crt @@ -67,9 +67,9 @@ spec:          - -skip-auth-regex=^/metrics          volumeMounts:          - mountPath: /etc/tls/private -          name: prometheus-tls +          name: prometheus-tls-secret          - mountPath: /etc/proxy/secrets -          name: prometheus-secrets +          name: prometheus-proxy-secret          - mountPath: /prometheus            name: prometheus-data @@ -104,7 +104,7 @@ spec:          - mountPath: /prometheus            name: prometheus-data -      # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy +      # Deploy alert-buffer behind oauth alerts-proxy        - name: alerts-proxy          image: "{{ l_openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ l_openshift_prometheus_proxy_image_version }}"          imagePullPolicy: IfNotPresent @@ -124,15 +124,15 @@ spec:              cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}"  {% endif %}          ports: -        - containerPort: 9443 +        - containerPort: {{ openshift_prometheus_alerts_service_targetport }}            name: web          args:          - -provider=openshift -        - -https-address=:9443 +        - -https-address=:{{ openshift_prometheus_alerts_service_targetport }}          - -http-address=          - -email-domain=*          - -upstream=http://localhost:9099 -        - -client-id=system:serviceaccount:{{ namespace }}:prometheus +        - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }}          - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'          - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'          - -tls-cert=/etc/tls/private/tls.crt @@ -143,9 +143,9 @@ spec:          - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt          volumeMounts:          - mountPath: /etc/tls/private -          name: alerts-tls +          name: alerts-tls-secret          - mountPath: /etc/proxy/secrets -          name: alerts-secrets +          name: alerts-proxy-secret        - name: alert-buffer          args: @@ -169,11 +169,54 @@ spec:  {% endif %}          volumeMounts:          - mountPath: /alert-buffer -          name: alert-buffer-data +          name: alerts-data          ports:          - containerPort: 9099            name: alert-buf +      # Deploy alertmanager behind oauth alertmanager-proxy +      - name: alertmanager-proxy +        image: "{{ l_openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ l_openshift_prometheus_proxy_image_version }}" +        imagePullPolicy: IfNotPresent +        requests: +{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %} +          memory: "{{ openshift_prometheus_oauth_proxy_memory_requests }}" +{% endif %} +{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %} +          cpu: "{{ openshift_prometheus_oauth_proxy_cpu_requests }}" +{% endif %} +        limits: +{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %} +          memory: "{{ openshift_prometheus_oauth_proxy_memory_limit }}" +{% endif %} +{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %} +          cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}" +{% endif %} +        ports: +        - containerPort: {{ openshift_prometheus_alertmanager_service_targetport }} +          name: web +        args: +        - -provider=openshift +        - -https-address=:{{ openshift_prometheus_alertmanager_service_targetport }} +        - -http-address= +        - -email-domain=* +        - -upstream=http://localhost:9093 +        - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }} +        - -openshift-ca=/etc/pki/tls/cert.pem +        - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt +        - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}' +        - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}' +        - -tls-cert=/etc/tls/private/tls.crt +        - -tls-key=/etc/tls/private/tls.key +        - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token +        - -cookie-secret-file=/etc/proxy/secrets/session_secret +        - -skip-auth-regex=^/metrics +        volumeMounts: +        - mountPath: /etc/tls/private +          name: alertmanager-tls-secret +        - mountPath: /etc/proxy/secrets +          name: alertmanager-proxy-secret +        - name: alertmanager          args:          - -config.file=/etc/alertmanager/alertmanager.yml @@ -205,14 +248,15 @@ spec:        restartPolicy: Always        volumes: +              - name: prometheus-config          configMap:            defaultMode: 420            name: prometheus -      - name: prometheus-secrets +      - name: prometheus-proxy-secret          secret:            secretName: prometheus-proxy -      - name: prometheus-tls +      - name: prometheus-tls-secret          secret:            secretName: prometheus-tls        - name: prometheus-data @@ -225,13 +269,19 @@ spec:        - name: alertmanager-config          configMap:            defaultMode: 420 -          name: prometheus-alerts -      - name: alerts-secrets +          name: alertmanager +      - name: alertmanager-proxy-secret          secret: -          secretName: alerts-proxy -      - name: alerts-tls +          secretName: alertmanager-proxy   +      - name: alertmanager-tls-secret +        secret: +          secretName: alertmanager-tls  +      - name: alerts-tls-secret          secret: -          secretName: prometheus-alerts-tls +          secretName: alerts-tls +      - name: alerts-proxy-secret +        secret: +          secretName: alerts-proxy        - name: alertmanager-data  {% if openshift_prometheus_alertmanager_storage_type == 'pvc' %}          persistentVolumeClaim: @@ -239,7 +289,7 @@ spec:  {% else %}          emptydir: {}  {% endif %} -      - name: alert-buffer-data +      - name: alerts-data  {% if openshift_prometheus_alertbuffer_storage_type == 'pvc' %}          persistentVolumeClaim:            claimName: {{ openshift_prometheus_alertbuffer_pvc_name }} diff --git a/roles/openshift_prometheus/templates/prometheus.yml.j2 b/roles/openshift_prometheus/templates/prometheus.yml.j2 index 63430f834..005c2c564 100644 --- a/roles/openshift_prometheus/templates/prometheus.yml.j2 +++ b/roles/openshift_prometheus/templates/prometheus.yml.j2 @@ -1,10 +1,5 @@  rule_files: -  - 'prometheus.rules' -{% if openshift_prometheus_additional_rules_file is defined and openshift_prometheus_additional_rules_file is not none %} -  - 'prometheus.additional.rules' -{% endif %} - - +  - '*.rules'  # A scrape configuration for running Prometheus on a Kubernetes cluster.  # This uses separate scrape configs for cluster components (i.e. API server, node) @@ -39,31 +34,11 @@ scrape_configs:      action: keep      regex: default;kubernetes;https -# Scrape config for nodes. -# -# Each node exposes a /metrics endpoint that contains operational metrics for -# the Kubelet and other components. -- job_name: 'kubernetes-nodes' - -  scheme: https -  tls_config: -    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt -  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - -  kubernetes_sd_configs: -  - role: node - -  relabel_configs: -  - action: labelmap -    regex: __meta_kubernetes_node_label_(.+) -  # Scrape config for controllers.  #  # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for  # the controllers.  # -# TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via -#       endpoints.  - job_name: 'kubernetes-controllers'    scheme: https @@ -87,6 +62,27 @@ scrape_configs:      regex: (.+)(?::\d+)      replacement: $1:8444 +# Scrape config for nodes. +# +# Each node exposes a /metrics endpoint that contains operational metrics for +# the Kubelet and other components. +- job_name: 'kubernetes-nodes' +  scheme: https +  tls_config: +    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt +  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token +  kubernetes_sd_configs: +  - role: node +  # Drop a very high cardinality metric that is incorrect in 3.7. It will be +  # fixed in 3.9. +  metric_relabel_configs: +  - source_labels: [__name__] +    action: drop +    regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)' +  relabel_configs: +  - action: labelmap +    regex: __meta_kubernetes_node_label_(.+) +  # Scrape config for cAdvisor.  #  # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that @@ -107,6 +103,14 @@ scrape_configs:    kubernetes_sd_configs:    - role: node +  # Exclude a set of high cardinality metrics that can contribute to significant +  # memory use in large clusters. These can be selectively enabled as necessary +  # for medium or small clusters. +  metric_relabel_configs: +  - source_labels: [__name__] +    action: drop +    regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))' +    relabel_configs:    - action: labelmap      regex: __meta_kubernetes_node_label_(.+) @@ -133,38 +137,101 @@ scrape_configs:    - role: endpoints    relabel_configs: -  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] -    action: keep -    regex: true -  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] -    action: replace -    target_label: __scheme__ -    regex: (https?) -  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] +    # only scrape infrastructure components +    - source_labels: [__meta_kubernetes_namespace] +      action: keep +      regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+' +    # drop infrastructure components managed by other scrape targets +    - source_labels: [__meta_kubernetes_service_name] +      action: drop +      regex: 'prometheus-node-exporter' +    # only those that have requested scraping +    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] +      action: keep +      regex: true +    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] +      action: replace +      target_label: __scheme__ +      regex: (https?) +    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] +      action: replace +      target_label: __metrics_path__ +      regex: (.+) +    - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] +      action: replace +      target_label: __address__ +      regex: (.+)(?::\d+);(\d+) +      replacement: $1:$2 +    - action: labelmap +      regex: __meta_kubernetes_service_label_(.+) +    - source_labels: [__meta_kubernetes_namespace] +      action: replace +      target_label: kubernetes_namespace +    - source_labels: [__meta_kubernetes_service_name] +      action: replace +      target_label: kubernetes_name + +# Scrape config for node-exporter, which is expected to be running on port 9100. +- job_name: 'kubernetes-nodes-exporter' + +  tls_config: +    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +  kubernetes_sd_configs: +  - role: node + +  metric_relabel_configs: +  - source_labels: [__name__] +    action: drop +    regex: 'node_cpu|node_(disk|scrape_collector)_.+' +  # preserve a subset of the network, netstat, vmstat, and filesystem series +  - source_labels: [__name__]      action: replace -    target_label: __metrics_path__ -    regex: (.+) -  - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] +    regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))' +    target_label: __name__ +    replacement: renamed_$1 +  - source_labels: [__name__] +    action: drop +    regex: 'node_(netstat|vmstat|filesystem|network)_.+' +  - source_labels: [__name__]      action: replace +    regex: 'renamed_(.+)' +    target_label: __name__ +    replacement: $1 +  # drop any partial expensive series +  - source_labels: [__name__, device] +    action: drop +    regex: 'node_network_.+;veth.+' +  - source_labels: [__name__, mountpoint] +    action: drop +    regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)' + +  relabel_configs: +  - source_labels: [__address__] +    regex: '(.*):10250' +    replacement: '${1}:9100'      target_label: __address__ -    regex: (.+)(?::\d+);(\d+) -    replacement: $1:$2 -  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username] -    action: replace -    target_label: __basic_auth_username__ -    regex: (.+) -  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password] -    action: replace -    target_label: __basic_auth_password__ -    regex: (.+) +  - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] +    target_label: __instance__    - action: labelmap -    regex: __meta_kubernetes_service_label_(.+) -  - source_labels: [__meta_kubernetes_namespace] -    action: replace -    target_label: kubernetes_namespace -  - source_labels: [__meta_kubernetes_service_name] -    action: replace -    target_label: kubernetes_name +    regex: __meta_kubernetes_node_label_(.+) + +# Scrape config for the template service broker +- job_name: 'openshift-template-service-broker' +  scheme: https +  tls_config: +    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt +    server_name: apiserver.openshift-template-service-broker.svc +  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + +  kubernetes_sd_configs: +  - role: endpoints + +  relabel_configs: +  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +    action: keep +    regex: openshift-template-service-broker;apiserver;https +  alerting:    alertmanagers:  | 
