summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorScott Dodson <sdodson@redhat.com>2018-01-17 21:37:58 -0500
committerGitHub <noreply@github.com>2018-01-17 21:37:58 -0500
commitb342f3f6bc1817ac62f0c053d28125a991126114 (patch)
tree0ba03acb70868f3a3aee0cfef277442663d3b8fa
parent296d767336a4379d9a0f1d8811dddb3a8e2d89f1 (diff)
parent8fcf4b127af3b96c5cb174157441436d8b11dafd (diff)
downloadopenshift-b342f3f6bc1817ac62f0c053d28125a991126114.tar.gz
openshift-b342f3f6bc1817ac62f0c053d28125a991126114.tar.bz2
openshift-b342f3f6bc1817ac62f0c053d28125a991126114.tar.xz
openshift-b342f3f6bc1817ac62f0c053d28125a991126114.zip
Merge pull request #6181 from mrsiano/grafana
Add new grafana playbook.
-rw-r--r--inventory/hosts.grafana.example12
-rw-r--r--playbooks/openshift-grafana/config.yml4
-rw-r--r--playbooks/openshift-grafana/private/config.yml6
l---------playbooks/openshift-grafana/private/filter_plugins1
l---------playbooks/openshift-grafana/private/lookup_plugins1
l---------playbooks/openshift-grafana/private/roles1
-rw-r--r--roles/openshift_grafana/defaults/main.yml12
-rw-r--r--roles/openshift_grafana/files/grafana-ocp-oauth.yml661
-rw-r--r--roles/openshift_grafana/files/grafana-ocp.yml76
-rw-r--r--roles/openshift_grafana/files/openshift-cluster-monitoring.json5138
-rw-r--r--roles/openshift_grafana/meta/main.yml13
-rw-r--r--roles/openshift_grafana/tasks/gf-permissions.yml12
-rw-r--r--roles/openshift_grafana/tasks/main.yml122
13 files changed, 6059 insertions, 0 deletions
diff --git a/inventory/hosts.grafana.example b/inventory/hosts.grafana.example
new file mode 100644
index 000000000..a5999578f
--- /dev/null
+++ b/inventory/hosts.grafana.example
@@ -0,0 +1,12 @@
+[OSEv3:children]
+masters
+nodes
+
+[OSEv3:vars]
+# Grafana Configuration
+#gf_datasource_name="example"
+#gf_prometheus_namespace="openshift-metrics"
+#gf_oauth=true
+
+[masters]
+master
diff --git a/playbooks/openshift-grafana/config.yml b/playbooks/openshift-grafana/config.yml
new file mode 100644
index 000000000..c7814207c
--- /dev/null
+++ b/playbooks/openshift-grafana/config.yml
@@ -0,0 +1,4 @@
+---
+- import_playbook: ../init/main.yml
+
+- import_playbook: private/config.yml
diff --git a/playbooks/openshift-grafana/private/config.yml b/playbooks/openshift-grafana/private/config.yml
new file mode 100644
index 000000000..ac753d63b
--- /dev/null
+++ b/playbooks/openshift-grafana/private/config.yml
@@ -0,0 +1,6 @@
+---
+- name: Deploy grafana server
+ hosts: masters
+ tasks:
+ - include_role:
+ name: openshift_grafana
diff --git a/playbooks/openshift-grafana/private/filter_plugins b/playbooks/openshift-grafana/private/filter_plugins
new file mode 120000
index 000000000..99a95e4ca
--- /dev/null
+++ b/playbooks/openshift-grafana/private/filter_plugins
@@ -0,0 +1 @@
+../../../filter_plugins \ No newline at end of file
diff --git a/playbooks/openshift-grafana/private/lookup_plugins b/playbooks/openshift-grafana/private/lookup_plugins
new file mode 120000
index 000000000..ac79701db
--- /dev/null
+++ b/playbooks/openshift-grafana/private/lookup_plugins
@@ -0,0 +1 @@
+../../../lookup_plugins \ No newline at end of file
diff --git a/playbooks/openshift-grafana/private/roles b/playbooks/openshift-grafana/private/roles
new file mode 120000
index 000000000..e2b799b9d
--- /dev/null
+++ b/playbooks/openshift-grafana/private/roles
@@ -0,0 +1 @@
+../../../roles/ \ No newline at end of file
diff --git a/roles/openshift_grafana/defaults/main.yml b/roles/openshift_grafana/defaults/main.yml
new file mode 100644
index 000000000..7fd7a085d
--- /dev/null
+++ b/roles/openshift_grafana/defaults/main.yml
@@ -0,0 +1,12 @@
+---
+gf_body_tmp:
+ name: grafana_name
+ type: prometheus
+ typeLogoUrl: ''
+ access: proxy
+ url: prometheus_url
+ basicAuth: false
+ withCredentials: false
+ jsonData:
+ tlsSkipVerify: true
+ token: satoken
diff --git a/roles/openshift_grafana/files/grafana-ocp-oauth.yml b/roles/openshift_grafana/files/grafana-ocp-oauth.yml
new file mode 100644
index 000000000..82fa89004
--- /dev/null
+++ b/roles/openshift_grafana/files/grafana-ocp-oauth.yml
@@ -0,0 +1,661 @@
+---
+kind: Template
+apiVersion: v1
+metadata:
+ name: grafana-ocp
+ annotations:
+ "openshift.io/display-name": Grafana ocp
+ description: |
+ Grafana server with patched Prometheus datasource.
+ iconClass: icon-cogs
+ tags: "metrics,monitoring,grafana,prometheus"
+parameters:
+- description: The location of the proxy image
+ name: IMAGE_GF
+ value: mrsiano/grafana-ocp:latest
+- description: The location of the proxy image
+ name: IMAGE_PROXY
+ value: openshift/oauth-proxy:v1.0.0
+- description: External URL for the grafana route
+ name: ROUTE_URL
+ value: ""
+- description: The namespace to instantiate heapster under. Defaults to 'grafana'.
+ name: NAMESPACE
+ value: grafana
+- description: The session secret for the proxy
+ name: SESSION_SECRET
+ generate: expression
+ from: "[a-zA-Z0-9]{43}"
+objects:
+- apiVersion: v1
+ kind: ServiceAccount
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ annotations:
+ serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"grafana-ocp"}}'
+- apiVersion: authorization.openshift.io/v1
+ kind: ClusterRoleBinding
+ metadata:
+ name: gf-cluster-reader
+ roleRef:
+ name: cluster-reader
+ subjects:
+ - kind: ServiceAccount
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+- apiVersion: route.openshift.io/v1
+ kind: Route
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ spec:
+ host: "${ROUTE_URL}"
+ to:
+ name: grafana-ocp
+ tls:
+ termination: Reencrypt
+- apiVersion: v1
+ kind: Service
+ metadata:
+ name: grafana-ocp
+ annotations:
+ prometheus.io/scrape: "true"
+ prometheus.io/scheme: https
+ service.alpha.openshift.io/serving-cert-secret-name: gf-tls
+ namespace: "${NAMESPACE}"
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ ports:
+ - name: grafana-ocp
+ port: 443
+ protocol: TCP
+ targetPort: 8443
+ selector:
+ app: grafana-ocp
+- apiVersion: v1
+ kind: Secret
+ metadata:
+ name: gf-proxy
+ namespace: "${NAMESPACE}"
+ stringData:
+ session_secret: "${SESSION_SECRET}="
+# Deploy Prometheus behind an oauth proxy
+- apiVersion: extensions/v1beta1
+ kind: Deployment
+ metadata:
+ labels:
+ app: grafana-ocp
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: grafana-ocp
+ template:
+ metadata:
+ labels:
+ app: grafana-ocp
+ name: grafana-ocp-app
+ spec:
+ serviceAccountName: grafana-ocp
+ containers:
+ - name: oauth-proxy
+ image: ${IMAGE_PROXY}
+ imagePullPolicy: IfNotPresent
+ ports:
+ - containerPort: 8443
+ name: web
+ args:
+ - -https-address=:8443
+ - -http-address=
+ - -email-domain=*
+ - -client-id=system:serviceaccount:${NAMESPACE}:grafana-ocp
+ - -upstream=http://localhost:3000
+ - -provider=openshift
+# - '-openshift-delegate-urls={"/api/datasources": {"resource": "namespace", "verb": "get", "resourceName": "grafana-ocp", "namespace": "${NAMESPACE}"}}'
+ - '-openshift-sar={"namespace": "${NAMESPACE}", "verb": "list", "resource": "services"}'
+ - -tls-cert=/etc/tls/private/tls.crt
+ - -tls-key=/etc/tls/private/tls.key
+ - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+ - -cookie-secret-file=/etc/proxy/secrets/session_secret
+ - -skip-auth-regex=^/metrics,/api/datasources,/api/dashboards
+ volumeMounts:
+ - mountPath: /etc/tls/private
+ name: gf-tls
+ - mountPath: /etc/proxy/secrets
+ name: secrets
+
+ - name: grafana-ocp
+ image: ${IMAGE_GF}
+ ports:
+ - name: grafana-http
+ containerPort: 3000
+ volumeMounts:
+ - mountPath: "/root/go/src/github.com/grafana/grafana/data"
+ name: gf-data
+ - mountPath: "/root/go/src/github.com/grafana/grafana/conf"
+ name: gfconfig
+ - mountPath: /etc/tls/private
+ name: gf-tls
+ - mountPath: /etc/proxy/secrets
+ name: secrets
+ command:
+ - "./bin/grafana-server"
+
+ volumes:
+ - name: gfconfig
+ configMap:
+ name: gf-config
+ - name: secrets
+ secret:
+ secretName: gf-proxy
+ - name: gf-tls
+ secret:
+ secretName: gf-tls
+ - emptyDir: {}
+ name: gf-data
+- apiVersion: v1
+ kind: ConfigMap
+ metadata:
+ name: gf-config
+ namespace: "${NAMESPACE}"
+ data:
+ defaults.ini: |-
+ ##################### Grafana Configuration Defaults #####################
+ #
+ # Do not modify this file in grafana installs
+ #
+
+ # possible values : production, development
+ app_mode = production
+
+ # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty
+ instance_name = ${HOSTNAME}
+
+ #################################### Paths ###############################
+ [paths]
+ # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used)
+ #
+ data = data
+ #
+ # Directory where grafana can store logs
+ #
+ logs = data/log
+ #
+ # Directory where grafana will automatically scan and look for plugins
+ #
+ plugins = data/plugins
+
+ #################################### Server ##############################
+ [server]
+ # Protocol (http, https, socket)
+ protocol = http
+
+ # The ip address to bind to, empty will bind to all interfaces
+ http_addr =
+
+ # The http port to use
+ http_port = 3000
+
+ # The public facing domain name used to access grafana from a browser
+ domain = localhost
+
+ # Redirect to correct domain if host header does not match domain
+ # Prevents DNS rebinding attacks
+ enforce_domain = false
+
+ # The full public facing url
+ root_url = %(protocol)s://%(domain)s:%(http_port)s/
+
+ # Log web requests
+ router_logging = false
+
+ # the path relative working path
+ static_root_path = public
+
+ # enable gzip
+ enable_gzip = false
+
+ # https certs & key file
+ cert_file = /etc/tls/private/tls.crt
+ cert_key = /etc/tls/private/tls.key
+
+ # Unix socket path
+ socket = /tmp/grafana.sock
+
+ #################################### Database ############################
+ [database]
+ # You can configure the database connection by specifying type, host, name, user and password
+ # as separate properties or as on string using the url property.
+
+ # Either "mysql", "postgres" or "sqlite3", it's your choice
+ type = sqlite3
+ host = 127.0.0.1:3306
+ name = grafana
+ user = root
+ # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;"""
+ password =
+ # Use either URL or the previous fields to configure the database
+ # Example: mysql://user:secret@host:port/database
+ url =
+
+ # Max idle conn setting default is 2
+ max_idle_conn = 2
+
+ # Max conn setting default is 0 (mean not set)
+ max_open_conn =
+
+ # For "postgres", use either "disable", "require" or "verify-full"
+ # For "mysql", use either "true", "false", or "skip-verify".
+ ssl_mode = disable
+
+ ca_cert_path =
+ client_key_path =
+ client_cert_path =
+ server_cert_name =
+
+ # For "sqlite3" only, path relative to data_path setting
+ path = grafana.db
+
+ #################################### Session #############################
+ [session]
+ # Either "memory", "file", "redis", "mysql", "postgres", "memcache", default is "file"
+ provider = file
+
+ # Provider config options
+ # memory: not have any config yet
+ # file: session dir path, is relative to grafana data_path
+ # redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana`
+ # postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable
+ # mysql: go-sql-driver/mysql dsn config string, examples:
+ # `user:password@tcp(127.0.0.1:3306)/database_name`
+ # `user:password@unix(/var/run/mysqld/mysqld.sock)/database_name`
+ # memcache: 127.0.0.1:11211
+
+
+ provider_config = sessions
+
+ # Session cookie name
+ cookie_name = grafana_sess
+
+ # If you use session in https only, default is false
+ cookie_secure = false
+
+ # Session life time, default is 86400
+ session_life_time = 86400
+ gc_interval_time = 86400
+
+ #################################### Data proxy ###########################
+ [dataproxy]
+
+ # This enables data proxy logging, default is false
+ logging = false
+
+ #################################### Analytics ###########################
+ [analytics]
+ # Server reporting, sends usage counters to stats.grafana.org every 24 hours.
+ # No ip addresses are being tracked, only simple counters to track
+ # running instances, dashboard and error counts. It is very helpful to us.
+ # Change this option to false to disable reporting.
+ reporting_enabled = true
+
+ # Set to false to disable all checks to https://grafana.com
+ # for new versions (grafana itself and plugins), check is used
+ # in some UI views to notify that grafana or plugin update exists
+ # This option does not cause any auto updates, nor send any information
+ # only a GET request to https://grafana.com to get latest versions
+ check_for_updates = true
+
+ # Google Analytics universal tracking code, only enabled if you specify an id here
+ google_analytics_ua_id =
+
+ # Google Tag Manager ID, only enabled if you specify an id here
+ google_tag_manager_id =
+
+ #################################### Security ############################
+ [security]
+ # default admin user, created on startup
+ admin_user = admin
+
+ # default admin password, can be changed before first start of grafana, or in profile settings
+ admin_password = admin
+
+ # used for signing
+ secret_key = SW2YcwTIb9zpOOhoPsMm
+
+ # Auto-login remember days
+ login_remember_days = 7
+ cookie_username = grafana_user
+ cookie_remember_name = grafana_remember
+
+ # disable gravatar profile images
+ disable_gravatar = false
+
+ # data source proxy whitelist (ip_or_domain:port separated by spaces)
+ data_source_proxy_whitelist =
+
+ [snapshots]
+ # snapshot sharing options
+ external_enabled = true
+ external_snapshot_url = https://snapshots-origin.raintank.io
+ external_snapshot_name = Publish to snapshot.raintank.io
+
+ # remove expired snapshot
+ snapshot_remove_expired = true
+
+ # remove snapshots after 90 days
+ snapshot_TTL_days = 90
+
+ #################################### Users ####################################
+ [users]
+ # disable user signup / registration
+ allow_sign_up = true
+
+ # Allow non admin users to create organizations
+ allow_org_create = true
+
+ # Set to true to automatically assign new users to the default organization (id 1)
+ auto_assign_org = true
+
+ # Default role new users will be automatically assigned (if auto_assign_org above is set to true)
+ auto_assign_org_role = Admin
+
+ # Require email validation before sign up completes
+ verify_email_enabled = false
+
+ # Background text for the user field on the login page
+ login_hint = email or username
+
+ # Default UI theme ("dark" or "light")
+ default_theme = dark
+
+ # External user management
+ external_manage_link_url =
+ external_manage_link_name =
+ external_manage_info =
+
+ [auth]
+ # Set to true to disable (hide) the login form, useful if you use OAuth
+ disable_login_form = true
+
+ # Set to true to disable the signout link in the side menu. useful if you use auth.proxy
+ disable_signout_menu = true
+
+ #################################### Anonymous Auth ######################
+ [auth.anonymous]
+ # enable anonymous access
+ enabled = true
+
+ # specify organization name that should be used for unauthenticated users
+ org_name = Main Org.
+
+ # specify role for unauthenticated users
+ org_role = Admin
+
+ #################################### Github Auth #########################
+ [auth.github]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ auth_url = https://github.com/login/oauth/authorize
+ token_url = https://github.com/login/oauth/access_token
+ api_url = https://api.github.com/user
+ team_ids =
+ allowed_organizations =
+
+ #################################### Google Auth #########################
+ [auth.google]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_client_id
+ client_secret = some_client_secret
+ scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email
+ auth_url = https://accounts.google.com/o/oauth2/auth
+ token_url = https://accounts.google.com/o/oauth2/token
+ api_url = https://www.googleapis.com/oauth2/v1/userinfo
+ allowed_domains =
+ hosted_domain =
+
+ #################################### Grafana.com Auth ####################
+ # legacy key names (so they work in env variables)
+ [auth.grafananet]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ allowed_organizations =
+
+ [auth.grafana_com]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ allowed_organizations =
+
+ #################################### Generic OAuth #######################
+ [auth.generic_oauth]
+ name = OAuth
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ auth_url =
+ token_url =
+ api_url =
+ team_ids =
+ allowed_organizations =
+
+ #################################### Basic Auth ##########################
+ [auth.basic]
+ enabled = false
+
+ #################################### Auth Proxy ##########################
+ [auth.proxy]
+ enabled = true
+ header_name = X-WEBAUTH-USER
+ header_property = username
+ auto_sign_up = true
+ ldap_sync_ttl = 60
+ whitelist =
+
+ #################################### Auth LDAP ###########################
+ [auth.ldap]
+ enabled = false
+ config_file = /etc/grafana/ldap.toml
+ allow_sign_up = true
+
+ #################################### SMTP / Emailing #####################
+ [smtp]
+ enabled = false
+ host = localhost:25
+ user =
+ # If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;"""
+ password =
+ cert_file =
+ key_file =
+ skip_verify = false
+ from_address = admin@grafana.localhost
+ from_name = Grafana
+ ehlo_identity =
+
+ [emails]
+ welcome_email_on_sign_up = false
+ templates_pattern = emails/*.html
+
+ #################################### Logging ##########################
+ [log]
+ # Either "console", "file", "syslog". Default is console and file
+ # Use space to separate multiple modes, e.g. "console file"
+ mode = console file
+
+ # Either "debug", "info", "warn", "error", "critical", default is "info"
+ level = error
+
+ # optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug
+ filters =
+
+ # For "console" mode only
+ [log.console]
+ level =
+
+ # log line format, valid options are text, console and json
+ format = console
+
+ # For "file" mode only
+ [log.file]
+ level =
+
+ # log line format, valid options are text, console and json
+ format = text
+
+ # This enables automated log rotate(switch of following options), default is true
+ log_rotate = true
+
+ # Max line number of single file, default is 1000000
+ max_lines = 1000000
+
+ # Max size shift of single file, default is 28 means 1 << 28, 256MB
+ max_size_shift = 28
+
+ # Segment log daily, default is true
+ daily_rotate = true
+
+ # Expired days of log file(delete after max days), default is 7
+ max_days = 7
+
+ [log.syslog]
+ level =
+
+ # log line format, valid options are text, console and json
+ format = text
+
+ # Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used.
+ network =
+ address =
+
+ # Syslog facility. user, daemon and local0 through local7 are valid.
+ facility =
+
+ # Syslog tag. By default, the process' argv[0] is used.
+ tag =
+
+
+ #################################### AMQP Event Publisher ################
+ [event_publisher]
+ enabled = false
+ rabbitmq_url = amqp://localhost/
+ exchange = grafana_events
+
+ #################################### Dashboard JSON files ################
+ [dashboards.json]
+ enabled = false
+ path = /var/lib/grafana/dashboards
+
+ #################################### Usage Quotas ########################
+ [quota]
+ enabled = false
+
+ #### set quotas to -1 to make unlimited. ####
+ # limit number of users per Org.
+ org_user = 10
+
+ # limit number of dashboards per Org.
+ org_dashboard = 100
+
+ # limit number of data_sources per Org.
+ org_data_source = 10
+
+ # limit number of api_keys per Org.
+ org_api_key = 10
+
+ # limit number of orgs a user can create.
+ user_org = 10
+
+ # Global limit of users.
+ global_user = -1
+
+ # global limit of orgs.
+ global_org = -1
+
+ # global limit of dashboards
+ global_dashboard = -1
+
+ # global limit of api_keys
+ global_api_key = -1
+
+ # global limit on number of logged in users.
+ global_session = -1
+
+ #################################### Alerting ############################
+ [alerting]
+ # Disable alerting engine & UI features
+ enabled = true
+ # Makes it possible to turn off alert rule execution but alerting UI is visible
+ execute_alerts = true
+
+ #################################### Internal Grafana Metrics ############
+ # Metrics available at HTTP API Url /api/metrics
+ [metrics]
+ enabled = true
+ interval_seconds = 10
+
+ # Send internal Grafana metrics to graphite
+ [metrics.graphite]
+ # Enable by setting the address setting (ex localhost:2003)
+ address =
+ prefix = prod.grafana.%(instance_name)s.
+
+ [grafana_net]
+ url = https://grafana.com
+
+ [grafana_com]
+ url = https://grafana.com
+
+ #################################### Distributed tracing ############
+ [tracing.jaeger]
+ # jaeger destination (ex localhost:6831)
+ address =
+ # tag that will always be included in when creating new spans. ex (tag1:value1,tag2:value2)
+ always_included_tag =
+ # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
+ sampler_type = const
+ # jaeger samplerconfig param
+ # for "const" sampler, 0 or 1 for always false/true respectively
+ # for "probabilistic" sampler, a probability between 0 and 1
+ # for "rateLimiting" sampler, the number of spans per second
+ # for "remote" sampler, param is the same as for "probabilistic"
+ # and indicates the initial sampling rate before the actual one
+ # is received from the mothership
+ sampler_param = 1
+
+ #################################### External Image Storage ##############
+ [external_image_storage]
+ # You can choose between (s3, webdav, gcs)
+ provider =
+
+ [external_image_storage.s3]
+ bucket_url =
+ bucket =
+ region =
+ path =
+ access_key =
+ secret_key =
+
+ [external_image_storage.webdav]
+ url =
+ username =
+ password =
+ public_url =
+
+ [external_image_storage.gcs]
+ key_file =
+ bucket =
diff --git a/roles/openshift_grafana/files/grafana-ocp.yml b/roles/openshift_grafana/files/grafana-ocp.yml
new file mode 100644
index 000000000..bc7b4b286
--- /dev/null
+++ b/roles/openshift_grafana/files/grafana-ocp.yml
@@ -0,0 +1,76 @@
+---
+kind: Template
+apiVersion: v1
+metadata:
+ name: grafana-ocp
+ annotations:
+ "openshift.io/display-name": Grafana ocp
+ description: |
+ Grafana server with patched Prometheus datasource.
+ iconClass: icon-cogs
+ tags: "metrics,monitoring,grafana,prometheus"
+parameters:
+- description: External URL for the grafana route
+ name: ROUTE_URL
+ value: ""
+- description: The namespace to instantiate heapster under. Defaults to 'grafana'.
+ name: NAMESPACE
+ value: grafana
+objects:
+- apiVersion: route.openshift.io/v1
+ kind: Route
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ spec:
+ host: "${ROUTE_URL}"
+ to:
+ name: grafana-ocp
+- apiVersion: v1
+ kind: Service
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ selector:
+ name: grafana-ocp
+ ports:
+ - port: 8082
+ protocol: TCP
+ targetPort: grafana-http
+- apiVersion: v1
+ kind: ReplicationController
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ selector:
+ name: grafana-ocp
+ replicas: 1
+ template:
+ version: v1
+ metadata:
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ volumes:
+ - name: data
+ emptyDir: {}
+ containers:
+ - image: "mrsiano/grafana-ocp:latest"
+ name: grafana-ocp
+ ports:
+ - name: grafana-http
+ containerPort: 3000
+ volumeMounts:
+ - name: data
+ mountPath: "/root/go/src/github.com/grafana/grafana/data"
+ command:
+ - "./bin/grafana-server"
diff --git a/roles/openshift_grafana/files/openshift-cluster-monitoring.json b/roles/openshift_grafana/files/openshift-cluster-monitoring.json
new file mode 100644
index 000000000..f59ca997f
--- /dev/null
+++ b/roles/openshift_grafana/files/openshift-cluster-monitoring.json
@@ -0,0 +1,5138 @@
+{
+ "dashboard": {
+ "description": "Monitors Openshift cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only.",
+ "editable": true,
+ "gnetId": 315,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "rows": [
+ {
+ "collapse": false,
+ "height": "200px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "height": "200px",
+ "id": 32,
+ "legend": {
+ "alignAsTable": false,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))",
+ "format": "time_series",
+ "instant": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "Received",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "Sent",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network I/O pressure",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "transparent": false,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Network I/O pressure",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "180px",
+ "id": 4,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Cluster memory usage",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "180px",
+ "id": 6,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Cluster CPU usage ",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "180px",
+ "id": 7,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Cluster filesystem usage",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 9,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "20%",
+ "prefix": "",
+ "prefixFontSize": "20%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Used",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 10,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 11,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": " cores",
+ "postfixFontSize": "30%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m]))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Used",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 12,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": " cores",
+ "postfixFontSize": "30%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 13,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Used",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 14,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Total usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 33,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) ",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "overall cpu usage",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cluster CPU Usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "height": "",
+ "id": 17,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{ pod_name }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pods CPU usage ",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "transparent": false,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "% Usage",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pods CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "height": "",
+ "id": 24,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Containers Cores Usage",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Containers CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "height": "",
+ "id": 23,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ id }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "System services CPU usage ",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "System services CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 411,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 34,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_memory_usage_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ id }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "All processes Memory usage ",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "All processes CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 25,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ pod_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pods memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pods memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 26,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_rss{systemd_service_name=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (systemd_service_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ systemd_service_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "B",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "System services memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "System services memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 27,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "C",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Containers memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Containers memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "500px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 28,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ id }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "All processes memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "All processes memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 30,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "network",
+ "refId": "D",
+ "step": 1
+ },
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
+ "metric": "network",
+ "refId": "C",
+ "step": 1
+ },
+ {
+ "expr": "sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
+ "metric": "network",
+ "refId": "E",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
+ "metric": "network",
+ "refId": "F",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Containers network I/O ",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Containers network I/O",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 277,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 16,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> {{ pod_name }}",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- {{ pod_name }}",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pods network I/O ",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pods network I/O",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "500px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 29,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "instant": true,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> {{ id }}",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- {{ id }}",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "All processes network I/O ",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "All processes network I/O",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 35,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_build_total) by (phase,reason)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ phase }} | {{ reason }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_build_total",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 54,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "count(openshift_build_active_time_seconds{phase=\"running\"} offset 10m)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of builds that have been running for more than 10 minutes (600 seconds).",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 55,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "count(openshift_build_active_time_seconds{phase=\"pending\"} offset 10m)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of build that have been waiting at least 10 minutes (600 seconds) to start.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 56,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_build_total{phase=\"Failed\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of failed builds, regardless of the failure reason.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 57,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_build_total{phase=\"Failed\",reason=\"FetchSourceFailed\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ instance }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of failed builds because of problems retrieving source from the associated Git repository.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": true,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 58,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": false,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_build_total{phase=\"Complete\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of successfully completed builds.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 0,
+ "id": 59,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_build_total{phase=\"Failed\"} offset 5m",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ reason }}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the failed builds totals, per failure reason, from 5 minutes ago.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OpenShift Builds",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 36,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_sdn_pod_setup_latency_sum)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_pod_setup_latency_sum",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 41,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_sdn_pod_teardown_latency{quantile=\"0.9\"}) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_pod_teardown_latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 50,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "topk(10, (sum by (pod_name) (irate(container_network_receive_bytes_total{pod_name!=\"\"}[5m]))))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{ pod_name }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Top 10 pods doing the most receive network traffic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 37,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_sdn_pod_ips",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ instance }} | {{ role }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_pod_ips",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 39,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 42,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_sdn_arp_cache_entries",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ role }} | {{ instance }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_arp_cache_entries",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 40,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_sdn_arp_cache_entries",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_arp_cache_entries",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OpenShift SDN",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 44,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(kubelet_pleg_relist_latency_microseconds{kubernetes_io_hostname=~\"$Node\",quantile=\"0.9\"}[2m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ role }} | {{ instance }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "kubelet_pleg_relist",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 51,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}[2m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ operation_type }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 52,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "kubelet_docker_operations_timeout",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ operation_type }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns a running count (not a rate) of docker operations that have timed out since the kubelet was started.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 53,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "kubelet_docker_operations_errors",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ operation_type }}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns a running count (not a rate) of docker operations that have failed since the kubelet was started.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Kubelet",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 46,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(scrape_samples_scraped[2m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{ kubernetes_name }} | {{ instance }} ",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "scrape_samples_scraped",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 68,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum without (cpu) (irate(container_cpu_usage_seconds_total{container_name=\"prometheus\"}[5m])))",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU per instance of Prometheus container.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Prometheus",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 48,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb!~\"GET|LIST|WATCH\"}[2m]))) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ resource }} || {{ verb }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Number of mutating API requests being made to the control plane.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 49,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb=~\"GET|LIST|WATCH\"}[2m]))) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ resource }} || {{ pod }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Number of non-mutating API requests being made to the control plane.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 74,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "endpoint_queue_latency",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": " quantile {{ quantile }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "endpoint_queue_latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "API Server",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 61,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "etcd_disk_wal_fsync_duration_seconds_count",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "etcd_disk_wal_fsync_duration_seconds_count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "etcd",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 62,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(changes(container_start_time_seconds[10m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "The number of containers that start or restart over the last ten minutes.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Changes in your cluster",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 63,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(machine_cpu_cores)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total number of cores in the cluster.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 64,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(sort_desc(irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total number of consumed cores.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 65,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (kubernetes_io_hostname,type) (irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU consumed per node in the cluster.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 66,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (cpu,id,pod_name,container_name) (irate(container_cpu_usage_seconds_total{role=\"infra\"}[5m])))",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU consumption per system service or container on the infrastructure nodes.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 67,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (namespace) (irate(container_cpu_usage_seconds_total[5m])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU consumed per namespace on the cluster.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 47,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_usage_seconds_total{id=\"/\"}[3m])) / sum(machine_cpu_cores)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Percentage of total cluster CPU in use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 69,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_rss) / sum(machine_memory_bytes)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Percentage of total cluster memory in use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 70,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (kubernetes_io_hostname) (irate(container_cpu_usage_seconds_total{id=~\"/system.slice/(docker|etcd).service\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Aggregate CPU usage (seconds total) of etcd+docker",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "System and container CPU",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 71,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+ {
+ "title": "Kubernetes Storage Metrics via Prometheus",
+ "type": "absolute",
+ "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g"
+ }
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "volumes_queue_latency",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "volumes_queue_latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 72,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+ {
+ "title": "Kubernetes Storage Metrics via Prometheus",
+ "type": "absolute",
+ "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g"
+ }
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(cloudprovider_gce_api_request_duration_seconds_count[2m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ request }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "cloudprovider_aws_api_request_duration_seconds_count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 73,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+ {
+ "title": "Kubernetes Storage Metrics via Prometheus",
+ "type": "absolute",
+ "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g"
+ }
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate(storage_operation_duration_seconds_sum{kubernetes_io_hostname=~\"$Node\"}[2m])) by (operation_name,kubernetes_io_hostname)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ operation_name }} || {{ kubernetes_io_hostname }}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "storage_operation_duration_seconds_sum",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OpenShift Volumes",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "kubernetes",
+ "openshift"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": ".*",
+ "current": {},
+ "datasource": "${DS_PR}",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "Node",
+ "options": [],
+ "query": "label_values(kubernetes_io_hostname)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "1s",
+ "2m",
+ "20s",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "openshift cluster monitoring",
+ "version": 6
+ }
+}
diff --git a/roles/openshift_grafana/meta/main.yml b/roles/openshift_grafana/meta/main.yml
new file mode 100644
index 000000000..8dea6f197
--- /dev/null
+++ b/roles/openshift_grafana/meta/main.yml
@@ -0,0 +1,13 @@
+---
+galaxy_info:
+ author: Eldad Marciano
+ description: Setup grafana pod
+ company: Red Hat, Inc.
+ license: Apache License, Version 2.0
+ min_ansible_version: 2.3
+ platforms:
+ - name: EL
+ versions:
+ - 7
+ categories:
+ - metrics
diff --git a/roles/openshift_grafana/tasks/gf-permissions.yml b/roles/openshift_grafana/tasks/gf-permissions.yml
new file mode 100644
index 000000000..9d3c741ee
--- /dev/null
+++ b/roles/openshift_grafana/tasks/gf-permissions.yml
@@ -0,0 +1,12 @@
+---
+- name: Create gf user on htpasswd
+ command: htpasswd -c /etc/origin/master/htpasswd gfadmin
+
+- name: Make sure master config use HTPasswdPasswordIdentityProvider
+ command: "sed -ie 's|AllowAllPasswordIdentityProvider|HTPasswdPasswordIdentityProvider\n file: /etc/origin/master/htpasswd|' /etc/origin/master/master-config.yaml"
+
+- name: Grant permission for gfuser
+ command: oc adm policy add-cluster-role-to-user cluster-reader gfadmin
+
+- name: Restart mater api
+ command: systemctl restart atomic-openshift-master-api.service
diff --git a/roles/openshift_grafana/tasks/main.yml b/roles/openshift_grafana/tasks/main.yml
new file mode 100644
index 000000000..6a06d40a9
--- /dev/null
+++ b/roles/openshift_grafana/tasks/main.yml
@@ -0,0 +1,122 @@
+---
+- name: Create grafana namespace
+ oc_project:
+ state: present
+ name: grafana
+
+- name: Configure Grafana Permissions
+ include_tasks: tasks/gf-permissions.yml
+ when: gf_oauth | default(false) | bool == true
+
+# TODO: we should grab this yaml file from openshift/origin
+- name: Templatize grafana yaml
+ template: src=grafana-ocp.yaml dest=/tmp/grafana-ocp.yaml
+ register:
+ cl_file: /tmp/grafana-ocp.yaml
+ when: gf_oauth | default(false) | bool == false
+
+# TODO: we should grab this yaml file from openshift/origin
+- name: Templatize grafana yaml
+ template: src=grafana-ocp-oauth.yaml dest=/tmp/grafana-ocp-oauth.yaml
+ register:
+ cl_file: /tmp/grafana-ocp-oauth.yaml
+ when: gf_oauth | default(false) | bool == true
+
+- name: Process the grafana file
+ oc_process:
+ namespace: grafana
+ template_name: "{{ cl_file }}"
+ create: True
+ when: gf_oauth | default(false) | bool == true
+
+- name: Wait to grafana be running
+ command: oc rollout status deployment/grafana-ocp
+
+- name: oc adm policy add-role-to-user view -z grafana-ocp -n {{ gf_prometheus_namespace }}
+ oc_adm_policy_user:
+ user: grafana-ocp
+ resource_kind: cluster-role
+ resource_name: view
+ state: present
+ role_namespace: "{{ gf_prometheus_namespace }}"
+
+- name: Get grafana route
+ oc_obj:
+ kind: route
+ name: grafana
+ namespace: grafana
+ register: route
+
+- name: Get prometheus route
+ oc_obj:
+ kind: route
+ name: prometheus
+ namespace: "{{ gf_prometheus_namespace }}"
+ register: route
+
+- name: Get the prometheus SA
+ oc_serviceaccount_secret:
+ state: list
+ service_account: prometheus
+ namespace: "{{ gf_prometheus_namespace }}"
+ register: sa
+
+- name: Get the management SA bearer token
+ set_fact:
+ management_token: "{{ sa.results | oo_filter_sa_secrets }}"
+
+- name: Ensure the SA bearer token value is read
+ oc_secret:
+ state: list
+ name: "{{ management_token }}"
+ namespace: "{{ gf_prometheus_namespace }}"
+ no_log: True
+ register: sa_secret
+
+- name: Get the SA bearer token for prometheus
+ set_fact:
+ token: "{{ sa_secret.results.encoded.token }}"
+
+- name: Convert to json
+ var:
+ ds_json: "{{ gf_body_tmp }} | to_json }}"
+
+- name: Set protocol type
+ var:
+ protocol: "{{ 'https' if {{ gf_oauth }} == true else 'http' }}"
+
+- name: Add gf datasrouce
+ uri:
+ url: "{{ protocol }}://{{ route }}/api/datasources"
+ user: admin
+ password: admin
+ method: POST
+ body: "{{ ds_json | regex_replace('grafana_name', {{ gf_datasource_name }}) | regex_replace('prometheus_url', 'https://'{{ prometheus }} ) | regex_replace('satoken', {{ token }}) }}"
+ headers:
+ Content-Type: "Content-Type: application/json"
+ register: add_ds
+
+- name: Regex setup ds name
+ replace:
+ path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}"
+ regexp: '${DS_PR}'
+ replace: '{{ gf_datasource_name }}'
+ backup: yes
+
+- name: Add new dashboard
+ uri:
+ url: "{{ protocol }}://{{ route }}/api/dashboards/db"
+ user: admin
+ password: admin
+ method: POST
+ body: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}"
+ headers:
+ Content-Type: "Content-Type: application/json"
+ register: add_ds
+
+- name: Regex json tear down
+ replace:
+ path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}"
+ regexp: '${DS_PR}'
+ replace: '{{ gf_datasource_name }}'
+ backup: yes