diff options
author | Eldad Marciano <mrsiano@gmail.com> | 2017-11-20 16:41:43 +0200 |
---|---|---|
committer | Eldad Marciano <mrsiano@gmail.com> | 2018-01-17 12:06:42 +0200 |
commit | 8fcf4b127af3b96c5cb174157441436d8b11dafd (patch) | |
tree | 625985e588e23df9a335540cbf0c086f34e4b067 /roles/openshift_grafana | |
parent | dce44f0bf93c1cf7b2d2fd7f01d591be117b2d60 (diff) | |
download | openshift-8fcf4b127af3b96c5cb174157441436d8b11dafd.tar.gz openshift-8fcf4b127af3b96c5cb174157441436d8b11dafd.tar.bz2 openshift-8fcf4b127af3b96c5cb174157441436d8b11dafd.tar.xz openshift-8fcf4b127af3b96c5cb174157441436d8b11dafd.zip |
Add new grafana playbook.
this patch introduce a new playbook and roles to deploy grafana automaticly
using openshift-ansible capabilities.
this patch will abstract the grafana deployment process specially for openshift.
Diffstat (limited to 'roles/openshift_grafana')
-rw-r--r-- | roles/openshift_grafana/defaults/main.yml | 12 | ||||
-rw-r--r-- | roles/openshift_grafana/files/grafana-ocp-oauth.yml | 661 | ||||
-rw-r--r-- | roles/openshift_grafana/files/grafana-ocp.yml | 76 | ||||
-rw-r--r-- | roles/openshift_grafana/files/openshift-cluster-monitoring.json | 5138 | ||||
-rw-r--r-- | roles/openshift_grafana/meta/main.yml | 13 | ||||
-rw-r--r-- | roles/openshift_grafana/tasks/gf-permissions.yml | 12 | ||||
-rw-r--r-- | roles/openshift_grafana/tasks/main.yml | 122 |
7 files changed, 6034 insertions, 0 deletions
diff --git a/roles/openshift_grafana/defaults/main.yml b/roles/openshift_grafana/defaults/main.yml new file mode 100644 index 000000000..7fd7a085d --- /dev/null +++ b/roles/openshift_grafana/defaults/main.yml @@ -0,0 +1,12 @@ +--- +gf_body_tmp: + name: grafana_name + type: prometheus + typeLogoUrl: '' + access: proxy + url: prometheus_url + basicAuth: false + withCredentials: false + jsonData: + tlsSkipVerify: true + token: satoken diff --git a/roles/openshift_grafana/files/grafana-ocp-oauth.yml b/roles/openshift_grafana/files/grafana-ocp-oauth.yml new file mode 100644 index 000000000..82fa89004 --- /dev/null +++ b/roles/openshift_grafana/files/grafana-ocp-oauth.yml @@ -0,0 +1,661 @@ +--- +kind: Template +apiVersion: v1 +metadata: + name: grafana-ocp + annotations: + "openshift.io/display-name": Grafana ocp + description: | + Grafana server with patched Prometheus datasource. + iconClass: icon-cogs + tags: "metrics,monitoring,grafana,prometheus" +parameters: +- description: The location of the proxy image + name: IMAGE_GF + value: mrsiano/grafana-ocp:latest +- description: The location of the proxy image + name: IMAGE_PROXY + value: openshift/oauth-proxy:v1.0.0 +- description: External URL for the grafana route + name: ROUTE_URL + value: "" +- description: The namespace to instantiate heapster under. Defaults to 'grafana'. + name: NAMESPACE + value: grafana +- description: The session secret for the proxy + name: SESSION_SECRET + generate: expression + from: "[a-zA-Z0-9]{43}" +objects: +- apiVersion: v1 + kind: ServiceAccount + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"grafana-ocp"}}' +- apiVersion: authorization.openshift.io/v1 + kind: ClusterRoleBinding + metadata: + name: gf-cluster-reader + roleRef: + name: cluster-reader + subjects: + - kind: ServiceAccount + name: grafana-ocp + namespace: "${NAMESPACE}" +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + host: "${ROUTE_URL}" + to: + name: grafana-ocp + tls: + termination: Reencrypt +- apiVersion: v1 + kind: Service + metadata: + name: grafana-ocp + annotations: + prometheus.io/scrape: "true" + prometheus.io/scheme: https + service.alpha.openshift.io/serving-cert-secret-name: gf-tls + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + ports: + - name: grafana-ocp + port: 443 + protocol: TCP + targetPort: 8443 + selector: + app: grafana-ocp +- apiVersion: v1 + kind: Secret + metadata: + name: gf-proxy + namespace: "${NAMESPACE}" + stringData: + session_secret: "${SESSION_SECRET}=" +# Deploy Prometheus behind an oauth proxy +- apiVersion: extensions/v1beta1 + kind: Deployment + metadata: + labels: + app: grafana-ocp + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + replicas: 1 + selector: + matchLabels: + app: grafana-ocp + template: + metadata: + labels: + app: grafana-ocp + name: grafana-ocp-app + spec: + serviceAccountName: grafana-ocp + containers: + - name: oauth-proxy + image: ${IMAGE_PROXY} + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8443 + name: web + args: + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -client-id=system:serviceaccount:${NAMESPACE}:grafana-ocp + - -upstream=http://localhost:3000 + - -provider=openshift +# - '-openshift-delegate-urls={"/api/datasources": {"resource": "namespace", "verb": "get", "resourceName": "grafana-ocp", "namespace": "${NAMESPACE}"}}' + - '-openshift-sar={"namespace": "${NAMESPACE}", "verb": "list", "resource": "services"}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -skip-auth-regex=^/metrics,/api/datasources,/api/dashboards + volumeMounts: + - mountPath: /etc/tls/private + name: gf-tls + - mountPath: /etc/proxy/secrets + name: secrets + + - name: grafana-ocp + image: ${IMAGE_GF} + ports: + - name: grafana-http + containerPort: 3000 + volumeMounts: + - mountPath: "/root/go/src/github.com/grafana/grafana/data" + name: gf-data + - mountPath: "/root/go/src/github.com/grafana/grafana/conf" + name: gfconfig + - mountPath: /etc/tls/private + name: gf-tls + - mountPath: /etc/proxy/secrets + name: secrets + command: + - "./bin/grafana-server" + + volumes: + - name: gfconfig + configMap: + name: gf-config + - name: secrets + secret: + secretName: gf-proxy + - name: gf-tls + secret: + secretName: gf-tls + - emptyDir: {} + name: gf-data +- apiVersion: v1 + kind: ConfigMap + metadata: + name: gf-config + namespace: "${NAMESPACE}" + data: + defaults.ini: |- + ##################### Grafana Configuration Defaults ##################### + # + # Do not modify this file in grafana installs + # + + # possible values : production, development + app_mode = production + + # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty + instance_name = ${HOSTNAME} + + #################################### Paths ############################### + [paths] + # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) + # + data = data + # + # Directory where grafana can store logs + # + logs = data/log + # + # Directory where grafana will automatically scan and look for plugins + # + plugins = data/plugins + + #################################### Server ############################## + [server] + # Protocol (http, https, socket) + protocol = http + + # The ip address to bind to, empty will bind to all interfaces + http_addr = + + # The http port to use + http_port = 3000 + + # The public facing domain name used to access grafana from a browser + domain = localhost + + # Redirect to correct domain if host header does not match domain + # Prevents DNS rebinding attacks + enforce_domain = false + + # The full public facing url + root_url = %(protocol)s://%(domain)s:%(http_port)s/ + + # Log web requests + router_logging = false + + # the path relative working path + static_root_path = public + + # enable gzip + enable_gzip = false + + # https certs & key file + cert_file = /etc/tls/private/tls.crt + cert_key = /etc/tls/private/tls.key + + # Unix socket path + socket = /tmp/grafana.sock + + #################################### Database ############################ + [database] + # You can configure the database connection by specifying type, host, name, user and password + # as separate properties or as on string using the url property. + + # Either "mysql", "postgres" or "sqlite3", it's your choice + type = sqlite3 + host = 127.0.0.1:3306 + name = grafana + user = root + # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" + password = + # Use either URL or the previous fields to configure the database + # Example: mysql://user:secret@host:port/database + url = + + # Max idle conn setting default is 2 + max_idle_conn = 2 + + # Max conn setting default is 0 (mean not set) + max_open_conn = + + # For "postgres", use either "disable", "require" or "verify-full" + # For "mysql", use either "true", "false", or "skip-verify". + ssl_mode = disable + + ca_cert_path = + client_key_path = + client_cert_path = + server_cert_name = + + # For "sqlite3" only, path relative to data_path setting + path = grafana.db + + #################################### Session ############################# + [session] + # Either "memory", "file", "redis", "mysql", "postgres", "memcache", default is "file" + provider = file + + # Provider config options + # memory: not have any config yet + # file: session dir path, is relative to grafana data_path + # redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana` + # postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable + # mysql: go-sql-driver/mysql dsn config string, examples: + # `user:password@tcp(127.0.0.1:3306)/database_name` + # `user:password@unix(/var/run/mysqld/mysqld.sock)/database_name` + # memcache: 127.0.0.1:11211 + + + provider_config = sessions + + # Session cookie name + cookie_name = grafana_sess + + # If you use session in https only, default is false + cookie_secure = false + + # Session life time, default is 86400 + session_life_time = 86400 + gc_interval_time = 86400 + + #################################### Data proxy ########################### + [dataproxy] + + # This enables data proxy logging, default is false + logging = false + + #################################### Analytics ########################### + [analytics] + # Server reporting, sends usage counters to stats.grafana.org every 24 hours. + # No ip addresses are being tracked, only simple counters to track + # running instances, dashboard and error counts. It is very helpful to us. + # Change this option to false to disable reporting. + reporting_enabled = true + + # Set to false to disable all checks to https://grafana.com + # for new versions (grafana itself and plugins), check is used + # in some UI views to notify that grafana or plugin update exists + # This option does not cause any auto updates, nor send any information + # only a GET request to https://grafana.com to get latest versions + check_for_updates = true + + # Google Analytics universal tracking code, only enabled if you specify an id here + google_analytics_ua_id = + + # Google Tag Manager ID, only enabled if you specify an id here + google_tag_manager_id = + + #################################### Security ############################ + [security] + # default admin user, created on startup + admin_user = admin + + # default admin password, can be changed before first start of grafana, or in profile settings + admin_password = admin + + # used for signing + secret_key = SW2YcwTIb9zpOOhoPsMm + + # Auto-login remember days + login_remember_days = 7 + cookie_username = grafana_user + cookie_remember_name = grafana_remember + + # disable gravatar profile images + disable_gravatar = false + + # data source proxy whitelist (ip_or_domain:port separated by spaces) + data_source_proxy_whitelist = + + [snapshots] + # snapshot sharing options + external_enabled = true + external_snapshot_url = https://snapshots-origin.raintank.io + external_snapshot_name = Publish to snapshot.raintank.io + + # remove expired snapshot + snapshot_remove_expired = true + + # remove snapshots after 90 days + snapshot_TTL_days = 90 + + #################################### Users #################################### + [users] + # disable user signup / registration + allow_sign_up = true + + # Allow non admin users to create organizations + allow_org_create = true + + # Set to true to automatically assign new users to the default organization (id 1) + auto_assign_org = true + + # Default role new users will be automatically assigned (if auto_assign_org above is set to true) + auto_assign_org_role = Admin + + # Require email validation before sign up completes + verify_email_enabled = false + + # Background text for the user field on the login page + login_hint = email or username + + # Default UI theme ("dark" or "light") + default_theme = dark + + # External user management + external_manage_link_url = + external_manage_link_name = + external_manage_info = + + [auth] + # Set to true to disable (hide) the login form, useful if you use OAuth + disable_login_form = true + + # Set to true to disable the signout link in the side menu. useful if you use auth.proxy + disable_signout_menu = true + + #################################### Anonymous Auth ###################### + [auth.anonymous] + # enable anonymous access + enabled = true + + # specify organization name that should be used for unauthenticated users + org_name = Main Org. + + # specify role for unauthenticated users + org_role = Admin + + #################################### Github Auth ######################### + [auth.github] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + auth_url = https://github.com/login/oauth/authorize + token_url = https://github.com/login/oauth/access_token + api_url = https://api.github.com/user + team_ids = + allowed_organizations = + + #################################### Google Auth ######################### + [auth.google] + enabled = false + allow_sign_up = true + client_id = some_client_id + client_secret = some_client_secret + scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email + auth_url = https://accounts.google.com/o/oauth2/auth + token_url = https://accounts.google.com/o/oauth2/token + api_url = https://www.googleapis.com/oauth2/v1/userinfo + allowed_domains = + hosted_domain = + + #################################### Grafana.com Auth #################### + # legacy key names (so they work in env variables) + [auth.grafananet] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + allowed_organizations = + + [auth.grafana_com] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + allowed_organizations = + + #################################### Generic OAuth ####################### + [auth.generic_oauth] + name = OAuth + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + auth_url = + token_url = + api_url = + team_ids = + allowed_organizations = + + #################################### Basic Auth ########################## + [auth.basic] + enabled = false + + #################################### Auth Proxy ########################## + [auth.proxy] + enabled = true + header_name = X-WEBAUTH-USER + header_property = username + auto_sign_up = true + ldap_sync_ttl = 60 + whitelist = + + #################################### Auth LDAP ########################### + [auth.ldap] + enabled = false + config_file = /etc/grafana/ldap.toml + allow_sign_up = true + + #################################### SMTP / Emailing ##################### + [smtp] + enabled = false + host = localhost:25 + user = + # If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;""" + password = + cert_file = + key_file = + skip_verify = false + from_address = admin@grafana.localhost + from_name = Grafana + ehlo_identity = + + [emails] + welcome_email_on_sign_up = false + templates_pattern = emails/*.html + + #################################### Logging ########################## + [log] + # Either "console", "file", "syslog". Default is console and file + # Use space to separate multiple modes, e.g. "console file" + mode = console file + + # Either "debug", "info", "warn", "error", "critical", default is "info" + level = error + + # optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug + filters = + + # For "console" mode only + [log.console] + level = + + # log line format, valid options are text, console and json + format = console + + # For "file" mode only + [log.file] + level = + + # log line format, valid options are text, console and json + format = text + + # This enables automated log rotate(switch of following options), default is true + log_rotate = true + + # Max line number of single file, default is 1000000 + max_lines = 1000000 + + # Max size shift of single file, default is 28 means 1 << 28, 256MB + max_size_shift = 28 + + # Segment log daily, default is true + daily_rotate = true + + # Expired days of log file(delete after max days), default is 7 + max_days = 7 + + [log.syslog] + level = + + # log line format, valid options are text, console and json + format = text + + # Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. + network = + address = + + # Syslog facility. user, daemon and local0 through local7 are valid. + facility = + + # Syslog tag. By default, the process' argv[0] is used. + tag = + + + #################################### AMQP Event Publisher ################ + [event_publisher] + enabled = false + rabbitmq_url = amqp://localhost/ + exchange = grafana_events + + #################################### Dashboard JSON files ################ + [dashboards.json] + enabled = false + path = /var/lib/grafana/dashboards + + #################################### Usage Quotas ######################## + [quota] + enabled = false + + #### set quotas to -1 to make unlimited. #### + # limit number of users per Org. + org_user = 10 + + # limit number of dashboards per Org. + org_dashboard = 100 + + # limit number of data_sources per Org. + org_data_source = 10 + + # limit number of api_keys per Org. + org_api_key = 10 + + # limit number of orgs a user can create. + user_org = 10 + + # Global limit of users. + global_user = -1 + + # global limit of orgs. + global_org = -1 + + # global limit of dashboards + global_dashboard = -1 + + # global limit of api_keys + global_api_key = -1 + + # global limit on number of logged in users. + global_session = -1 + + #################################### Alerting ############################ + [alerting] + # Disable alerting engine & UI features + enabled = true + # Makes it possible to turn off alert rule execution but alerting UI is visible + execute_alerts = true + + #################################### Internal Grafana Metrics ############ + # Metrics available at HTTP API Url /api/metrics + [metrics] + enabled = true + interval_seconds = 10 + + # Send internal Grafana metrics to graphite + [metrics.graphite] + # Enable by setting the address setting (ex localhost:2003) + address = + prefix = prod.grafana.%(instance_name)s. + + [grafana_net] + url = https://grafana.com + + [grafana_com] + url = https://grafana.com + + #################################### Distributed tracing ############ + [tracing.jaeger] + # jaeger destination (ex localhost:6831) + address = + # tag that will always be included in when creating new spans. ex (tag1:value1,tag2:value2) + always_included_tag = + # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote + sampler_type = const + # jaeger samplerconfig param + # for "const" sampler, 0 or 1 for always false/true respectively + # for "probabilistic" sampler, a probability between 0 and 1 + # for "rateLimiting" sampler, the number of spans per second + # for "remote" sampler, param is the same as for "probabilistic" + # and indicates the initial sampling rate before the actual one + # is received from the mothership + sampler_param = 1 + + #################################### External Image Storage ############## + [external_image_storage] + # You can choose between (s3, webdav, gcs) + provider = + + [external_image_storage.s3] + bucket_url = + bucket = + region = + path = + access_key = + secret_key = + + [external_image_storage.webdav] + url = + username = + password = + public_url = + + [external_image_storage.gcs] + key_file = + bucket = diff --git a/roles/openshift_grafana/files/grafana-ocp.yml b/roles/openshift_grafana/files/grafana-ocp.yml new file mode 100644 index 000000000..bc7b4b286 --- /dev/null +++ b/roles/openshift_grafana/files/grafana-ocp.yml @@ -0,0 +1,76 @@ +--- +kind: Template +apiVersion: v1 +metadata: + name: grafana-ocp + annotations: + "openshift.io/display-name": Grafana ocp + description: | + Grafana server with patched Prometheus datasource. + iconClass: icon-cogs + tags: "metrics,monitoring,grafana,prometheus" +parameters: +- description: External URL for the grafana route + name: ROUTE_URL + value: "" +- description: The namespace to instantiate heapster under. Defaults to 'grafana'. + name: NAMESPACE + value: grafana +objects: +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + host: "${ROUTE_URL}" + to: + name: grafana-ocp +- apiVersion: v1 + kind: Service + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + selector: + name: grafana-ocp + ports: + - port: 8082 + protocol: TCP + targetPort: grafana-http +- apiVersion: v1 + kind: ReplicationController + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + selector: + name: grafana-ocp + replicas: 1 + template: + version: v1 + metadata: + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + volumes: + - name: data + emptyDir: {} + containers: + - image: "mrsiano/grafana-ocp:latest" + name: grafana-ocp + ports: + - name: grafana-http + containerPort: 3000 + volumeMounts: + - name: data + mountPath: "/root/go/src/github.com/grafana/grafana/data" + command: + - "./bin/grafana-server" diff --git a/roles/openshift_grafana/files/openshift-cluster-monitoring.json b/roles/openshift_grafana/files/openshift-cluster-monitoring.json new file mode 100644 index 000000000..f59ca997f --- /dev/null +++ b/roles/openshift_grafana/files/openshift-cluster-monitoring.json @@ -0,0 +1,5138 @@ +{ + "dashboard": { + "description": "Monitors Openshift cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only.", + "editable": true, + "gnetId": 315, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "rows": [ + { + "collapse": false, + "height": "200px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "height": "200px", + "id": 32, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "instant": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "Received", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "Sent", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O pressure", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Network I/O pressure", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster memory usage", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster CPU usage ", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster filesystem usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "20%", + "prefix": "", + "prefixFontSize": "20%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 13, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Total usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) ", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "overall cpu usage", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 17, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods CPU usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "% Usage", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers Cores Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 23, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System services CPU usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "System services CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 411, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 34, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_memory_usage_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes Memory usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 26, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_rss{systemd_service_name=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (systemd_service_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ systemd_service_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System services memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "System services memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 27, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + }, + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "container_memory_usage:sort_desc", + "refId": "B", + "step": 10 + }, + { + "expr": "sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "500px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> pod: {{ pod_name }} | {{ container_name }}", + "metric": "network", + "refId": "B", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- pod: {{ pod_name }} | {{ container_name }}", + "metric": "network", + "refId": "D", + "step": 1 + }, + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "network", + "refId": "C", + "step": 1 + }, + { + "expr": "sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "network", + "refId": "E", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "network", + "refId": "F", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 277, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> {{ pod_name }}", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- {{ pod_name }}", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "500px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 29, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "instant": true, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> {{ id }}", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- {{ id }}", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 35, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total) by (phase,reason)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ phase }} | {{ reason }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_build_total", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 54, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(openshift_build_active_time_seconds{phase=\"running\"} offset 10m)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of builds that have been running for more than 10 minutes (600 seconds).", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 55, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(openshift_build_active_time_seconds{phase=\"pending\"} offset 10m)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of build that have been waiting at least 10 minutes (600 seconds) to start.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 56, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total{phase=\"Failed\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of failed builds, regardless of the failure reason.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 57, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_build_total{phase=\"Failed\",reason=\"FetchSourceFailed\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of failed builds because of problems retrieving source from the associated Git repository.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 58, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total{phase=\"Complete\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of successfully completed builds.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 0, + "id": 59, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_build_total{phase=\"Failed\"} offset 5m", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ reason }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the failed builds totals, per failure reason, from 5 minutes ago.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift Builds", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_sdn_pod_setup_latency_sum)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_setup_latency_sum", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_sdn_pod_teardown_latency{quantile=\"0.9\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_teardown_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 50, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, (sum by (pod_name) (irate(container_network_receive_bytes_total{pod_name!=\"\"}[5m]))))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top 10 pods doing the most receive network traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 37, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_pod_ips", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }} | {{ role }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_ips", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 42, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_arp_cache_entries", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ role }} | {{ instance }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_arp_cache_entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 40, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_arp_cache_entries", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_arp_cache_entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift SDN", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 44, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(kubelet_pleg_relist_latency_microseconds{kubernetes_io_hostname=~\"$Node\",quantile=\"0.9\"}[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ role }} | {{ instance }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "kubelet_pleg_relist", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 51, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 52, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kubelet_docker_operations_timeout", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns a running count (not a rate) of docker operations that have timed out since the kubelet was started.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 53, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kubelet_docker_operations_errors", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns a running count (not a rate) of docker operations that have failed since the kubelet was started.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Kubelet", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 46, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(scrape_samples_scraped[2m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ kubernetes_name }} | {{ instance }} ", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "scrape_samples_scraped", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (cpu) (irate(container_cpu_usage_seconds_total{container_name=\"prometheus\"}[5m])))", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU per instance of Prometheus container.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Prometheus", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 48, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb!~\"GET|LIST|WATCH\"}[2m]))) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ resource }} || {{ verb }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of mutating API requests being made to the control plane.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 49, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb=~\"GET|LIST|WATCH\"}[2m]))) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ resource }} || {{ pod }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of non-mutating API requests being made to the control plane.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 74, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "endpoint_queue_latency", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": " quantile {{ quantile }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "endpoint_queue_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "API Server", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 61, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_disk_wal_fsync_duration_seconds_count", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "etcd_disk_wal_fsync_duration_seconds_count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "etcd", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 62, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(changes(container_start_time_seconds[10m]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "The number of containers that start or restart over the last ten minutes.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Changes in your cluster", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 63, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(machine_cpu_cores)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total number of cores in the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 64, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(sort_desc(irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total number of consumed cores.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (kubernetes_io_hostname,type) (irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumed per node in the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (cpu,id,pod_name,container_name) (irate(container_cpu_usage_seconds_total{role=\"infra\"}[5m])))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumption per system service or container on the infrastructure nodes.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (namespace) (irate(container_cpu_usage_seconds_total[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumed per namespace on the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_cpu_usage_seconds_total{id=\"/\"}[3m])) / sum(machine_cpu_cores)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Percentage of total cluster CPU in use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 69, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss) / sum(machine_memory_bytes)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Percentage of total cluster memory in use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 70, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (kubernetes_io_hostname) (irate(container_cpu_usage_seconds_total{id=~\"/system.slice/(docker|etcd).service\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Aggregate CPU usage (seconds total) of etcd+docker", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "System and container CPU", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 71, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "volumes_queue_latency", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "volumes_queue_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 72, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(cloudprovider_gce_api_request_duration_seconds_count[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ request }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "cloudprovider_aws_api_request_duration_seconds_count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 73, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(storage_operation_duration_seconds_sum{kubernetes_io_hostname=~\"$Node\"}[2m])) by (operation_name,kubernetes_io_hostname)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ operation_name }} || {{ kubernetes_io_hostname }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "storage_operation_duration_seconds_sum", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift Volumes", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes", + "openshift" + ], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PR}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "Node", + "options": [], + "query": "label_values(kubernetes_io_hostname)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "1s", + "2m", + "20s", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "openshift cluster monitoring", + "version": 6 + } +} diff --git a/roles/openshift_grafana/meta/main.yml b/roles/openshift_grafana/meta/main.yml new file mode 100644 index 000000000..8dea6f197 --- /dev/null +++ b/roles/openshift_grafana/meta/main.yml @@ -0,0 +1,13 @@ +--- +galaxy_info: + author: Eldad Marciano + description: Setup grafana pod + company: Red Hat, Inc. + license: Apache License, Version 2.0 + min_ansible_version: 2.3 + platforms: + - name: EL + versions: + - 7 + categories: + - metrics diff --git a/roles/openshift_grafana/tasks/gf-permissions.yml b/roles/openshift_grafana/tasks/gf-permissions.yml new file mode 100644 index 000000000..9d3c741ee --- /dev/null +++ b/roles/openshift_grafana/tasks/gf-permissions.yml @@ -0,0 +1,12 @@ +--- +- name: Create gf user on htpasswd + command: htpasswd -c /etc/origin/master/htpasswd gfadmin + +- name: Make sure master config use HTPasswdPasswordIdentityProvider + command: "sed -ie 's|AllowAllPasswordIdentityProvider|HTPasswdPasswordIdentityProvider\n file: /etc/origin/master/htpasswd|' /etc/origin/master/master-config.yaml" + +- name: Grant permission for gfuser + command: oc adm policy add-cluster-role-to-user cluster-reader gfadmin + +- name: Restart mater api + command: systemctl restart atomic-openshift-master-api.service diff --git a/roles/openshift_grafana/tasks/main.yml b/roles/openshift_grafana/tasks/main.yml new file mode 100644 index 000000000..6a06d40a9 --- /dev/null +++ b/roles/openshift_grafana/tasks/main.yml @@ -0,0 +1,122 @@ +--- +- name: Create grafana namespace + oc_project: + state: present + name: grafana + +- name: Configure Grafana Permissions + include_tasks: tasks/gf-permissions.yml + when: gf_oauth | default(false) | bool == true + +# TODO: we should grab this yaml file from openshift/origin +- name: Templatize grafana yaml + template: src=grafana-ocp.yaml dest=/tmp/grafana-ocp.yaml + register: + cl_file: /tmp/grafana-ocp.yaml + when: gf_oauth | default(false) | bool == false + +# TODO: we should grab this yaml file from openshift/origin +- name: Templatize grafana yaml + template: src=grafana-ocp-oauth.yaml dest=/tmp/grafana-ocp-oauth.yaml + register: + cl_file: /tmp/grafana-ocp-oauth.yaml + when: gf_oauth | default(false) | bool == true + +- name: Process the grafana file + oc_process: + namespace: grafana + template_name: "{{ cl_file }}" + create: True + when: gf_oauth | default(false) | bool == true + +- name: Wait to grafana be running + command: oc rollout status deployment/grafana-ocp + +- name: oc adm policy add-role-to-user view -z grafana-ocp -n {{ gf_prometheus_namespace }} + oc_adm_policy_user: + user: grafana-ocp + resource_kind: cluster-role + resource_name: view + state: present + role_namespace: "{{ gf_prometheus_namespace }}" + +- name: Get grafana route + oc_obj: + kind: route + name: grafana + namespace: grafana + register: route + +- name: Get prometheus route + oc_obj: + kind: route + name: prometheus + namespace: "{{ gf_prometheus_namespace }}" + register: route + +- name: Get the prometheus SA + oc_serviceaccount_secret: + state: list + service_account: prometheus + namespace: "{{ gf_prometheus_namespace }}" + register: sa + +- name: Get the management SA bearer token + set_fact: + management_token: "{{ sa.results | oo_filter_sa_secrets }}" + +- name: Ensure the SA bearer token value is read + oc_secret: + state: list + name: "{{ management_token }}" + namespace: "{{ gf_prometheus_namespace }}" + no_log: True + register: sa_secret + +- name: Get the SA bearer token for prometheus + set_fact: + token: "{{ sa_secret.results.encoded.token }}" + +- name: Convert to json + var: + ds_json: "{{ gf_body_tmp }} | to_json }}" + +- name: Set protocol type + var: + protocol: "{{ 'https' if {{ gf_oauth }} == true else 'http' }}" + +- name: Add gf datasrouce + uri: + url: "{{ protocol }}://{{ route }}/api/datasources" + user: admin + password: admin + method: POST + body: "{{ ds_json | regex_replace('grafana_name', {{ gf_datasource_name }}) | regex_replace('prometheus_url', 'https://'{{ prometheus }} ) | regex_replace('satoken', {{ token }}) }}" + headers: + Content-Type: "Content-Type: application/json" + register: add_ds + +- name: Regex setup ds name + replace: + path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + regexp: '${DS_PR}' + replace: '{{ gf_datasource_name }}' + backup: yes + +- name: Add new dashboard + uri: + url: "{{ protocol }}://{{ route }}/api/dashboards/db" + user: admin + password: admin + method: POST + body: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + headers: + Content-Type: "Content-Type: application/json" + register: add_ds + +- name: Regex json tear down + replace: + path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + regexp: '${DS_PR}' + replace: '{{ gf_datasource_name }}' + backup: yes |