summaryrefslogtreecommitdiffstats
path: root/roles
diff options
context:
space:
mode:
Diffstat (limited to 'roles')
-rw-r--r--roles/calico_master/tasks/main.yml2
-rw-r--r--roles/openshift_ca/tasks/main.yml3
-rw-r--r--roles/openshift_examples/meta/main.yml1
-rw-r--r--roles/openshift_excluder/tasks/verify_excluder.yml2
-rw-r--r--roles/openshift_expand_partition/tasks/main.yml2
-rw-r--r--roles/openshift_grafana/defaults/main.yml12
-rw-r--r--roles/openshift_grafana/files/grafana-ocp-oauth.yml661
-rw-r--r--roles/openshift_grafana/files/grafana-ocp.yml76
-rw-r--r--roles/openshift_grafana/files/openshift-cluster-monitoring.json5138
-rw-r--r--roles/openshift_grafana/meta/main.yml13
-rw-r--r--roles/openshift_grafana/tasks/gf-permissions.yml12
-rw-r--r--roles/openshift_grafana/tasks/main.yml122
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_image_availability.py2
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py2
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/kibana.py13
-rw-r--r--roles/openshift_health_checker/test/kibana_test.py12
-rw-r--r--roles/openshift_logging/tasks/install_logging.yaml8
-rw-r--r--roles/openshift_logging_elasticsearch/tasks/get_es_version.yml4
-rw-r--r--roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml4
-rw-r--r--roles/openshift_master/tasks/upgrade/rpm_upgrade.yml22
-rw-r--r--roles/openshift_metrics/tasks/oc_apply.yaml8
-rw-r--r--roles/openshift_persistent_volumes/tasks/pv.yml2
-rw-r--r--roles/openshift_persistent_volumes/tasks/pvc.yml2
-rw-r--r--roles/openshift_provisioners/tasks/oc_apply.yaml12
-rw-r--r--roles/openshift_version/tasks/check_available_rpms.yml2
-rw-r--r--roles/openshift_version/tasks/first_master_containerized_version.yml5
-rw-r--r--roles/openshift_version/tasks/first_master_rpm_version.yml6
-rw-r--r--roles/openshift_version/tasks/masters_and_nodes.yml7
-rw-r--r--roles/template_service_broker/vars/default_images.yml4
-rw-r--r--roles/template_service_broker/vars/openshift-enterprise.yml4
30 files changed, 6116 insertions, 47 deletions
diff --git a/roles/calico_master/tasks/main.yml b/roles/calico_master/tasks/main.yml
index 05415a4d6..834ebba64 100644
--- a/roles/calico_master/tasks/main.yml
+++ b/roles/calico_master/tasks/main.yml
@@ -23,7 +23,7 @@
-f {{ mktemp.stdout }}/calico-policy-controller.yml
--config={{ openshift.common.config_base }}/master/admin.kubeconfig
register: calico_create_output
- failed_when: ('already exists' not in calico_create_output.stderr) and ('created' not in calico_create_output.stdout)
+ failed_when: "('already exists' not in calico_create_output.stderr) and ('created' not in calico_create_output.stdout) and calico_create_output.rc != 0"
changed_when: ('created' in calico_create_output.stdout)
- name: Calico Master | Delete temp directory
diff --git a/roles/openshift_ca/tasks/main.yml b/roles/openshift_ca/tasks/main.yml
index b94cd9fba..9c8534c74 100644
--- a/roles/openshift_ca/tasks/main.yml
+++ b/roles/openshift_ca/tasks/main.yml
@@ -19,7 +19,8 @@
- name: Reload generated facts
openshift_facts:
- when: hostvars[openshift_ca_host].install_result is changed
+ when:
+ - hostvars[openshift_ca_host].install_result | default({'changed':false}) is changed
- name: Create openshift_ca_config_dir if it does not exist
file:
diff --git a/roles/openshift_examples/meta/main.yml b/roles/openshift_examples/meta/main.yml
index 1a34c85fc..9f46a4683 100644
--- a/roles/openshift_examples/meta/main.yml
+++ b/roles/openshift_examples/meta/main.yml
@@ -13,3 +13,4 @@ galaxy_info:
- cloud
dependencies:
- role: lib_utils
+- role: openshift_facts
diff --git a/roles/openshift_excluder/tasks/verify_excluder.yml b/roles/openshift_excluder/tasks/verify_excluder.yml
index 4f5277fa2..22a3fcd3b 100644
--- a/roles/openshift_excluder/tasks/verify_excluder.yml
+++ b/roles/openshift_excluder/tasks/verify_excluder.yml
@@ -3,7 +3,7 @@
# - excluder
- name: Get available excluder version
repoquery:
- name: "{{ excluder }}"
+ name: "{{ excluder }}{{ '-' ~ r_openshift_excluder_upgrade_target.split('.')[0:2] | join('.') ~ '*' if r_openshift_excluder_upgrade_target is defined else '' }}"
ignore_excluders: true
register: repoquery_out
diff --git a/roles/openshift_expand_partition/tasks/main.yml b/roles/openshift_expand_partition/tasks/main.yml
index 5ae863871..b38ebdfb4 100644
--- a/roles/openshift_expand_partition/tasks/main.yml
+++ b/roles/openshift_expand_partition/tasks/main.yml
@@ -8,7 +8,7 @@
- name: Determine if growpart is installed
command: "rpm -q cloud-utils-growpart"
register: has_growpart
- failed_when: has_growpart.cr != 0 and 'package cloud-utils-growpart is not installed' not in has_growpart.stdout
+ failed_when: has_growpart.rc != 0 and 'package cloud-utils-growpart is not installed' not in has_growpart.stdout
changed_when: false
when: openshift_is_containerized | bool
diff --git a/roles/openshift_grafana/defaults/main.yml b/roles/openshift_grafana/defaults/main.yml
new file mode 100644
index 000000000..7fd7a085d
--- /dev/null
+++ b/roles/openshift_grafana/defaults/main.yml
@@ -0,0 +1,12 @@
+---
+gf_body_tmp:
+ name: grafana_name
+ type: prometheus
+ typeLogoUrl: ''
+ access: proxy
+ url: prometheus_url
+ basicAuth: false
+ withCredentials: false
+ jsonData:
+ tlsSkipVerify: true
+ token: satoken
diff --git a/roles/openshift_grafana/files/grafana-ocp-oauth.yml b/roles/openshift_grafana/files/grafana-ocp-oauth.yml
new file mode 100644
index 000000000..82fa89004
--- /dev/null
+++ b/roles/openshift_grafana/files/grafana-ocp-oauth.yml
@@ -0,0 +1,661 @@
+---
+kind: Template
+apiVersion: v1
+metadata:
+ name: grafana-ocp
+ annotations:
+ "openshift.io/display-name": Grafana ocp
+ description: |
+ Grafana server with patched Prometheus datasource.
+ iconClass: icon-cogs
+ tags: "metrics,monitoring,grafana,prometheus"
+parameters:
+- description: The location of the proxy image
+ name: IMAGE_GF
+ value: mrsiano/grafana-ocp:latest
+- description: The location of the proxy image
+ name: IMAGE_PROXY
+ value: openshift/oauth-proxy:v1.0.0
+- description: External URL for the grafana route
+ name: ROUTE_URL
+ value: ""
+- description: The namespace to instantiate heapster under. Defaults to 'grafana'.
+ name: NAMESPACE
+ value: grafana
+- description: The session secret for the proxy
+ name: SESSION_SECRET
+ generate: expression
+ from: "[a-zA-Z0-9]{43}"
+objects:
+- apiVersion: v1
+ kind: ServiceAccount
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ annotations:
+ serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"grafana-ocp"}}'
+- apiVersion: authorization.openshift.io/v1
+ kind: ClusterRoleBinding
+ metadata:
+ name: gf-cluster-reader
+ roleRef:
+ name: cluster-reader
+ subjects:
+ - kind: ServiceAccount
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+- apiVersion: route.openshift.io/v1
+ kind: Route
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ spec:
+ host: "${ROUTE_URL}"
+ to:
+ name: grafana-ocp
+ tls:
+ termination: Reencrypt
+- apiVersion: v1
+ kind: Service
+ metadata:
+ name: grafana-ocp
+ annotations:
+ prometheus.io/scrape: "true"
+ prometheus.io/scheme: https
+ service.alpha.openshift.io/serving-cert-secret-name: gf-tls
+ namespace: "${NAMESPACE}"
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ ports:
+ - name: grafana-ocp
+ port: 443
+ protocol: TCP
+ targetPort: 8443
+ selector:
+ app: grafana-ocp
+- apiVersion: v1
+ kind: Secret
+ metadata:
+ name: gf-proxy
+ namespace: "${NAMESPACE}"
+ stringData:
+ session_secret: "${SESSION_SECRET}="
+# Deploy Grafana behind an oauth proxy
+- apiVersion: extensions/v1beta1
+ kind: Deployment
+ metadata:
+ labels:
+ app: grafana-ocp
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: grafana-ocp
+ template:
+ metadata:
+ labels:
+ app: grafana-ocp
+ name: grafana-ocp-app
+ spec:
+ serviceAccountName: grafana-ocp
+ containers:
+ - name: oauth-proxy
+ image: ${IMAGE_PROXY}
+ imagePullPolicy: IfNotPresent
+ ports:
+ - containerPort: 8443
+ name: web
+ args:
+ - -https-address=:8443
+ - -http-address=
+ - -email-domain=*
+ - -client-id=system:serviceaccount:${NAMESPACE}:grafana-ocp
+ - -upstream=http://localhost:3000
+ - -provider=openshift
+# - '-openshift-delegate-urls={"/api/datasources": {"resource": "namespace", "verb": "get", "resourceName": "grafana-ocp", "namespace": "${NAMESPACE}"}}'
+ - '-openshift-sar={"namespace": "${NAMESPACE}", "verb": "list", "resource": "services"}'
+ - -tls-cert=/etc/tls/private/tls.crt
+ - -tls-key=/etc/tls/private/tls.key
+ - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+ - -cookie-secret-file=/etc/proxy/secrets/session_secret
+ - -skip-auth-regex=^/metrics,/api/datasources,/api/dashboards
+ volumeMounts:
+ - mountPath: /etc/tls/private
+ name: gf-tls
+ - mountPath: /etc/proxy/secrets
+ name: secrets
+
+ - name: grafana-ocp
+ image: ${IMAGE_GF}
+ ports:
+ - name: grafana-http
+ containerPort: 3000
+ volumeMounts:
+ - mountPath: "/root/go/src/github.com/grafana/grafana/data"
+ name: gf-data
+ - mountPath: "/root/go/src/github.com/grafana/grafana/conf"
+ name: gfconfig
+ - mountPath: /etc/tls/private
+ name: gf-tls
+ - mountPath: /etc/proxy/secrets
+ name: secrets
+ command:
+ - "./bin/grafana-server"
+
+ volumes:
+ - name: gfconfig
+ configMap:
+ name: gf-config
+ - name: secrets
+ secret:
+ secretName: gf-proxy
+ - name: gf-tls
+ secret:
+ secretName: gf-tls
+ - emptyDir: {}
+ name: gf-data
+- apiVersion: v1
+ kind: ConfigMap
+ metadata:
+ name: gf-config
+ namespace: "${NAMESPACE}"
+ data:
+ defaults.ini: |-
+ ##################### Grafana Configuration Defaults #####################
+ #
+ # Do not modify this file in grafana installs
+ #
+
+ # possible values : production, development
+ app_mode = production
+
+ # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty
+ instance_name = ${HOSTNAME}
+
+ #################################### Paths ###############################
+ [paths]
+ # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used)
+ #
+ data = data
+ #
+ # Directory where grafana can store logs
+ #
+ logs = data/log
+ #
+ # Directory where grafana will automatically scan and look for plugins
+ #
+ plugins = data/plugins
+
+ #################################### Server ##############################
+ [server]
+ # Protocol (http, https, socket)
+ protocol = http
+
+ # The ip address to bind to, empty will bind to all interfaces
+ http_addr =
+
+ # The http port to use
+ http_port = 3000
+
+ # The public facing domain name used to access grafana from a browser
+ domain = localhost
+
+ # Redirect to correct domain if host header does not match domain
+ # Prevents DNS rebinding attacks
+ enforce_domain = false
+
+ # The full public facing url
+ root_url = %(protocol)s://%(domain)s:%(http_port)s/
+
+ # Log web requests
+ router_logging = false
+
+ # the path relative to the working path
+ static_root_path = public
+
+ # enable gzip
+ enable_gzip = false
+
+ # https certs & key file
+ cert_file = /etc/tls/private/tls.crt
+ cert_key = /etc/tls/private/tls.key
+
+ # Unix socket path
+ socket = /tmp/grafana.sock
+
+ #################################### Database ############################
+ [database]
+ # You can configure the database connection by specifying type, host, name, user and password
+ # as separate properties or as one string using the url property.
+
+ # Either "mysql", "postgres" or "sqlite3", it's your choice
+ type = sqlite3
+ host = 127.0.0.1:3306
+ name = grafana
+ user = root
+ # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;"""
+ password =
+ # Use either URL or the previous fields to configure the database
+ # Example: mysql://user:secret@host:port/database
+ url =
+
+ # Max idle conn setting default is 2
+ max_idle_conn = 2
+
+ # Max conn setting default is 0 (means not set)
+ max_open_conn =
+
+ # For "postgres", use either "disable", "require" or "verify-full"
+ # For "mysql", use either "true", "false", or "skip-verify".
+ ssl_mode = disable
+
+ ca_cert_path =
+ client_key_path =
+ client_cert_path =
+ server_cert_name =
+
+ # For "sqlite3" only, path relative to data_path setting
+ path = grafana.db
+
+ #################################### Session #############################
+ [session]
+ # Either "memory", "file", "redis", "mysql", "postgres", "memcache", default is "file"
+ provider = file
+
+ # Provider config options
+ # memory: does not have any config yet
+ # file: session dir path, is relative to grafana data_path
+ # redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana`
+ # postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable
+ # mysql: go-sql-driver/mysql dsn config string, examples:
+ # `user:password@tcp(127.0.0.1:3306)/database_name`
+ # `user:password@unix(/var/run/mysqld/mysqld.sock)/database_name`
+ # memcache: 127.0.0.1:11211
+
+
+ provider_config = sessions
+
+ # Session cookie name
+ cookie_name = grafana_sess
+
+ # If you use session in https only, default is false
+ cookie_secure = false
+
+ # Session life time, default is 86400
+ session_life_time = 86400
+ gc_interval_time = 86400
+
+ #################################### Data proxy ###########################
+ [dataproxy]
+
+ # This enables data proxy logging, default is false
+ logging = false
+
+ #################################### Analytics ###########################
+ [analytics]
+ # Server reporting, sends usage counters to stats.grafana.org every 24 hours.
+ # No ip addresses are being tracked, only simple counters to track
+ # running instances, dashboard and error counts. It is very helpful to us.
+ # Change this option to false to disable reporting.
+ reporting_enabled = true
+
+ # Set to false to disable all checks to https://grafana.com
+ # for new versions (grafana itself and plugins), check is used
+ # in some UI views to notify that grafana or plugin update exists
+ # This option does not cause any auto updates, nor send any information
+ # only a GET request to https://grafana.com to get latest versions
+ check_for_updates = true
+
+ # Google Analytics universal tracking code, only enabled if you specify an id here
+ google_analytics_ua_id =
+
+ # Google Tag Manager ID, only enabled if you specify an id here
+ google_tag_manager_id =
+
+ #################################### Security ############################
+ [security]
+ # default admin user, created on startup
+ admin_user = admin
+
+ # default admin password, can be changed before first start of grafana, or in profile settings
+ admin_password = admin
+
+ # used for signing
+ secret_key = SW2YcwTIb9zpOOhoPsMm
+
+ # Auto-login remember days
+ login_remember_days = 7
+ cookie_username = grafana_user
+ cookie_remember_name = grafana_remember
+
+ # disable gravatar profile images
+ disable_gravatar = false
+
+ # data source proxy whitelist (ip_or_domain:port separated by spaces)
+ data_source_proxy_whitelist =
+
+ [snapshots]
+ # snapshot sharing options
+ external_enabled = true
+ external_snapshot_url = https://snapshots-origin.raintank.io
+ external_snapshot_name = Publish to snapshot.raintank.io
+
+ # remove expired snapshot
+ snapshot_remove_expired = true
+
+ # remove snapshots after 90 days
+ snapshot_TTL_days = 90
+
+ #################################### Users ####################################
+ [users]
+ # disable user signup / registration
+ allow_sign_up = true
+
+ # Allow non admin users to create organizations
+ allow_org_create = true
+
+ # Set to true to automatically assign new users to the default organization (id 1)
+ auto_assign_org = true
+
+ # Default role new users will be automatically assigned (if auto_assign_org above is set to true)
+ auto_assign_org_role = Admin
+
+ # Require email validation before sign up completes
+ verify_email_enabled = false
+
+ # Background text for the user field on the login page
+ login_hint = email or username
+
+ # Default UI theme ("dark" or "light")
+ default_theme = dark
+
+ # External user management
+ external_manage_link_url =
+ external_manage_link_name =
+ external_manage_info =
+
+ [auth]
+ # Set to true to disable (hide) the login form, useful if you use OAuth
+ disable_login_form = true
+
+ # Set to true to disable the signout link in the side menu. useful if you use auth.proxy
+ disable_signout_menu = true
+
+ #################################### Anonymous Auth ######################
+ [auth.anonymous]
+ # enable anonymous access
+ enabled = true
+
+ # specify organization name that should be used for unauthenticated users
+ org_name = Main Org.
+
+ # specify role for unauthenticated users
+ org_role = Admin
+
+ #################################### Github Auth #########################
+ [auth.github]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ auth_url = https://github.com/login/oauth/authorize
+ token_url = https://github.com/login/oauth/access_token
+ api_url = https://api.github.com/user
+ team_ids =
+ allowed_organizations =
+
+ #################################### Google Auth #########################
+ [auth.google]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_client_id
+ client_secret = some_client_secret
+ scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email
+ auth_url = https://accounts.google.com/o/oauth2/auth
+ token_url = https://accounts.google.com/o/oauth2/token
+ api_url = https://www.googleapis.com/oauth2/v1/userinfo
+ allowed_domains =
+ hosted_domain =
+
+ #################################### Grafana.com Auth ####################
+ # legacy key names (so they work in env variables)
+ [auth.grafananet]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ allowed_organizations =
+
+ [auth.grafana_com]
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ allowed_organizations =
+
+ #################################### Generic OAuth #######################
+ [auth.generic_oauth]
+ name = OAuth
+ enabled = false
+ allow_sign_up = true
+ client_id = some_id
+ client_secret = some_secret
+ scopes = user:email
+ auth_url =
+ token_url =
+ api_url =
+ team_ids =
+ allowed_organizations =
+
+ #################################### Basic Auth ##########################
+ [auth.basic]
+ enabled = false
+
+ #################################### Auth Proxy ##########################
+ [auth.proxy]
+ enabled = true
+ header_name = X-WEBAUTH-USER
+ header_property = username
+ auto_sign_up = true
+ ldap_sync_ttl = 60
+ whitelist =
+
+ #################################### Auth LDAP ###########################
+ [auth.ldap]
+ enabled = false
+ config_file = /etc/grafana/ldap.toml
+ allow_sign_up = true
+
+ #################################### SMTP / Emailing #####################
+ [smtp]
+ enabled = false
+ host = localhost:25
+ user =
+ # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;"""
+ password =
+ cert_file =
+ key_file =
+ skip_verify = false
+ from_address = admin@grafana.localhost
+ from_name = Grafana
+ ehlo_identity =
+
+ [emails]
+ welcome_email_on_sign_up = false
+ templates_pattern = emails/*.html
+
+ #################################### Logging ##########################
+ [log]
+ # Either "console", "file", "syslog". Default is console and file
+ # Use space to separate multiple modes, e.g. "console file"
+ mode = console file
+
+ # Either "debug", "info", "warn", "error", "critical", default is "info"
+ level = error
+
+ # optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug
+ filters =
+
+ # For "console" mode only
+ [log.console]
+ level =
+
+ # log line format, valid options are text, console and json
+ format = console
+
+ # For "file" mode only
+ [log.file]
+ level =
+
+ # log line format, valid options are text, console and json
+ format = text
+
+ # This enables automated log rotation (switch for the following options), default is true
+ log_rotate = true
+
+ # Max line number of single file, default is 1000000
+ max_lines = 1000000
+
+ # Max size shift of single file, default is 28 means 1 << 28, 256MB
+ max_size_shift = 28
+
+ # Segment log daily, default is true
+ daily_rotate = true
+
+ # Expired days of log file(delete after max days), default is 7
+ max_days = 7
+
+ [log.syslog]
+ level =
+
+ # log line format, valid options are text, console and json
+ format = text
+
+ # Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used.
+ network =
+ address =
+
+ # Syslog facility. user, daemon and local0 through local7 are valid.
+ facility =
+
+ # Syslog tag. By default, the process' argv[0] is used.
+ tag =
+
+
+ #################################### AMQP Event Publisher ################
+ [event_publisher]
+ enabled = false
+ rabbitmq_url = amqp://localhost/
+ exchange = grafana_events
+
+ #################################### Dashboard JSON files ################
+ [dashboards.json]
+ enabled = false
+ path = /var/lib/grafana/dashboards
+
+ #################################### Usage Quotas ########################
+ [quota]
+ enabled = false
+
+ #### set quotas to -1 to make unlimited. ####
+ # limit number of users per Org.
+ org_user = 10
+
+ # limit number of dashboards per Org.
+ org_dashboard = 100
+
+ # limit number of data_sources per Org.
+ org_data_source = 10
+
+ # limit number of api_keys per Org.
+ org_api_key = 10
+
+ # limit number of orgs a user can create.
+ user_org = 10
+
+ # Global limit of users.
+ global_user = -1
+
+ # global limit of orgs.
+ global_org = -1
+
+ # global limit of dashboards
+ global_dashboard = -1
+
+ # global limit of api_keys
+ global_api_key = -1
+
+ # global limit on number of logged in users.
+ global_session = -1
+
+ #################################### Alerting ############################
+ [alerting]
+ # Set to false to disable the alerting engine & UI features
+ enabled = true
+ # Makes it possible to turn off alert rule execution but alerting UI is visible
+ execute_alerts = true
+
+ #################################### Internal Grafana Metrics ############
+ # Metrics available at HTTP API Url /api/metrics
+ [metrics]
+ enabled = true
+ interval_seconds = 10
+
+ # Send internal Grafana metrics to graphite
+ [metrics.graphite]
+ # Enable by setting the address setting (ex localhost:2003)
+ address =
+ prefix = prod.grafana.%(instance_name)s.
+
+ [grafana_net]
+ url = https://grafana.com
+
+ [grafana_com]
+ url = https://grafana.com
+
+ #################################### Distributed tracing ############
+ [tracing.jaeger]
+ # jaeger destination (ex localhost:6831)
+ address =
+ # tag that will always be included when creating new spans. ex (tag1:value1,tag2:value2)
+ always_included_tag =
+ # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
+ sampler_type = const
+ # jaeger samplerconfig param
+ # for "const" sampler, 0 or 1 for always false/true respectively
+ # for "probabilistic" sampler, a probability between 0 and 1
+ # for "rateLimiting" sampler, the number of spans per second
+ # for "remote" sampler, param is the same as for "probabilistic"
+ # and indicates the initial sampling rate before the actual one
+ # is received from the mothership
+ sampler_param = 1
+
+ #################################### External Image Storage ##############
+ [external_image_storage]
+ # You can choose between (s3, webdav, gcs)
+ provider =
+
+ [external_image_storage.s3]
+ bucket_url =
+ bucket =
+ region =
+ path =
+ access_key =
+ secret_key =
+
+ [external_image_storage.webdav]
+ url =
+ username =
+ password =
+ public_url =
+
+ [external_image_storage.gcs]
+ key_file =
+ bucket =
diff --git a/roles/openshift_grafana/files/grafana-ocp.yml b/roles/openshift_grafana/files/grafana-ocp.yml
new file mode 100644
index 000000000..bc7b4b286
--- /dev/null
+++ b/roles/openshift_grafana/files/grafana-ocp.yml
@@ -0,0 +1,76 @@
+---
+kind: Template
+apiVersion: v1
+metadata:
+ name: grafana-ocp
+ annotations:
+ "openshift.io/display-name": Grafana ocp
+ description: |
+ Grafana server with patched Prometheus datasource.
+ iconClass: icon-cogs
+ tags: "metrics,monitoring,grafana,prometheus"
+parameters:
+- description: External URL for the grafana route
+ name: ROUTE_URL
+ value: ""
+- description: The namespace to instantiate heapster under. Defaults to 'grafana'.
+ name: NAMESPACE
+ value: grafana
+objects:
+- apiVersion: route.openshift.io/v1
+ kind: Route
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ spec:
+ host: "${ROUTE_URL}"
+ to:
+ name: grafana-ocp
+- apiVersion: v1
+ kind: Service
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ selector:
+ name: grafana-ocp
+ ports:
+ - port: 8082
+ protocol: TCP
+ targetPort: grafana-http
+- apiVersion: v1
+ kind: ReplicationController
+ metadata:
+ name: grafana-ocp
+ namespace: "${NAMESPACE}"
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ selector:
+ name: grafana-ocp
+ replicas: 1
+ template:
+ version: v1
+ metadata:
+ labels:
+ metrics-infra: grafana-ocp
+ name: grafana-ocp
+ spec:
+ volumes:
+ - name: data
+ emptyDir: {}
+ containers:
+ - image: "mrsiano/grafana-ocp:latest"
+ name: grafana-ocp
+ ports:
+ - name: grafana-http
+ containerPort: 3000
+ volumeMounts:
+ - name: data
+ mountPath: "/root/go/src/github.com/grafana/grafana/data"
+ command:
+ - "./bin/grafana-server"
diff --git a/roles/openshift_grafana/files/openshift-cluster-monitoring.json b/roles/openshift_grafana/files/openshift-cluster-monitoring.json
new file mode 100644
index 000000000..f59ca997f
--- /dev/null
+++ b/roles/openshift_grafana/files/openshift-cluster-monitoring.json
@@ -0,0 +1,5138 @@
+{
+ "dashboard": {
+ "description": "Monitors Openshift cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only.",
+ "editable": true,
+ "gnetId": 315,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "rows": [
+ {
+ "collapse": false,
+ "height": "200px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "height": "200px",
+ "id": 32,
+ "legend": {
+ "alignAsTable": false,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))",
+ "format": "time_series",
+ "instant": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "Received",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "Sent",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network I/O pressure",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "transparent": false,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Network I/O pressure",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "180px",
+ "id": 4,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Cluster memory usage",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "180px",
+ "id": 6,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Cluster CPU usage ",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "180px",
+ "id": 7,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Cluster filesystem usage",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 9,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "20%",
+ "prefix": "",
+ "prefixFontSize": "20%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Used",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 10,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 11,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": " cores",
+ "postfixFontSize": "30%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m]))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Used",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 12,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": " cores",
+ "postfixFontSize": "30%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 13,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Used",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "height": "1px",
+ "id": 14,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "",
+ "title": "Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Total usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 33,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "overall cpu usage",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cluster CPU Usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "height": "",
+ "id": 17,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{ pod_name }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pods CPU usage",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "transparent": false,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "% Usage",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pods CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "height": "",
+ "id": 24,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Containers Cores Usage",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Containers CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "height": "",
+ "id": 23,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ id }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "System services CPU usage",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "System services CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 411,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 34,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (irate (container_memory_usage_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ id }}",
+ "metric": "container_cpu",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "All processes Memory usage",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "All processes CPU usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 25,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ pod_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pods memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pods memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 26,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_rss{systemd_service_name=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (systemd_service_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ systemd_service_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "B",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "System services memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "System services memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 27,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "C",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Containers memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Containers memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "500px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 28,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ id }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "All processes memory usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "All processes memory usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 30,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- pod: {{ pod_name }} | {{ container_name }}",
+ "metric": "network",
+ "refId": "D",
+ "step": 1
+ },
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
+ "metric": "network",
+ "refId": "C",
+ "step": 1
+ },
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
+ "metric": "network",
+ "refId": "E",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
+ "metric": "network",
+ "refId": "F",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Containers network I/O",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Containers network I/O",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 277,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 16,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> {{ pod_name }}",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- {{ pod_name }}",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pods network I/O",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pods network I/O",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": "500px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 29,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate (container_network_receive_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "instant": true,
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "-> {{ id }}",
+ "metric": "network",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "- sum (irate (container_network_transmit_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "<- {{ id }}",
+ "metric": "network",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "All processes network I/O",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "All processes network I/O",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 35,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_build_total) by (phase,reason)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ phase }} | {{ reason }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_build_total",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 54,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "count(openshift_build_active_time_seconds{phase=\"running\"} offset 10m)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of builds that have been running for more than 10 minutes (600 seconds).",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 55,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "count(openshift_build_active_time_seconds{phase=\"pending\"} offset 10m)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of builds that have been waiting at least 10 minutes (600 seconds) to start.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 56,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_build_total{phase=\"Failed\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of failed builds, regardless of the failure reason.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 57,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_build_total{phase=\"Failed\",reason=\"FetchSourceFailed\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ instance }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of failed builds because of problems retrieving source from the associated Git repository.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": true,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 58,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": false,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_build_total{phase=\"Complete\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the number of successfully completed builds.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 0,
+ "id": 59,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_build_total{phase=\"Failed\"} offset 5m",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ reason }}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns the failed builds totals, per failure reason, from 5 minutes ago.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OpenShift Builds",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 36,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_sdn_pod_setup_latency_sum)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_pod_setup_latency_sum",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 41,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(openshift_sdn_pod_teardown_latency{quantile=\"0.9\"}) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_pod_teardown_latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 50,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "topk(10, (sum by (pod_name) (irate(container_network_receive_bytes_total{pod_name!=\"\"}[5m]))))",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{ pod_name }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Top 10 pods receiving the most network traffic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 37,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_sdn_pod_ips",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ instance }} | {{ role }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_pod_ips",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 39,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 42,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_sdn_arp_cache_entries",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ role }} | {{ instance }}",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_arp_cache_entries",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 40,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "openshift_sdn_arp_cache_entries",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "openshift_sdn_arp_cache_entries",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OpenShift SDN",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 44,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(kubelet_pleg_relist_latency_microseconds{kubernetes_io_hostname=~\"$Node\",quantile=\"0.9\"}[2m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ role }} | {{ instance }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "kubelet_pleg_relist",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 51,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}[2m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ operation_type }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 52,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "kubelet_docker_operations_timeout",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ operation_type }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns a running count (not a rate) of docker operations that have timed out since the kubelet was started.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 53,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "kubelet_docker_operations_errors",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ operation_type }}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Returns a running count (not a rate) of docker operations that have failed since the kubelet was started.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Kubelet",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 46,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(scrape_samples_scraped[2m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{ kubernetes_name }} | {{ instance }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "scrape_samples_scraped",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 68,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum without (cpu) (irate(container_cpu_usage_seconds_total{container_name=\"prometheus\"}[5m])))",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU per instance of Prometheus container.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Prometheus",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 48,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb!~\"GET|LIST|WATCH\"}[2m]))) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ resource }} || {{ verb }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Number of mutating API requests being made to the control plane.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 49,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb=~\"GET|LIST|WATCH\"}[2m]))) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ resource }} || {{ pod }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Number of non-mutating API requests being made to the control plane.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 74,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "endpoint_queue_latency",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": " quantile {{ quantile }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "endpoint_queue_latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "API Server",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 61,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "etcd_disk_wal_fsync_duration_seconds_count",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "etcd_disk_wal_fsync_duration_seconds_count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "etcd",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 62,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(changes(container_start_time_seconds[10m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "The number of containers that start or restart over the last ten minutes.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Changes in your cluster",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 63,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(machine_cpu_cores)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total number of cores in the cluster.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 64,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(sort_desc(irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total number of consumed cores.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 65,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (kubernetes_io_hostname,type) (irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU consumed per node in the cluster.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 66,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (cpu,id,pod_name,container_name) (irate(container_cpu_usage_seconds_total{role=\"infra\"}[5m])))",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU consumption per system service or container on the infrastructure nodes.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 67,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (namespace) (irate(container_cpu_usage_seconds_total[5m])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU consumed per namespace on the cluster.",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 47,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_usage_seconds_total{id=\"/\"}[3m])) / sum(machine_cpu_cores)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Percentage of total cluster CPU in use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 69,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_rss) / sum(machine_memory_bytes)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Percentage of total cluster memory in use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 70,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (kubernetes_io_hostname) (irate(container_cpu_usage_seconds_total{id=~\"/system.slice/(docker|etcd).service\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Aggregate CPU usage (seconds total) of etcd+docker",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "System and container CPU",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 71,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+ {
+ "title": "Kubernetes Storage Metrics via Prometheus",
+ "type": "absolute",
+ "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g"
+ }
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "volumes_queue_latency",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "volumes_queue_latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 72,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+ {
+ "title": "Kubernetes Storage Metrics via Prometheus",
+ "type": "absolute",
+ "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g"
+ }
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(cloudprovider_gce_api_request_duration_seconds_count[2m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ request }}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "cloudprovider_gce_api_request_duration_seconds_count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PR}",
+ "fill": 1,
+ "id": 73,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+ {
+ "title": "Kubernetes Storage Metrics via Prometheus",
+ "type": "absolute",
+ "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g"
+ }
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum (irate(storage_operation_duration_seconds_sum{kubernetes_io_hostname=~\"$Node\"}[2m])) by (operation_name,kubernetes_io_hostname)",
+ "format": "time_series",
+ "interval": "1s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ operation_name }} || {{ kubernetes_io_hostname }}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "storage_operation_duration_seconds_sum",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OpenShift Volumes",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "kubernetes",
+ "openshift"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": ".*",
+ "current": {},
+ "datasource": "${DS_PR}",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "Node",
+ "options": [],
+ "query": "label_values(kubernetes_io_hostname)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "1s",
+ "2m",
+ "20s",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "openshift cluster monitoring",
+ "version": 6
+ }
+}
diff --git a/roles/openshift_grafana/meta/main.yml b/roles/openshift_grafana/meta/main.yml
new file mode 100644
index 000000000..8dea6f197
--- /dev/null
+++ b/roles/openshift_grafana/meta/main.yml
@@ -0,0 +1,15 @@
+---
+# Ansible Galaxy metadata for the openshift_grafana role.
+galaxy_info:
+  author: Eldad Marciano
+  description: Setup grafana pod
+  company: Red Hat, Inc.
+  license: Apache License, Version 2.0
+  # Quoted so the version is read as a string rather than a float.
+  min_ansible_version: "2.3"
+  platforms:
+  - name: EL
+    versions:
+    - 7
+  categories:
+  - metrics
diff --git a/roles/openshift_grafana/tasks/gf-permissions.yml b/roles/openshift_grafana/tasks/gf-permissions.yml
new file mode 100644
index 000000000..9d3c741ee
--- /dev/null
+++ b/roles/openshift_grafana/tasks/gf-permissions.yml
@@ -0,0 +1,20 @@
+---
+# Switches the master to htpasswd authentication and gives the gfadmin user
+# cluster-reader access, then restarts the master API service.
+
+# NOTE(review): htpasswd is invoked without -b/-i, so it will prompt for a
+# password, and -c recreates the file, dropping any existing entries. Confirm
+# both are intended before relying on this task in unattended runs.
+- name: Create gf user on htpasswd
+  command: htpasswd -c /etc/origin/master/htpasswd gfadmin
+
+# "-i -e" is spelled out because "-ie" makes sed treat "e" as a backup suffix,
+# and the newline is passed as a literal "\n" escape for sed to expand.
+- name: Make sure master config use HTPasswdPasswordIdentityProvider
+  command: "sed -i -e 's|AllowAllPasswordIdentityProvider|HTPasswdPasswordIdentityProvider\\n file: /etc/origin/master/htpasswd|' /etc/origin/master/master-config.yaml"
+
+- name: Grant permission for gfuser
+  command: oc adm policy add-cluster-role-to-user cluster-reader gfadmin
+
+- name: Restart master api
+  command: systemctl restart atomic-openshift-master-api.service
diff --git a/roles/openshift_grafana/tasks/main.yml b/roles/openshift_grafana/tasks/main.yml
new file mode 100644
index 000000000..6a06d40a9
--- /dev/null
+++ b/roles/openshift_grafana/tasks/main.yml
@@ -0,0 +1,130 @@
+---
+# Deploys Grafana into the "grafana" project, registers Prometheus as a
+# datasource and uploads the bundled cluster-monitoring dashboard.
+- name: Create grafana namespace
+  oc_project:
+    state: present
+    name: grafana
+
+- name: Configure Grafana Permissions
+  include_tasks: tasks/gf-permissions.yml
+  when: gf_oauth | default(false) | bool
+
+# "register" only takes a variable name, so the path of the rendered file is
+# kept with set_fact and shared by the template and process tasks below.
+- name: Select the grafana template for the requested auth mode
+  set_fact:
+    cl_file: "/tmp/{{ 'grafana-ocp-oauth.yaml' if gf_oauth | default(false) | bool else 'grafana-ocp.yaml' }}"
+
+# TODO: we should grab this yaml file from openshift/origin
+# NOTE(review): confirm these src names resolve; the role appears to ship the
+# manifests as files/grafana-ocp.yml and files/grafana-ocp-oauth.yml.
+- name: Templatize grafana yaml
+  template:
+    src: "{{ cl_file | basename }}"
+    dest: "{{ cl_file }}"
+
+# Runs for both auth modes; it was previously skipped unless gf_oauth was set.
+- name: Process the grafana file
+  oc_process:
+    namespace: grafana
+    template_name: "{{ cl_file }}"
+    create: true
+
+- name: Wait for grafana to be running
+  command: oc rollout status deployment/grafana-ocp -n grafana
+  changed_when: false
+
+- name: oc adm policy add-role-to-user view -z grafana-ocp -n {{ gf_prometheus_namespace }}
+  oc_adm_policy_user:
+    user: grafana-ocp
+    resource_kind: cluster-role
+    resource_name: view
+    state: present
+    role_namespace: "{{ gf_prometheus_namespace }}"
+
+# Each route is registered under its own name; sharing one variable let the
+# prometheus lookup overwrite the grafana one.
+- name: Get grafana route
+  oc_obj:
+    state: list
+    kind: route
+    name: grafana
+    namespace: grafana
+  register: grafana_route
+
+- name: Get prometheus route
+  oc_obj:
+    state: list
+    kind: route
+    name: prometheus
+    namespace: "{{ gf_prometheus_namespace }}"
+  register: prometheus_route
+
+# NOTE(review): assumes oc_obj with state "list" nests the route objects under
+# results.results -- confirm against the module before relying on it.
+- name: Record the grafana and prometheus route hosts
+  set_fact:
+    grafana_host: "{{ grafana_route.results.results[0].spec.host }}"
+    prometheus_host: "{{ prometheus_route.results.results[0].spec.host }}"
+
+- name: Get the prometheus SA
+  oc_serviceaccount_secret:
+    state: list
+    service_account: prometheus
+    namespace: "{{ gf_prometheus_namespace }}"
+  register: sa
+
+- name: Get the management SA bearer token
+  set_fact:
+    management_token: "{{ sa.results | oo_filter_sa_secrets }}"
+
+- name: Ensure the SA bearer token value is read
+  oc_secret:
+    state: list
+    name: "{{ management_token }}"
+    namespace: "{{ gf_prometheus_namespace }}"
+  no_log: true
+  register: sa_secret
+
+# NOTE(review): this reads the "encoded" form of the token; confirm the
+# datasource expects that rather than a decoded value.
+- name: Get the SA bearer token for prometheus
+  set_fact:
+    token: "{{ sa_secret.results.encoded.token }}"
+
+# set_fact is used because "var" is not an Ansible action.
+- name: Set protocol type
+  set_fact:
+    protocol: "{{ 'https' if gf_oauth | default(false) | bool else 'http' }}"
+
+# The datasource body is serialised and patched in a single expression, using
+# plain variable names inside the filters instead of nested "{{ }}".
+# force_basic_auth sends the credentials on the first request.
+- name: Add gf datasource
+  uri:
+    url: "{{ protocol }}://{{ grafana_host }}/api/datasources"
+    user: admin
+    password: admin
+    force_basic_auth: true
+    method: POST
+    body: "{{ gf_body_tmp | to_json | regex_replace('grafana_name', gf_datasource_name) | regex_replace('prometheus_url', 'https://' ~ prometheus_host) | regex_replace('satoken', token) }}"
+    body_format: json
+    headers:
+      Content-Type: application/json
+  register: add_ds
+
+# The datasource placeholder is substituted in memory, so the JSON shipped
+# with the role is never edited and no restore step is needed.
+- name: Add new dashboard
+  uri:
+    url: "{{ protocol }}://{{ grafana_host }}/api/dashboards/db"
+    user: admin
+    password: admin
+    force_basic_auth: true
+    method: POST
+    body: "{{ lookup('file', 'openshift-cluster-monitoring.json') | replace('${DS_PR}', gf_datasource_name) }}"
+    body_format: json
+    headers:
+      Content-Type: application/json
+  register: add_dashboard
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 7afb8f730..ac6ffbbad 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -56,7 +56,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
# ordered list of registries (according to inventory vars) that docker will try for unscoped images
regs = self.ensure_list("openshift_docker_additional_registries")
# currently one of these registries is added whether the user wants it or not.
- deployment_type = self.get_var("openshift_deployment_type")
+ deployment_type = self.get_var("openshift_deployment_type", default="")
if deployment_type == "origin" and "docker.io" not in regs:
regs.append("docker.io")
elif deployment_type == 'openshift-enterprise' and "registry.access.redhat.com" not in regs:
diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
index 986a01f38..7f8c6ebdc 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
@@ -170,7 +170,7 @@ class Elasticsearch(LoggingCheck):
"""
errors = []
for pod_name in pods_by_name.keys():
- df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name)
+ df_cmd = '-c elasticsearch exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name)
disk_output = self.exec_oc(df_cmd, [], save_as_name='get_pv_diskspace.json')
lines = disk_output.splitlines()
# expecting one header looking like 'IUse% Use%' and one body line
diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
index 3b1cf8baa..16ec3a7f6 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
@@ -5,12 +5,11 @@ Module for performing checks on a Kibana logging deployment
import json
import ssl
-try:
- from urllib2 import HTTPError, URLError
- import urllib2
-except ImportError:
- from urllib.error import HTTPError, URLError
- import urllib.request as urllib2
+# pylint can't find the package when it's installed in a virtualenv
+# pylint: disable=import-error,no-name-in-module
+from ansible.module_utils.six.moves.urllib import request
+# pylint: disable=import-error,no-name-in-module
+from ansible.module_utils.six.moves.urllib.error import HTTPError, URLError
from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException
@@ -65,7 +64,7 @@ class Kibana(LoggingCheck):
# Verify that the url is returning a valid response
try:
# We only care if the url connects and responds
- return_code = urllib2.urlopen(url, context=ctx).getcode()
+ return_code = request.urlopen(url, context=ctx).getcode()
except HTTPError as httperr:
return httperr.reason
except URLError as urlerr:
diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py
index 04a5e89c4..750d4b9e9 100644
--- a/roles/openshift_health_checker/test/kibana_test.py
+++ b/roles/openshift_health_checker/test/kibana_test.py
@@ -1,12 +1,10 @@
import pytest
import json
-try:
- import urllib2
- from urllib2 import HTTPError, URLError
-except ImportError:
- from urllib.error import HTTPError, URLError
- import urllib.request as urllib2
+# pylint can't find the package when it's installed in a virtualenv
+from ansible.module_utils.six.moves.urllib import request # pylint: disable=import-error
+# pylint: disable=import-error
+from ansible.module_utils.six.moves.urllib.error import HTTPError, URLError
from openshift_checks.logging.kibana import Kibana, OpenShiftCheckException
@@ -202,7 +200,7 @@ def test_verify_url_external_failure(lib_result, expect, monkeypatch):
if type(lib_result) is int:
return _http_return(lib_result)
raise lib_result
- monkeypatch.setattr(urllib2, 'urlopen', urlopen)
+ monkeypatch.setattr(request, 'urlopen', urlopen)
check = Kibana()
check._get_kibana_url = lambda: 'url'
diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml
index ff62b6136..f82e55b98 100644
--- a/roles/openshift_logging/tasks/install_logging.yaml
+++ b/roles/openshift_logging/tasks/install_logging.yaml
@@ -87,7 +87,7 @@
openshift_logging_elasticsearch_storage_type: "{{ elasticsearch_storage_type }}"
openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}"
- openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name }}"
+ openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name | default() }}"
openshift_logging_elasticsearch_nodeselector: "{{ openshift_logging_es_nodeselector if outer_item.0.nodeSelector | default(None) is none else outer_item.0.nodeSelector }}"
openshift_logging_elasticsearch_storage_group: "{{ [openshift_logging_es_storage_group] if outer_item.0.storageGroups | default([]) | length == 0 else outer_item.0.storageGroups }}"
_es_containers: "{{ outer_item.0.containers}}"
@@ -114,7 +114,7 @@
openshift_logging_elasticsearch_storage_type: "{{ elasticsearch_storage_type }}"
openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}"
- openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name }}"
+ openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name | default() }}"
with_sequence: count={{ openshift_logging_es_cluster_size | int - openshift_logging_facts.elasticsearch.deploymentconfigs.keys() | count }}
loop_control:
@@ -151,7 +151,7 @@
openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_ops_pvc_size }}"
openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_ops_pvc_dynamic }}"
openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_ops_pv_selector }}"
- openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name }}"
+ openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name | default() }}"
openshift_logging_elasticsearch_memory_limit: "{{ openshift_logging_es_ops_memory_limit }}"
openshift_logging_elasticsearch_cpu_limit: "{{ openshift_logging_es_ops_cpu_limit }}"
openshift_logging_elasticsearch_cpu_request: "{{ openshift_logging_es_ops_cpu_request }}"
@@ -193,7 +193,7 @@
openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_ops_pvc_size }}"
openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_ops_pvc_dynamic }}"
openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_ops_pv_selector }}"
- openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name }}"
+ openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name | default() }}"
openshift_logging_elasticsearch_memory_limit: "{{ openshift_logging_es_ops_memory_limit }}"
openshift_logging_elasticsearch_cpu_limit: "{{ openshift_logging_es_ops_cpu_limit }}"
openshift_logging_elasticsearch_cpu_request: "{{ openshift_logging_es_ops_cpu_request }}"
diff --git a/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml b/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml
index 9182bddb2..16de6f252 100644
--- a/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml
@@ -1,6 +1,6 @@
---
- command: >
- oc get pod -l component=es,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+ oc get pod -l component=es,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
register: _cluster_pods
- name: "Getting ES version for logging-es cluster"
@@ -10,7 +10,7 @@
when: _cluster_pods.stdout_lines | count > 0
- command: >
- oc get pod -l component=es-ops,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+ oc get pod -l component=es-ops,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
register: _ops_cluster_pods
- name: "Getting ES version for logging-es-ops cluster"
diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
index d55beec86..6bce13d1d 100644
--- a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
@@ -19,7 +19,7 @@
## get all pods for the cluster
- command: >
- oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+ oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
register: _cluster_pods
- name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
@@ -64,7 +64,7 @@
## we may need a new first pod to run against -- fetch them all again
- command: >
- oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+ oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
register: _cluster_pods
- name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
diff --git a/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml b/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml
index 96079884e..4564f33dd 100644
--- a/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml
+++ b/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml
@@ -8,8 +8,10 @@
# TODO: If the sdn package isn't already installed this will install it, we
# should fix that
-- name: Upgrade master packages
- package: name={{ master_pkgs | join(',') }} state=present
+- name: Upgrade master packages - yum
+ command:
+ yum install -y {{ master_pkgs | join(' ') }} \
+ {{ ' --exclude *' ~ openshift_service_type ~ '*3.9*' if openshift_release | version_compare('3.9','<') else '' }}
vars:
master_pkgs:
- "{{ openshift_service_type }}{{ openshift_pkg_version | default('') }}"
@@ -19,3 +21,19 @@
- "{{ openshift_service_type }}-clients{{ openshift_pkg_version | default('') }}"
register: result
until: result is succeeded
+ when: ansible_pkg_mgr == 'yum'
+
+- name: Upgrade master packages - dnf
+ dnf:
+ name: "{{ master_pkgs | join(',') }}"
+ state: present
+ vars:
+ master_pkgs:
+ - "{{ openshift_service_type }}{{ openshift_pkg_version }}"
+ - "{{ openshift_service_type }}-master{{ openshift_pkg_version }}"
+ - "{{ openshift_service_type }}-node{{ openshift_pkg_version }}"
+ - "{{ openshift_service_type }}-sdn-ovs{{ openshift_pkg_version }}"
+ - "{{ openshift_service_type }}-clients{{ openshift_pkg_version }}"
+ register: result
+ until: result is succeeded
+ when: ansible_pkg_mgr == 'dnf'
diff --git a/roles/openshift_metrics/tasks/oc_apply.yaml b/roles/openshift_metrics/tasks/oc_apply.yaml
index 8ccfb7192..057963c1a 100644
--- a/roles/openshift_metrics/tasks/oc_apply.yaml
+++ b/roles/openshift_metrics/tasks/oc_apply.yaml
@@ -16,7 +16,9 @@
apply -f {{ file_name }}
-n {{namespace}}
register: generation_apply
- failed_when: "'error' in generation_apply.stderr"
+ failed_when:
+ - "'error' in generation_apply.stderr"
+ - "generation_apply.rc != 0"
changed_when: no
- name: Determine change status of {{file_content.kind}} {{file_content.metadata.name}}
@@ -28,5 +30,7 @@
register: version_changed
vars:
init_version: "{{ (generation_init is defined) | ternary(generation_init.stdout, '0') }}"
- failed_when: "'error' in version_changed.stderr"
+ failed_when:
+ - "'error' in version_changed.stderr"
+ - "version_changed.rc != 0"
changed_when: version_changed.stdout | int > init_version | int
diff --git a/roles/openshift_persistent_volumes/tasks/pv.yml b/roles/openshift_persistent_volumes/tasks/pv.yml
index ef9ab7f5f..865269b7a 100644
--- a/roles/openshift_persistent_volumes/tasks/pv.yml
+++ b/roles/openshift_persistent_volumes/tasks/pv.yml
@@ -13,5 +13,5 @@
--config={{ mktemp.stdout }}/admin.kubeconfig
register: pv_create_output
when: persistent_volumes | length > 0
- failed_when: ('already exists' not in pv_create_output.stderr) and ('created' not in pv_create_output.stdout)
+ failed_when: "('already exists' not in pv_create_output.stderr) and ('created' not in pv_create_output.stdout) and pv_create_output.rc != 0"
changed_when: ('created' in pv_create_output.stdout)
diff --git a/roles/openshift_persistent_volumes/tasks/pvc.yml b/roles/openshift_persistent_volumes/tasks/pvc.yml
index 2c5519192..6c12d128c 100644
--- a/roles/openshift_persistent_volumes/tasks/pvc.yml
+++ b/roles/openshift_persistent_volumes/tasks/pvc.yml
@@ -13,5 +13,5 @@
--config={{ mktemp.stdout }}/admin.kubeconfig
register: pvc_create_output
when: persistent_volume_claims | length > 0
- failed_when: ('already exists' not in pvc_create_output.stderr) and ('created' not in pvc_create_output.stdout)
+ failed_when: "('already exists' not in pvc_create_output.stderr) and ('created' not in pvc_create_output.stdout) and pvc_create_output.rc != 0"
changed_when: ('created' in pvc_create_output.stdout)
diff --git a/roles/openshift_provisioners/tasks/oc_apply.yaml b/roles/openshift_provisioners/tasks/oc_apply.yaml
index a4ce53eae..239e1f1cc 100644
--- a/roles/openshift_provisioners/tasks/oc_apply.yaml
+++ b/roles/openshift_provisioners/tasks/oc_apply.yaml
@@ -15,7 +15,9 @@
apply -f {{ file_name }}
-n {{ namespace }}
register: generation_apply
- failed_when: "'error' in generation_apply.stderr"
+ failed_when:
+ - "'error' in generation_apply.stderr"
+ - "generation_apply.rc != 0"
changed_when: no
- name: Determine change status of {{file_content.kind}} {{file_content.metadata.name}}
@@ -36,7 +38,9 @@
delete -f {{ file_name }}
-n {{ namespace }}
register: generation_delete
- failed_when: "'error' in generation_delete.stderr"
+ failed_when:
+ - "'error' in generation_delete.stderr"
+ - "generation_delete.rc != 0"
changed_when: generation_delete.rc == 0
when: generation_apply.rc != 0
@@ -46,6 +50,8 @@
apply -f {{ file_name }}
-n {{ namespace }}
register: generation_apply
- failed_when: "'error' in generation_apply.stderr"
+ failed_when:
+ - "'error' in generation_apply.stderr"
+ - "generation_apply.rc != 0"
changed_when: generation_apply.rc == 0
when: generation_apply.rc != 0
diff --git a/roles/openshift_version/tasks/check_available_rpms.yml b/roles/openshift_version/tasks/check_available_rpms.yml
index bdbc63d27..fea0daf77 100644
--- a/roles/openshift_version/tasks/check_available_rpms.yml
+++ b/roles/openshift_version/tasks/check_available_rpms.yml
@@ -1,7 +1,7 @@
---
- name: Get available {{ openshift_service_type}} version
repoquery:
- name: "{{ openshift_service_type}}"
+ name: "{{ openshift_service_type}}{{ '-' ~ openshift_release ~ '*' if openshift_release is defined else '' }}"
ignore_excluders: true
register: rpm_results
diff --git a/roles/openshift_version/tasks/first_master_containerized_version.yml b/roles/openshift_version/tasks/first_master_containerized_version.yml
index e02a75eab..3ed1d2cfe 100644
--- a/roles/openshift_version/tasks/first_master_containerized_version.yml
+++ b/roles/openshift_version/tasks/first_master_containerized_version.yml
@@ -7,6 +7,7 @@
when:
- openshift_image_tag is defined
- openshift_version is not defined
+ - not (openshift_version_reinit | default(false))
- name: Set containerized version to configure if openshift_release specified
set_fact:
@@ -20,7 +21,7 @@
docker run --rm {{ openshift_cli_image }}:latest version
register: cli_image_version
when:
- - openshift_version is not defined
+ - openshift_version is not defined or openshift_version_reinit | default(false)
- not openshift_use_crio_only
# Origin latest = pre-release version (i.e. v1.3.0-alpha.1-321-gb095e3a)
@@ -34,7 +35,7 @@
- set_fact:
openshift_version: "{{ cli_image_version.stdout_lines[0].split(' ')[1].split('-')[0][1:] }}"
- when: openshift_version is not defined
+ when: openshift_version is not defined or openshift_version_reinit | default(false)
# If we got an openshift_version like "3.2", lookup the latest 3.2 container version
# and use that value instead.
diff --git a/roles/openshift_version/tasks/first_master_rpm_version.yml b/roles/openshift_version/tasks/first_master_rpm_version.yml
index 264baca65..5d92f90c6 100644
--- a/roles/openshift_version/tasks/first_master_rpm_version.yml
+++ b/roles/openshift_version/tasks/first_master_rpm_version.yml
@@ -6,6 +6,7 @@
when:
- openshift_pkg_version is defined
- openshift_version is not defined
+ - not (openshift_version_reinit | default(false))
# These tasks should only be run against masters and nodes
- name: Set openshift_version for rpm installation
@@ -13,4 +14,7 @@
- set_fact:
openshift_version: "{{ rpm_results.results.versions.available_versions.0 }}"
- when: openshift_version is not defined
+ when: openshift_version is not defined or ( openshift_version_reinit | default(false) )
+- set_fact:
+ openshift_pkg_version: "-{{ rpm_results.results.versions.available_versions.0 }}"
+ when: openshift_version_reinit | default(false)
diff --git a/roles/openshift_version/tasks/masters_and_nodes.yml b/roles/openshift_version/tasks/masters_and_nodes.yml
index fbeb22d8b..eddd5ff42 100644
--- a/roles/openshift_version/tasks/masters_and_nodes.yml
+++ b/roles/openshift_version/tasks/masters_and_nodes.yml
@@ -6,9 +6,12 @@
include_tasks: check_available_rpms.yml
- name: Fail if rpm version and docker image version are different
fail:
- msg: "OCP rpm version {{ openshift_rpm_version }} is different from OCP image version {{ openshift_version }}"
+ msg: "OCP rpm version {{ rpm_results.results.versions.available_versions.0 }} is different from OCP image version {{ openshift_version }}"
# Both versions have the same string representation
- when: rpm_results.results.versions.available_versions.0 != openshift_version
+ when:
+ - openshift_version not in rpm_results.results.versions.available_versions.0
+ - openshift_version_reinit | default(false)
+
# block when
when: not openshift_is_atomic | bool
diff --git a/roles/template_service_broker/vars/default_images.yml b/roles/template_service_broker/vars/default_images.yml
index 662d65d9f..dc164a4db 100644
--- a/roles/template_service_broker/vars/default_images.yml
+++ b/roles/template_service_broker/vars/default_images.yml
@@ -1,4 +1,4 @@
---
-__template_service_broker_prefix: "docker.io/openshift/"
+__template_service_broker_prefix: "docker.io/openshift/origin-"
__template_service_broker_version: "latest"
-__template_service_broker_image_name: "origin-template-service-broker"
+__template_service_broker_image_name: "template-service-broker"
diff --git a/roles/template_service_broker/vars/openshift-enterprise.yml b/roles/template_service_broker/vars/openshift-enterprise.yml
index 16a08e72f..b65b97691 100644
--- a/roles/template_service_broker/vars/openshift-enterprise.yml
+++ b/roles/template_service_broker/vars/openshift-enterprise.yml
@@ -1,4 +1,4 @@
---
-__template_service_broker_prefix: "registry.access.redhat.com/openshift3/"
+__template_service_broker_prefix: "registry.access.redhat.com/openshift3/ose-"
__template_service_broker_version: "v3.7"
-__template_service_broker_image_name: "ose-template-service-broker"
+__template_service_broker_image_name: "template-service-broker"