diff options
Diffstat (limited to 'roles')
32 files changed, 6185 insertions, 74 deletions
diff --git a/roles/calico_master/tasks/main.yml b/roles/calico_master/tasks/main.yml index 05415a4d6..834ebba64 100644 --- a/roles/calico_master/tasks/main.yml +++ b/roles/calico_master/tasks/main.yml @@ -23,7 +23,7 @@ -f {{ mktemp.stdout }}/calico-policy-controller.yml --config={{ openshift.common.config_base }}/master/admin.kubeconfig register: calico_create_output - failed_when: ('already exists' not in calico_create_output.stderr) and ('created' not in calico_create_output.stdout) + failed_when: "('already exists' not in calico_create_output.stderr) and ('created' not in calico_create_output.stdout) and calico_create_output.rc != 0" changed_when: ('created' in calico_create_output.stdout) - name: Calico Master | Delete temp directory diff --git a/roles/lib_utils/filter_plugins/oo_filters.py b/roles/lib_utils/filter_plugins/oo_filters.py index fc14b5633..9f73510c4 100644 --- a/roles/lib_utils/filter_plugins/oo_filters.py +++ b/roles/lib_utils/filter_plugins/oo_filters.py @@ -21,13 +21,10 @@ import yaml from ansible import errors from ansible.parsing.yaml.dumper import AnsibleDumper -# ansible.compat.six goes away with Ansible 2.4 -try: - from ansible.compat.six import string_types, u - from ansible.compat.six.moves.urllib.parse import urlparse -except ImportError: - from ansible.module_utils.six import string_types, u - from ansible.module_utils.six.moves.urllib.parse import urlparse +# pylint: disable=import-error,no-name-in-module +from ansible.module_utils.six import string_types, u +# pylint: disable=import-error,no-name-in-module +from ansible.module_utils.six.moves.urllib.parse import urlparse HAS_OPENSSL = False try: diff --git a/roles/lib_utils/filter_plugins/openshift_master.py b/roles/lib_utils/filter_plugins/openshift_master.py index ff15f693b..e67b19c28 100644 --- a/roles/lib_utils/filter_plugins/openshift_master.py +++ b/roles/lib_utils/filter_plugins/openshift_master.py @@ -10,11 +10,7 @@ from ansible import errors from ansible.parsing.yaml.dumper 
import AnsibleDumper from ansible.plugins.filter.core import to_bool as ansible_bool -# ansible.compat.six goes away with Ansible 2.4 -try: - from ansible.compat.six import string_types, u -except ImportError: - from ansible.module_utils.six import string_types, u +from ansible.module_utils.six import string_types, u import yaml diff --git a/roles/openshift_aws/tasks/provision_nodes.yml b/roles/openshift_aws/tasks/provision_nodes.yml index d82f18574..9105b5b4c 100644 --- a/roles/openshift_aws/tasks/provision_nodes.yml +++ b/roles/openshift_aws/tasks/provision_nodes.yml @@ -2,25 +2,12 @@ # Get bootstrap config token # bootstrap should be created on first master # need to fetch it and shove it into cloud data -- name: fetch master instances - ec2_instance_facts: - region: "{{ openshift_aws_region }}" - filters: - "tag:clusterid": "{{ openshift_aws_clusterid }}" - "tag:host-type": master - instance-state-name: running - register: instancesout - retries: 20 - delay: 3 - until: - - "'instances' in instancesout" - - instancesout.instances|length > 0 +- include_tasks: setup_master_group.yml - name: slurp down the bootstrap.kubeconfig slurp: src: /etc/origin/master/bootstrap.kubeconfig - delegate_to: "{{ instancesout.instances[0].public_ip_address }}" - remote_user: root + delegate_to: "{{ groups.masters.0 }}" register: bootstrap - name: set_fact for kubeconfig token diff --git a/roles/openshift_ca/tasks/main.yml b/roles/openshift_ca/tasks/main.yml index b94cd9fba..9c8534c74 100644 --- a/roles/openshift_ca/tasks/main.yml +++ b/roles/openshift_ca/tasks/main.yml @@ -19,7 +19,8 @@ - name: Reload generated facts openshift_facts: - when: hostvars[openshift_ca_host].install_result is changed + when: + - hostvars[openshift_ca_host].install_result | default({'changed':false}) is changed - name: Create openshift_ca_config_dir if it does not exist file: diff --git a/roles/openshift_examples/meta/main.yml b/roles/openshift_examples/meta/main.yml index 1a34c85fc..9f46a4683 100644 --- 
a/roles/openshift_examples/meta/main.yml +++ b/roles/openshift_examples/meta/main.yml @@ -13,3 +13,4 @@ galaxy_info: - cloud dependencies: - role: lib_utils +- role: openshift_facts diff --git a/roles/openshift_excluder/tasks/verify_excluder.yml b/roles/openshift_excluder/tasks/verify_excluder.yml index 4f5277fa2..22a3fcd3b 100644 --- a/roles/openshift_excluder/tasks/verify_excluder.yml +++ b/roles/openshift_excluder/tasks/verify_excluder.yml @@ -3,7 +3,7 @@ # - excluder - name: Get available excluder version repoquery: - name: "{{ excluder }}" + name: "{{ excluder }}{{ '-' ~ r_openshift_excluder_upgrade_target.split('.')[0:2] | join('.') ~ '*' if r_openshift_excluder_upgrade_target is defined else '' }}" ignore_excluders: true register: repoquery_out diff --git a/roles/openshift_expand_partition/tasks/main.yml b/roles/openshift_expand_partition/tasks/main.yml index 5ae863871..b38ebdfb4 100644 --- a/roles/openshift_expand_partition/tasks/main.yml +++ b/roles/openshift_expand_partition/tasks/main.yml @@ -8,7 +8,7 @@ - name: Determine if growpart is installed command: "rpm -q cloud-utils-growpart" register: has_growpart - failed_when: has_growpart.cr != 0 and 'package cloud-utils-growpart is not installed' not in has_growpart.stdout + failed_when: has_growpart.rc != 0 and 'package cloud-utils-growpart is not installed' not in has_growpart.stdout changed_when: false when: openshift_is_containerized | bool diff --git a/roles/openshift_grafana/defaults/main.yml b/roles/openshift_grafana/defaults/main.yml new file mode 100644 index 000000000..7fd7a085d --- /dev/null +++ b/roles/openshift_grafana/defaults/main.yml @@ -0,0 +1,12 @@ +--- +gf_body_tmp: + name: grafana_name + type: prometheus + typeLogoUrl: '' + access: proxy + url: prometheus_url + basicAuth: false + withCredentials: false + jsonData: + tlsSkipVerify: true + token: satoken diff --git a/roles/openshift_grafana/files/grafana-ocp-oauth.yml b/roles/openshift_grafana/files/grafana-ocp-oauth.yml new file mode 
100644 index 000000000..82fa89004 --- /dev/null +++ b/roles/openshift_grafana/files/grafana-ocp-oauth.yml @@ -0,0 +1,661 @@ +--- +kind: Template +apiVersion: v1 +metadata: + name: grafana-ocp + annotations: + "openshift.io/display-name": Grafana ocp + description: | + Grafana server with patched Prometheus datasource. + iconClass: icon-cogs + tags: "metrics,monitoring,grafana,prometheus" +parameters: +- description: The location of the proxy image + name: IMAGE_GF + value: mrsiano/grafana-ocp:latest +- description: The location of the proxy image + name: IMAGE_PROXY + value: openshift/oauth-proxy:v1.0.0 +- description: External URL for the grafana route + name: ROUTE_URL + value: "" +- description: The namespace to instantiate heapster under. Defaults to 'grafana'. + name: NAMESPACE + value: grafana +- description: The session secret for the proxy + name: SESSION_SECRET + generate: expression + from: "[a-zA-Z0-9]{43}" +objects: +- apiVersion: v1 + kind: ServiceAccount + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"grafana-ocp"}}' +- apiVersion: authorization.openshift.io/v1 + kind: ClusterRoleBinding + metadata: + name: gf-cluster-reader + roleRef: + name: cluster-reader + subjects: + - kind: ServiceAccount + name: grafana-ocp + namespace: "${NAMESPACE}" +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + host: "${ROUTE_URL}" + to: + name: grafana-ocp + tls: + termination: Reencrypt +- apiVersion: v1 + kind: Service + metadata: + name: grafana-ocp + annotations: + prometheus.io/scrape: "true" + prometheus.io/scheme: https + service.alpha.openshift.io/serving-cert-secret-name: gf-tls + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + ports: + - name: grafana-ocp + port: 443 + 
protocol: TCP + targetPort: 8443 + selector: + app: grafana-ocp +- apiVersion: v1 + kind: Secret + metadata: + name: gf-proxy + namespace: "${NAMESPACE}" + stringData: + session_secret: "${SESSION_SECRET}=" +# Deploy Prometheus behind an oauth proxy +- apiVersion: extensions/v1beta1 + kind: Deployment + metadata: + labels: + app: grafana-ocp + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + replicas: 1 + selector: + matchLabels: + app: grafana-ocp + template: + metadata: + labels: + app: grafana-ocp + name: grafana-ocp-app + spec: + serviceAccountName: grafana-ocp + containers: + - name: oauth-proxy + image: ${IMAGE_PROXY} + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8443 + name: web + args: + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -client-id=system:serviceaccount:${NAMESPACE}:grafana-ocp + - -upstream=http://localhost:3000 + - -provider=openshift +# - '-openshift-delegate-urls={"/api/datasources": {"resource": "namespace", "verb": "get", "resourceName": "grafana-ocp", "namespace": "${NAMESPACE}"}}' + - '-openshift-sar={"namespace": "${NAMESPACE}", "verb": "list", "resource": "services"}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -skip-auth-regex=^/metrics,/api/datasources,/api/dashboards + volumeMounts: + - mountPath: /etc/tls/private + name: gf-tls + - mountPath: /etc/proxy/secrets + name: secrets + + - name: grafana-ocp + image: ${IMAGE_GF} + ports: + - name: grafana-http + containerPort: 3000 + volumeMounts: + - mountPath: "/root/go/src/github.com/grafana/grafana/data" + name: gf-data + - mountPath: "/root/go/src/github.com/grafana/grafana/conf" + name: gfconfig + - mountPath: /etc/tls/private + name: gf-tls + - mountPath: /etc/proxy/secrets + name: secrets + command: + - "./bin/grafana-server" + + volumes: + - name: gfconfig + configMap: + 
name: gf-config + - name: secrets + secret: + secretName: gf-proxy + - name: gf-tls + secret: + secretName: gf-tls + - emptyDir: {} + name: gf-data +- apiVersion: v1 + kind: ConfigMap + metadata: + name: gf-config + namespace: "${NAMESPACE}" + data: + defaults.ini: |- + ##################### Grafana Configuration Defaults ##################### + # + # Do not modify this file in grafana installs + # + + # possible values : production, development + app_mode = production + + # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty + instance_name = ${HOSTNAME} + + #################################### Paths ############################### + [paths] + # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) + # + data = data + # + # Directory where grafana can store logs + # + logs = data/log + # + # Directory where grafana will automatically scan and look for plugins + # + plugins = data/plugins + + #################################### Server ############################## + [server] + # Protocol (http, https, socket) + protocol = http + + # The ip address to bind to, empty will bind to all interfaces + http_addr = + + # The http port to use + http_port = 3000 + + # The public facing domain name used to access grafana from a browser + domain = localhost + + # Redirect to correct domain if host header does not match domain + # Prevents DNS rebinding attacks + enforce_domain = false + + # The full public facing url + root_url = %(protocol)s://%(domain)s:%(http_port)s/ + + # Log web requests + router_logging = false + + # the path relative working path + static_root_path = public + + # enable gzip + enable_gzip = false + + # https certs & key file + cert_file = /etc/tls/private/tls.crt + cert_key = /etc/tls/private/tls.key + + # Unix socket path + socket = /tmp/grafana.sock + + #################################### Database ############################ + [database] + # You can configure the 
database connection by specifying type, host, name, user and password + # as separate properties or as on string using the url property. + + # Either "mysql", "postgres" or "sqlite3", it's your choice + type = sqlite3 + host = 127.0.0.1:3306 + name = grafana + user = root + # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" + password = + # Use either URL or the previous fields to configure the database + # Example: mysql://user:secret@host:port/database + url = + + # Max idle conn setting default is 2 + max_idle_conn = 2 + + # Max conn setting default is 0 (mean not set) + max_open_conn = + + # For "postgres", use either "disable", "require" or "verify-full" + # For "mysql", use either "true", "false", or "skip-verify". + ssl_mode = disable + + ca_cert_path = + client_key_path = + client_cert_path = + server_cert_name = + + # For "sqlite3" only, path relative to data_path setting + path = grafana.db + + #################################### Session ############################# + [session] + # Either "memory", "file", "redis", "mysql", "postgres", "memcache", default is "file" + provider = file + + # Provider config options + # memory: not have any config yet + # file: session dir path, is relative to grafana data_path + # redis: config like redis server e.g. 
`addr=127.0.0.1:6379,pool_size=100,db=grafana` + # postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable + # mysql: go-sql-driver/mysql dsn config string, examples: + # `user:password@tcp(127.0.0.1:3306)/database_name` + # `user:password@unix(/var/run/mysqld/mysqld.sock)/database_name` + # memcache: 127.0.0.1:11211 + + + provider_config = sessions + + # Session cookie name + cookie_name = grafana_sess + + # If you use session in https only, default is false + cookie_secure = false + + # Session life time, default is 86400 + session_life_time = 86400 + gc_interval_time = 86400 + + #################################### Data proxy ########################### + [dataproxy] + + # This enables data proxy logging, default is false + logging = false + + #################################### Analytics ########################### + [analytics] + # Server reporting, sends usage counters to stats.grafana.org every 24 hours. + # No ip addresses are being tracked, only simple counters to track + # running instances, dashboard and error counts. It is very helpful to us. + # Change this option to false to disable reporting. 
+ reporting_enabled = true + + # Set to false to disable all checks to https://grafana.com + # for new versions (grafana itself and plugins), check is used + # in some UI views to notify that grafana or plugin update exists + # This option does not cause any auto updates, nor send any information + # only a GET request to https://grafana.com to get latest versions + check_for_updates = true + + # Google Analytics universal tracking code, only enabled if you specify an id here + google_analytics_ua_id = + + # Google Tag Manager ID, only enabled if you specify an id here + google_tag_manager_id = + + #################################### Security ############################ + [security] + # default admin user, created on startup + admin_user = admin + + # default admin password, can be changed before first start of grafana, or in profile settings + admin_password = admin + + # used for signing + secret_key = SW2YcwTIb9zpOOhoPsMm + + # Auto-login remember days + login_remember_days = 7 + cookie_username = grafana_user + cookie_remember_name = grafana_remember + + # disable gravatar profile images + disable_gravatar = false + + # data source proxy whitelist (ip_or_domain:port separated by spaces) + data_source_proxy_whitelist = + + [snapshots] + # snapshot sharing options + external_enabled = true + external_snapshot_url = https://snapshots-origin.raintank.io + external_snapshot_name = Publish to snapshot.raintank.io + + # remove expired snapshot + snapshot_remove_expired = true + + # remove snapshots after 90 days + snapshot_TTL_days = 90 + + #################################### Users #################################### + [users] + # disable user signup / registration + allow_sign_up = true + + # Allow non admin users to create organizations + allow_org_create = true + + # Set to true to automatically assign new users to the default organization (id 1) + auto_assign_org = true + + # Default role new users will be automatically assigned (if auto_assign_org above is 
set to true) + auto_assign_org_role = Admin + + # Require email validation before sign up completes + verify_email_enabled = false + + # Background text for the user field on the login page + login_hint = email or username + + # Default UI theme ("dark" or "light") + default_theme = dark + + # External user management + external_manage_link_url = + external_manage_link_name = + external_manage_info = + + [auth] + # Set to true to disable (hide) the login form, useful if you use OAuth + disable_login_form = true + + # Set to true to disable the signout link in the side menu. useful if you use auth.proxy + disable_signout_menu = true + + #################################### Anonymous Auth ###################### + [auth.anonymous] + # enable anonymous access + enabled = true + + # specify organization name that should be used for unauthenticated users + org_name = Main Org. + + # specify role for unauthenticated users + org_role = Admin + + #################################### Github Auth ######################### + [auth.github] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + auth_url = https://github.com/login/oauth/authorize + token_url = https://github.com/login/oauth/access_token + api_url = https://api.github.com/user + team_ids = + allowed_organizations = + + #################################### Google Auth ######################### + [auth.google] + enabled = false + allow_sign_up = true + client_id = some_client_id + client_secret = some_client_secret + scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email + auth_url = https://accounts.google.com/o/oauth2/auth + token_url = https://accounts.google.com/o/oauth2/token + api_url = https://www.googleapis.com/oauth2/v1/userinfo + allowed_domains = + hosted_domain = + + #################################### Grafana.com Auth #################### + # legacy key names (so they work in env variables) + 
[auth.grafananet] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + allowed_organizations = + + [auth.grafana_com] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + allowed_organizations = + + #################################### Generic OAuth ####################### + [auth.generic_oauth] + name = OAuth + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + auth_url = + token_url = + api_url = + team_ids = + allowed_organizations = + + #################################### Basic Auth ########################## + [auth.basic] + enabled = false + + #################################### Auth Proxy ########################## + [auth.proxy] + enabled = true + header_name = X-WEBAUTH-USER + header_property = username + auto_sign_up = true + ldap_sync_ttl = 60 + whitelist = + + #################################### Auth LDAP ########################### + [auth.ldap] + enabled = false + config_file = /etc/grafana/ldap.toml + allow_sign_up = true + + #################################### SMTP / Emailing ##################### + [smtp] + enabled = false + host = localhost:25 + user = + # If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;""" + password = + cert_file = + key_file = + skip_verify = false + from_address = admin@grafana.localhost + from_name = Grafana + ehlo_identity = + + [emails] + welcome_email_on_sign_up = false + templates_pattern = emails/*.html + + #################################### Logging ########################## + [log] + # Either "console", "file", "syslog". Default is console and file + # Use space to separate multiple modes, e.g. 
"console file" + mode = console file + + # Either "debug", "info", "warn", "error", "critical", default is "info" + level = error + + # optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug + filters = + + # For "console" mode only + [log.console] + level = + + # log line format, valid options are text, console and json + format = console + + # For "file" mode only + [log.file] + level = + + # log line format, valid options are text, console and json + format = text + + # This enables automated log rotate(switch of following options), default is true + log_rotate = true + + # Max line number of single file, default is 1000000 + max_lines = 1000000 + + # Max size shift of single file, default is 28 means 1 << 28, 256MB + max_size_shift = 28 + + # Segment log daily, default is true + daily_rotate = true + + # Expired days of log file(delete after max days), default is 7 + max_days = 7 + + [log.syslog] + level = + + # log line format, valid options are text, console and json + format = text + + # Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. + network = + address = + + # Syslog facility. user, daemon and local0 through local7 are valid. + facility = + + # Syslog tag. By default, the process' argv[0] is used. + tag = + + + #################################### AMQP Event Publisher ################ + [event_publisher] + enabled = false + rabbitmq_url = amqp://localhost/ + exchange = grafana_events + + #################################### Dashboard JSON files ################ + [dashboards.json] + enabled = false + path = /var/lib/grafana/dashboards + + #################################### Usage Quotas ######################## + [quota] + enabled = false + + #### set quotas to -1 to make unlimited. #### + # limit number of users per Org. + org_user = 10 + + # limit number of dashboards per Org. + org_dashboard = 100 + + # limit number of data_sources per Org. 
+ org_data_source = 10 + + # limit number of api_keys per Org. + org_api_key = 10 + + # limit number of orgs a user can create. + user_org = 10 + + # Global limit of users. + global_user = -1 + + # global limit of orgs. + global_org = -1 + + # global limit of dashboards + global_dashboard = -1 + + # global limit of api_keys + global_api_key = -1 + + # global limit on number of logged in users. + global_session = -1 + + #################################### Alerting ############################ + [alerting] + # Disable alerting engine & UI features + enabled = true + # Makes it possible to turn off alert rule execution but alerting UI is visible + execute_alerts = true + + #################################### Internal Grafana Metrics ############ + # Metrics available at HTTP API Url /api/metrics + [metrics] + enabled = true + interval_seconds = 10 + + # Send internal Grafana metrics to graphite + [metrics.graphite] + # Enable by setting the address setting (ex localhost:2003) + address = + prefix = prod.grafana.%(instance_name)s. + + [grafana_net] + url = https://grafana.com + + [grafana_com] + url = https://grafana.com + + #################################### Distributed tracing ############ + [tracing.jaeger] + # jaeger destination (ex localhost:6831) + address = + # tag that will always be included in when creating new spans. 
ex (tag1:value1,tag2:value2) + always_included_tag = + # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote + sampler_type = const + # jaeger samplerconfig param + # for "const" sampler, 0 or 1 for always false/true respectively + # for "probabilistic" sampler, a probability between 0 and 1 + # for "rateLimiting" sampler, the number of spans per second + # for "remote" sampler, param is the same as for "probabilistic" + # and indicates the initial sampling rate before the actual one + # is received from the mothership + sampler_param = 1 + + #################################### External Image Storage ############## + [external_image_storage] + # You can choose between (s3, webdav, gcs) + provider = + + [external_image_storage.s3] + bucket_url = + bucket = + region = + path = + access_key = + secret_key = + + [external_image_storage.webdav] + url = + username = + password = + public_url = + + [external_image_storage.gcs] + key_file = + bucket = diff --git a/roles/openshift_grafana/files/grafana-ocp.yml b/roles/openshift_grafana/files/grafana-ocp.yml new file mode 100644 index 000000000..bc7b4b286 --- /dev/null +++ b/roles/openshift_grafana/files/grafana-ocp.yml @@ -0,0 +1,76 @@ +--- +kind: Template +apiVersion: v1 +metadata: + name: grafana-ocp + annotations: + "openshift.io/display-name": Grafana ocp + description: | + Grafana server with patched Prometheus datasource. + iconClass: icon-cogs + tags: "metrics,monitoring,grafana,prometheus" +parameters: +- description: External URL for the grafana route + name: ROUTE_URL + value: "" +- description: The namespace to instantiate heapster under. Defaults to 'grafana'. 
+ name: NAMESPACE + value: grafana +objects: +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + host: "${ROUTE_URL}" + to: + name: grafana-ocp +- apiVersion: v1 + kind: Service + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + selector: + name: grafana-ocp + ports: + - port: 8082 + protocol: TCP + targetPort: grafana-http +- apiVersion: v1 + kind: ReplicationController + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + selector: + name: grafana-ocp + replicas: 1 + template: + version: v1 + metadata: + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + volumes: + - name: data + emptyDir: {} + containers: + - image: "mrsiano/grafana-ocp:latest" + name: grafana-ocp + ports: + - name: grafana-http + containerPort: 3000 + volumeMounts: + - name: data + mountPath: "/root/go/src/github.com/grafana/grafana/data" + command: + - "./bin/grafana-server" diff --git a/roles/openshift_grafana/files/openshift-cluster-monitoring.json b/roles/openshift_grafana/files/openshift-cluster-monitoring.json new file mode 100644 index 000000000..f59ca997f --- /dev/null +++ b/roles/openshift_grafana/files/openshift-cluster-monitoring.json @@ -0,0 +1,5138 @@ +{ + "dashboard": { + "description": "Monitors Openshift cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. 
Uses cAdvisor metrics only.", + "editable": true, + "gnetId": 315, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "rows": [ + { + "collapse": false, + "height": "200px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "height": "200px", + "id": 32, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "instant": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "Received", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "Sent", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O pressure", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + 
"format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Network I/O pressure", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster memory usage", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + 
"rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster CPU usage ", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + 
{ + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster filesystem usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "20%", + "prefix": "", + "prefixFontSize": "20%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { 
+ "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + 
"cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": 
"value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 13, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum 
(container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + 
"showTitle": false, + "title": "Total usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) ", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "overall cpu usage", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 17, + "legend": { + 
"alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods CPU usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "% Usage", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + 
"rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers Cores Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 23, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": 
true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System services CPU usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "System services CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 411, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 34, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": 
"flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_memory_usage_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes Memory usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum 
(container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 26, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_rss{systemd_service_name=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (systemd_service_name)", + "format": "time_series", + "hide": false, + "interval": 
"1s", + "intervalFactor": 1, + "legendFormat": "{{ systemd_service_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System services memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "System services memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 27, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", + 
"metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + }, + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "container_memory_usage:sort_desc", + "refId": "B", + "step": 10 + }, + { + "expr": "sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "500px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + 
"min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + 
"nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> pod: {{ pod_name }} | {{ container_name }}", + "metric": "network", + "refId": "B", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- pod: {{ pod_name }} | {{ container_name }}", + "metric": "network", + "refId": "D", + "step": 1 + }, + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "network", + "refId": "C", + "step": 1 + }, + { + "expr": "sum (irate (container_network_receive_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, 
rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "network", + "refId": "E", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "network", + "refId": "F", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 277, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> {{ pod_name }}", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- {{ pod_name }}", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "500px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 29, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + 
"total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "instant": true, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> {{ id }}", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- {{ id }}", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 35, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + 
"sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total) by (phase,reason)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ phase }} | {{ reason }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_build_total", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 54, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(openshift_build_active_time_seconds{phase=\"running\"} offset 10m)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of builds that have been running for more than 10 minutes (600 seconds).", + "tooltip": { + "shared": true, + 
"sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 55, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(openshift_build_active_time_seconds{phase=\"pending\"} offset 10m)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of build that have been waiting at least 10 minutes (600 seconds) to start.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 56, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total{phase=\"Failed\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of failed builds, regardless of the failure reason.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 57, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_build_total{phase=\"Failed\",reason=\"FetchSourceFailed\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of failed builds because of problems retrieving source from the associated 
Git repository.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 58, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total{phase=\"Complete\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of successfully completed builds.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 0, + "id": 59, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + 
"show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_build_total{phase=\"Failed\"} offset 5m", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ reason }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the failed builds totals, per failure reason, from 5 minutes ago.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift Builds", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_sdn_pod_setup_latency_sum)", + "format": "time_series", + 
"intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_setup_latency_sum", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_sdn_pod_teardown_latency{quantile=\"0.9\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_teardown_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + 
"fill": 1, + "id": 50, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, (sum by (pod_name) (irate(container_network_receive_bytes_total{pod_name!=\"\"}[5m]))))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top 10 pods doing the most receive network traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 37, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"openshift_sdn_pod_ips", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }} | {{ role }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_ips", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, 
+ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 42, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_arp_cache_entries", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ role }} | {{ instance }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_arp_cache_entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 40, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_arp_cache_entries", + "format": "time_series", + 
"intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_arp_cache_entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift SDN", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 44, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(kubelet_pleg_relist_latency_microseconds{kubernetes_io_hostname=~\"$Node\",quantile=\"0.9\"}[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ role }} | {{ instance }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "kubelet_pleg_relist", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ 
+ { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 51, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 52, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, 
+ "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kubelet_docker_operations_timeout", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns a running count (not a rate) of docker operations that have timed out since the kubelet was started.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 53, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kubelet_docker_operations_errors", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns a running count (not a rate) of 
docker operations that have failed since the kubelet was started.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Kubelet", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 46, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(scrape_samples_scraped[2m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ kubernetes_name }} | {{ instance }} ", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "scrape_samples_scraped", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": 
null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (cpu) (irate(container_cpu_usage_seconds_total{container_name=\"prometheus\"}[5m])))", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU per instance of Prometheus container.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Prometheus", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 48, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 
1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb!~\"GET|LIST|WATCH\"}[2m]))) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ resource }} || {{ verb }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of mutating API requests being made to the control plane.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 49, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb=~\"GET|LIST|WATCH\"}[2m]))) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ resource }} || {{ pod }}", + "refId": "A", + "step": 4 
+ } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of non-mutating API requests being made to the control plane.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 74, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "endpoint_queue_latency", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": " quantile {{ quantile }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "endpoint_queue_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "API 
Server", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 61, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_disk_wal_fsync_duration_seconds_count", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "etcd_disk_wal_fsync_duration_seconds_count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "etcd", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 62, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(changes(container_start_time_seconds[10m]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "The number of containers that start or restart over the last ten minutes.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Changes in your cluster", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 63, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(machine_cpu_cores)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total number of cores in the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 64, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(sort_desc(irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total number of consumed cores.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": 
false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (kubernetes_io_hostname,type) (irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumed per node in the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (cpu,id,pod_name,container_name) (irate(container_cpu_usage_seconds_total{role=\"infra\"}[5m])))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumption per system service or container on the infrastructure nodes.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + 
"name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (namespace) (irate(container_cpu_usage_seconds_total[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumed per namespace on the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": 
"flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_cpu_usage_seconds_total{id=\"/\"}[3m])) / sum(machine_cpu_cores)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Percentage of total cluster CPU in use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 69, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss) / sum(machine_memory_bytes)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Percentage of total cluster memory in use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 70, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (kubernetes_io_hostname) (irate(container_cpu_usage_seconds_total{id=~\"/system.slice/(docker|etcd).service\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Aggregate CPU usage (seconds total) of etcd+docker", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "System and container CPU", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 71, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + 
"links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "volumes_queue_latency", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "volumes_queue_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 72, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(cloudprovider_gce_api_request_duration_seconds_count[2m])", + "format": "time_series", + 
"intervalFactor": 2, + "legendFormat": "{{ request }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "cloudprovider_aws_api_request_duration_seconds_count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 73, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(storage_operation_duration_seconds_sum{kubernetes_io_hostname=~\"$Node\"}[2m])) by (operation_name,kubernetes_io_hostname)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ operation_name }} || {{ kubernetes_io_hostname }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "storage_operation_duration_seconds_sum", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + 
"type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift Volumes", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes", + "openshift" + ], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PR}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "Node", + "options": [], + "query": "label_values(kubernetes_io_hostname)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "1s", + "2m", + "20s", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "openshift cluster monitoring", + "version": 6 + } +} diff --git a/roles/openshift_grafana/meta/main.yml b/roles/openshift_grafana/meta/main.yml new file mode 100644 index 000000000..8dea6f197 --- /dev/null +++ b/roles/openshift_grafana/meta/main.yml @@ -0,0 +1,13 @@ +--- +galaxy_info: + author: Eldad Marciano + description: Setup grafana pod + company: Red Hat, Inc. 
+ license: Apache License, Version 2.0 + min_ansible_version: 2.3 + platforms: + - name: EL + versions: + - 7 + categories: + - metrics diff --git a/roles/openshift_grafana/tasks/gf-permissions.yml b/roles/openshift_grafana/tasks/gf-permissions.yml new file mode 100644 index 000000000..9d3c741ee --- /dev/null +++ b/roles/openshift_grafana/tasks/gf-permissions.yml @@ -0,0 +1,12 @@ +--- +- name: Create gf user on htpasswd + command: htpasswd -c /etc/origin/master/htpasswd gfadmin + +- name: Make sure master config use HTPasswdPasswordIdentityProvider + command: "sed -ie 's|AllowAllPasswordIdentityProvider|HTPasswdPasswordIdentityProvider\n file: /etc/origin/master/htpasswd|' /etc/origin/master/master-config.yaml" + +- name: Grant permission for gfuser + command: oc adm policy add-cluster-role-to-user cluster-reader gfadmin + +- name: Restart mater api + command: systemctl restart atomic-openshift-master-api.service diff --git a/roles/openshift_grafana/tasks/main.yml b/roles/openshift_grafana/tasks/main.yml new file mode 100644 index 000000000..6a06d40a9 --- /dev/null +++ b/roles/openshift_grafana/tasks/main.yml @@ -0,0 +1,122 @@ +--- +- name: Create grafana namespace + oc_project: + state: present + name: grafana + +- name: Configure Grafana Permissions + include_tasks: tasks/gf-permissions.yml + when: gf_oauth | default(false) | bool == true + +# TODO: we should grab this yaml file from openshift/origin +- name: Templatize grafana yaml + template: src=grafana-ocp.yaml dest=/tmp/grafana-ocp.yaml + register: + cl_file: /tmp/grafana-ocp.yaml + when: gf_oauth | default(false) | bool == false + +# TODO: we should grab this yaml file from openshift/origin +- name: Templatize grafana yaml + template: src=grafana-ocp-oauth.yaml dest=/tmp/grafana-ocp-oauth.yaml + register: + cl_file: /tmp/grafana-ocp-oauth.yaml + when: gf_oauth | default(false) | bool == true + +- name: Process the grafana file + oc_process: + namespace: grafana + template_name: "{{ cl_file }}" + create: 
True + when: gf_oauth | default(false) | bool == true + +- name: Wait to grafana be running + command: oc rollout status deployment/grafana-ocp + +- name: oc adm policy add-role-to-user view -z grafana-ocp -n {{ gf_prometheus_namespace }} + oc_adm_policy_user: + user: grafana-ocp + resource_kind: cluster-role + resource_name: view + state: present + role_namespace: "{{ gf_prometheus_namespace }}" + +- name: Get grafana route + oc_obj: + kind: route + name: grafana + namespace: grafana + register: route + +- name: Get prometheus route + oc_obj: + kind: route + name: prometheus + namespace: "{{ gf_prometheus_namespace }}" + register: route + +- name: Get the prometheus SA + oc_serviceaccount_secret: + state: list + service_account: prometheus + namespace: "{{ gf_prometheus_namespace }}" + register: sa + +- name: Get the management SA bearer token + set_fact: + management_token: "{{ sa.results | oo_filter_sa_secrets }}" + +- name: Ensure the SA bearer token value is read + oc_secret: + state: list + name: "{{ management_token }}" + namespace: "{{ gf_prometheus_namespace }}" + no_log: True + register: sa_secret + +- name: Get the SA bearer token for prometheus + set_fact: + token: "{{ sa_secret.results.encoded.token }}" + +- name: Convert to json + var: + ds_json: "{{ gf_body_tmp }} | to_json }}" + +- name: Set protocol type + var: + protocol: "{{ 'https' if {{ gf_oauth }} == true else 'http' }}" + +- name: Add gf datasrouce + uri: + url: "{{ protocol }}://{{ route }}/api/datasources" + user: admin + password: admin + method: POST + body: "{{ ds_json | regex_replace('grafana_name', {{ gf_datasource_name }}) | regex_replace('prometheus_url', 'https://'{{ prometheus }} ) | regex_replace('satoken', {{ token }}) }}" + headers: + Content-Type: "Content-Type: application/json" + register: add_ds + +- name: Regex setup ds name + replace: + path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + regexp: '${DS_PR}' + replace: '{{ gf_datasource_name }}' + backup: yes 
+ +- name: Add new dashboard + uri: + url: "{{ protocol }}://{{ route }}/api/dashboards/db" + user: admin + password: admin + method: POST + body: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + headers: + Content-Type: "Content-Type: application/json" + register: add_ds + +- name: Regex json tear down + replace: + path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + regexp: '${DS_PR}' + replace: '{{ gf_datasource_name }}' + backup: yes diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 7afb8f730..ac6ffbbad 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -56,7 +56,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): # ordered list of registries (according to inventory vars) that docker will try for unscoped images regs = self.ensure_list("openshift_docker_additional_registries") # currently one of these registries is added whether the user wants it or not. 
- deployment_type = self.get_var("openshift_deployment_type") + deployment_type = self.get_var("openshift_deployment_type", default="") if deployment_type == "origin" and "docker.io" not in regs: regs.append("docker.io") elif deployment_type == 'openshift-enterprise' and "registry.access.redhat.com" not in regs: diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py index 986a01f38..7f8c6ebdc 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py +++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py @@ -170,7 +170,7 @@ class Elasticsearch(LoggingCheck): """ errors = [] for pod_name in pods_by_name.keys(): - df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name) + df_cmd = '-c elasticsearch exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name) disk_output = self.exec_oc(df_cmd, [], save_as_name='get_pv_diskspace.json') lines = disk_output.splitlines() # expecting one header looking like 'IUse% Use%' and one body line diff --git a/roles/openshift_hosted/tasks/storage/glusterfs_endpoints.yml b/roles/openshift_hosted/tasks/storage/glusterfs_endpoints.yml index 77f020357..fef945d51 100644 --- a/roles/openshift_hosted/tasks/storage/glusterfs_endpoints.yml +++ b/roles/openshift_hosted/tasks/storage/glusterfs_endpoints.yml @@ -1,4 +1,10 @@ --- +- name: Create temp directory for doing work in + command: mktemp -d /tmp/openshift-hosted-ansible-XXXXXX + register: mktempHosted + changed_when: False + check_mode: no + - name: Generate GlusterFS registry endpoints template: src: "{{ openshift.common.examples_content_version }}/glusterfs-registry-endpoints.yml.j2" @@ -14,3 +20,10 @@ with_items: - "{{ mktempHosted.stdout }}/glusterfs-registry-service.yml" - "{{ mktempHosted.stdout }}/glusterfs-registry-endpoints.yml" + +- name: Delete temp directory + file: + 
name: "{{ mktempHosted.stdout }}" + state: absent + changed_when: False + check_mode: no diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml index ebd2d747b..f82e55b98 100644 --- a/roles/openshift_logging/tasks/install_logging.yaml +++ b/roles/openshift_logging/tasks/install_logging.yaml @@ -87,7 +87,7 @@ openshift_logging_elasticsearch_storage_type: "{{ elasticsearch_storage_type }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" - openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name }}" + openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name | default() }}" openshift_logging_elasticsearch_nodeselector: "{{ openshift_logging_es_nodeselector if outer_item.0.nodeSelector | default(None) is none else outer_item.0.nodeSelector }}" openshift_logging_elasticsearch_storage_group: "{{ [openshift_logging_es_storage_group] if outer_item.0.storageGroups | default([]) | length == 0 else outer_item.0.storageGroups }}" _es_containers: "{{ outer_item.0.containers}}" @@ -114,7 +114,7 @@ openshift_logging_elasticsearch_storage_type: "{{ elasticsearch_storage_type }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" - openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name }}" + openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name | default() }}" with_sequence: count={{ openshift_logging_es_cluster_size | int - openshift_logging_facts.elasticsearch.deploymentconfigs.keys() | count }} loop_control: @@ -151,7 +151,7 @@ openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_ops_pvc_size }}" openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_ops_pvc_dynamic }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ 
openshift_logging_es_ops_pv_selector }}" - openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name }}" + openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name | default() }}" openshift_logging_elasticsearch_memory_limit: "{{ openshift_logging_es_ops_memory_limit }}" openshift_logging_elasticsearch_cpu_limit: "{{ openshift_logging_es_ops_cpu_limit }}" openshift_logging_elasticsearch_cpu_request: "{{ openshift_logging_es_ops_cpu_request }}" @@ -193,7 +193,7 @@ openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_ops_pvc_size }}" openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_ops_pvc_dynamic }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_ops_pv_selector }}" - openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name }}" + openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name | default() }}" openshift_logging_elasticsearch_memory_limit: "{{ openshift_logging_es_ops_memory_limit }}" openshift_logging_elasticsearch_cpu_limit: "{{ openshift_logging_es_ops_cpu_limit }}" openshift_logging_elasticsearch_cpu_request: "{{ openshift_logging_es_ops_cpu_request }}" @@ -321,9 +321,14 @@ - name: Add Kibana route information to web console asset config include_role: name: openshift_web_console - tasks_from: update_asset_config.yml + tasks_from: update_console_config.yml vars: - asset_config_edits: + console_config_edits: + - key: clusterInfo#loggingPublicURL + value: "https://{{ openshift_logging_kibana_hostname }}" + # Continue to set the old deprecated property until the + # origin-web-console image is updated for the new name. + # This will be removed in a future pull. 
- key: loggingPublicURL value: "https://{{ openshift_logging_kibana_hostname }}" when: openshift_web_console_install | default(true) | bool diff --git a/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml b/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml index 96079884e..4564f33dd 100644 --- a/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml +++ b/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml @@ -8,8 +8,10 @@ # TODO: If the sdn package isn't already installed this will install it, we # should fix that -- name: Upgrade master packages - package: name={{ master_pkgs | join(',') }} state=present +- name: Upgrade master packages - yum + command: + yum install -y {{ master_pkgs | join(' ') }} \ + {{ ' --exclude *' ~ openshift_service_type ~ '*3.9*' if openshift_release | version_compare('3.9','<') else '' }} vars: master_pkgs: - "{{ openshift_service_type }}{{ openshift_pkg_version | default('') }}" @@ -19,3 +21,19 @@ - "{{ openshift_service_type }}-clients{{ openshift_pkg_version | default('') }}" register: result until: result is succeeded + when: ansible_pkg_mgr == 'yum' + +- name: Upgrade master packages - dnf + dnf: + name: "{{ master_pkgs | join(',') }}" + state: present + vars: + master_pkgs: + - "{{ openshift_service_type }}{{ openshift_pkg_version }}" + - "{{ openshift_service_type }}-master{{ openshift_pkg_version }}" + - "{{ openshift_service_type }}-node{{ openshift_pkg_version }}" + - "{{ openshift_service_type }}-sdn-ovs{{ openshift_pkg_version }}" + - "{{ openshift_service_type }}-clients{{ openshift_pkg_version }}" + register: result + until: result is succeeded + when: ansible_pkg_mgr == 'dnf' diff --git a/roles/openshift_metrics/tasks/install_metrics.yaml b/roles/openshift_metrics/tasks/install_metrics.yaml index 0866fe0d2..4a63d081e 100644 --- a/roles/openshift_metrics/tasks/install_metrics.yaml +++ b/roles/openshift_metrics/tasks/install_metrics.yaml @@ -74,9 +74,14 @@ - name: Add metrics route information to web console asset config 
include_role: name: openshift_web_console - tasks_from: update_asset_config.yml + tasks_from: update_console_config.yml vars: - asset_config_edits: + console_config_edits: + - key: clusterInfo#metricsPublicURL + value: "https://{{ openshift_metrics_hawkular_hostname}}/hawkular/metrics" + # Continue to set the old deprecated property until the + # origin-web-console image is updated for the new name. + # This will be removed in a future pull. - key: metricsPublicURL value: "https://{{ openshift_metrics_hawkular_hostname}}/hawkular/metrics" when: openshift_web_console_install | default(true) | bool diff --git a/roles/openshift_metrics/tasks/oc_apply.yaml b/roles/openshift_metrics/tasks/oc_apply.yaml index 8ccfb7192..057963c1a 100644 --- a/roles/openshift_metrics/tasks/oc_apply.yaml +++ b/roles/openshift_metrics/tasks/oc_apply.yaml @@ -16,7 +16,9 @@ apply -f {{ file_name }} -n {{namespace}} register: generation_apply - failed_when: "'error' in generation_apply.stderr" + failed_when: + - "'error' in generation_apply.stderr" + - "generation_apply.rc != 0" changed_when: no - name: Determine change status of {{file_content.kind}} {{file_content.metadata.name}} @@ -28,5 +30,7 @@ register: version_changed vars: init_version: "{{ (generation_init is defined) | ternary(generation_init.stdout, '0') }}" - failed_when: "'error' in version_changed.stderr" + failed_when: + - "'error' in version_changed.stderr" + - "version_changed.rc != 0" changed_when: version_changed.stdout | int > init_version | int diff --git a/roles/openshift_persistent_volumes/tasks/pv.yml b/roles/openshift_persistent_volumes/tasks/pv.yml index ef9ab7f5f..865269b7a 100644 --- a/roles/openshift_persistent_volumes/tasks/pv.yml +++ b/roles/openshift_persistent_volumes/tasks/pv.yml @@ -13,5 +13,5 @@ --config={{ mktemp.stdout }}/admin.kubeconfig register: pv_create_output when: persistent_volumes | length > 0 - failed_when: ('already exists' not in pv_create_output.stderr) and ('created' not in 
pv_create_output.stdout) + failed_when: "('already exists' not in pv_create_output.stderr) and ('created' not in pv_create_output.stdout) and pv_create_output.rc != 0" changed_when: ('created' in pv_create_output.stdout) diff --git a/roles/openshift_persistent_volumes/tasks/pvc.yml b/roles/openshift_persistent_volumes/tasks/pvc.yml index 2c5519192..6c12d128c 100644 --- a/roles/openshift_persistent_volumes/tasks/pvc.yml +++ b/roles/openshift_persistent_volumes/tasks/pvc.yml @@ -13,5 +13,5 @@ --config={{ mktemp.stdout }}/admin.kubeconfig register: pvc_create_output when: persistent_volume_claims | length > 0 - failed_when: ('already exists' not in pvc_create_output.stderr) and ('created' not in pvc_create_output.stdout) + failed_when: "('already exists' not in pvc_create_output.stderr) and ('created' not in pvc_create_output.stdout) and pvc_create_output.rc != 0" changed_when: ('created' in pvc_create_output.stdout) diff --git a/roles/openshift_provisioners/tasks/oc_apply.yaml b/roles/openshift_provisioners/tasks/oc_apply.yaml index a4ce53eae..239e1f1cc 100644 --- a/roles/openshift_provisioners/tasks/oc_apply.yaml +++ b/roles/openshift_provisioners/tasks/oc_apply.yaml @@ -15,7 +15,9 @@ apply -f {{ file_name }} -n {{ namespace }} register: generation_apply - failed_when: "'error' in generation_apply.stderr" + failed_when: + - "'error' in generation_apply.stderr" + - "generation_apply.rc != 0" changed_when: no - name: Determine change status of {{file_content.kind}} {{file_content.metadata.name}} @@ -36,7 +38,9 @@ delete -f {{ file_name }} -n {{ namespace }} register: generation_delete - failed_when: "'error' in generation_delete.stderr" + failed_when: + - "'error' in generation_delete.stderr" + - "generation_delete.rc != 0" changed_when: generation_delete.rc == 0 when: generation_apply.rc != 0 @@ -46,6 +50,8 @@ apply -f {{ file_name }} -n {{ namespace }} register: generation_apply - failed_when: "'error' in generation_apply.stderr" + failed_when: + - "'error' in 
generation_apply.stderr" + - "generation_apply.rc != 0" changed_when: generation_apply.rc == 0 when: generation_apply.rc != 0 diff --git a/roles/openshift_version/tasks/check_available_rpms.yml b/roles/openshift_version/tasks/check_available_rpms.yml index bdbc63d27..fea0daf77 100644 --- a/roles/openshift_version/tasks/check_available_rpms.yml +++ b/roles/openshift_version/tasks/check_available_rpms.yml @@ -1,7 +1,7 @@ --- - name: Get available {{ openshift_service_type}} version repoquery: - name: "{{ openshift_service_type}}" + name: "{{ openshift_service_type}}{{ '-' ~ openshift_release ~ '*' if openshift_release is defined else '' }}" ignore_excluders: true register: rpm_results diff --git a/roles/openshift_version/tasks/first_master_containerized_version.yml b/roles/openshift_version/tasks/first_master_containerized_version.yml index e02a75eab..3ed1d2cfe 100644 --- a/roles/openshift_version/tasks/first_master_containerized_version.yml +++ b/roles/openshift_version/tasks/first_master_containerized_version.yml @@ -7,6 +7,7 @@ when: - openshift_image_tag is defined - openshift_version is not defined + - not (openshift_version_reinit | default(false)) - name: Set containerized version to configure if openshift_release specified set_fact: @@ -20,7 +21,7 @@ docker run --rm {{ openshift_cli_image }}:latest version register: cli_image_version when: - - openshift_version is not defined + - openshift_version is not defined or openshift_version_reinit | default(false) - not openshift_use_crio_only # Origin latest = pre-release version (i.e. v1.3.0-alpha.1-321-gb095e3a) @@ -34,7 +35,7 @@ - set_fact: openshift_version: "{{ cli_image_version.stdout_lines[0].split(' ')[1].split('-')[0][1:] }}" - when: openshift_version is not defined + when: openshift_version is not defined or openshift_version_reinit | default(false) # If we got an openshift_version like "3.2", lookup the latest 3.2 container version # and use that value instead. 
diff --git a/roles/openshift_version/tasks/first_master_rpm_version.yml b/roles/openshift_version/tasks/first_master_rpm_version.yml index 264baca65..5d92f90c6 100644 --- a/roles/openshift_version/tasks/first_master_rpm_version.yml +++ b/roles/openshift_version/tasks/first_master_rpm_version.yml @@ -6,6 +6,7 @@ when: - openshift_pkg_version is defined - openshift_version is not defined + - not (openshift_version_reinit | default(false)) # These tasks should only be run against masters and nodes - name: Set openshift_version for rpm installation @@ -13,4 +14,7 @@ - set_fact: openshift_version: "{{ rpm_results.results.versions.available_versions.0 }}" - when: openshift_version is not defined + when: openshift_version is not defined or ( openshift_version_reinit | default(false) ) +- set_fact: + openshift_pkg_version: "-{{ rpm_results.results.versions.available_versions.0 }}" + when: openshift_version_reinit | default(false) diff --git a/roles/openshift_version/tasks/masters_and_nodes.yml b/roles/openshift_version/tasks/masters_and_nodes.yml index fbeb22d8b..eddd5ff42 100644 --- a/roles/openshift_version/tasks/masters_and_nodes.yml +++ b/roles/openshift_version/tasks/masters_and_nodes.yml @@ -6,9 +6,12 @@ include_tasks: check_available_rpms.yml - name: Fail if rpm version and docker image version are different fail: - msg: "OCP rpm version {{ openshift_rpm_version }} is different from OCP image version {{ openshift_version }}" + msg: "OCP rpm version {{ rpm_results.results.versions.available_versions.0 }} is different from OCP image version {{ openshift_version }}" # Both versions have the same string representation - when: rpm_results.results.versions.available_versions.0 != openshift_version + when: + - openshift_version not in rpm_results.results.versions.available_versions.0 + - openshift_version_reinit | default(false) + # block when when: not openshift_is_atomic | bool diff --git a/roles/openshift_web_console/tasks/install.yml 
b/roles/openshift_web_console/tasks/install.yml index 12916961b..50e72657f 100644 --- a/roles/openshift_web_console/tasks/install.yml +++ b/roles/openshift_web_console/tasks/install.yml @@ -21,36 +21,68 @@ node_selector: - "" -- name: Make temp directory for asset config files +- name: Make temp directory for the web console config files command: mktemp -d /tmp/console-ansible-XXXXXX register: mktemp changed_when: False -- name: Copy asset config template to temp directory +- name: Copy the web console config template to temp directory copy: src: "{{ __console_files_location }}/{{ item }}" dest: "{{ mktemp.stdout }}/{{ item }}" with_items: - "{{ __console_template_file }}" + - "{{ __console_rbac_file }}" - "{{ __console_config_file }}" -- name: Update asset config properties +- name: Update the web console config properties yedit: src: "{{ mktemp.stdout }}/{{ __console_config_file }}" edits: - - key: logoutURL + - key: clusterInfo#consolePublicURL + # Must have a trailing slash + value: "{{ openshift.master.public_console_url }}/" + - key: clusterInfo#masterPublicURL + value: "{{ openshift.master.public_api_url }}" + - key: clusterInfo#logoutPublicURL value: "{{ openshift.master.logout_url | default('') }}" + - key: features#inactivityTimeoutMinutes + value: "{{ openshift_web_console_inactivity_timeout_minutes | default(0) }}" + + # TODO: The new extensions properties cannot be set until + # origin-web-console-server has been updated with the API changes since + # `extensions` in the old asset config was an array. 
+ + # - key: extensions#scriptURLs + # value: "{{ openshift_web_console_extension_script_urls | default([]) }}" + # - key: extensions#stylesheetURLs + # value: "{{ openshift_web_console_extension_stylesheet_urls | default([]) }}" + # - key: extensions#properties + # value: "{{ openshift_web_console_extension_properties | default({}) }}" + + # DEPRECATED PROPERTIES + # These properties have been renamed and will be removed from the install + # in a future pull. Keep both the old and new properties for now so that + # the install is not broken while the origin-web-console image is updated. - key: publicURL # Must have a trailing slash value: "{{ openshift.master.public_console_url }}/" + - key: logoutURL + value: "{{ openshift.master.logout_url | default('') }}" - key: masterPublicURL value: "{{ openshift.master.public_api_url }}" + separator: '#' + state: present - slurp: src: "{{ mktemp.stdout }}/{{ __console_config_file }}" register: config -- name: Apply template file +- name: Reconcile with the web console RBAC file + shell: > + {{ openshift_client_binary }} process -f "{{ mktemp.stdout }}/{{ __console_rbac_file }}" | {{ openshift_client_binary }} auth reconcile -f - + +- name: Apply the web console template file shell: > {{ openshift_client_binary }} process -f "{{ mktemp.stdout }}/{{ __console_template_file }}" --param API_SERVER_CONFIG="{{ config['content'] | b64decode }}" diff --git a/roles/openshift_web_console/tasks/update_asset_config.yml b/roles/openshift_web_console/tasks/update_console_config.yml index 0992b32e1..e347c0193 100644 --- a/roles/openshift_web_console/tasks/update_asset_config.yml +++ b/roles/openshift_web_console/tasks/update_console_config.yml @@ -1,9 +1,9 @@ --- # This task updates asset config values in the webconsole-config config map in # the openshift-web-console namespace. 
The values to set are passed in the -# variable `asset_config_edits`, which is an array of objects with `key` and +# variable `console_config_edits`, which is an array of objects with `key` and # `value` properties in the same format as `yedit` module `edits`. Only -# properties passed are updated. +# properties passed are updated. The separator for nested properties is `#`. # # Note that this triggers a redeployment on the console and a brief downtime # since it uses a `Recreate` strategy. @@ -12,10 +12,10 @@ # # - include_role: # name: openshift_web_console -# tasks_from: update_asset_config.yml +# tasks_from: update_console_config.yml # vars: -# asset_config_edits: -# - key: loggingPublicURL +# console_config_edits: +# - key: clusterInfo#loggingPublicURL # value: "https://{{ openshift_logging_kibana_hostname }}" # when: openshift_web_console_install | default(true) | bool @@ -28,18 +28,20 @@ - name: Make temp directory command: mktemp -d /tmp/console-ansible-XXXXXX - register: mktemp + register: mktemp_console changed_when: False -- name: Copy asset config to temp file +- name: Copy web console config to temp file copy: content: "{{webconsole_config.results.results[0].data['webconsole-config.yaml']}}" - dest: "{{ mktemp.stdout }}/webconsole-config.yaml" + dest: "{{ mktemp_console.stdout }}/webconsole-config.yaml" -- name: Change asset config properties +- name: Change web console config properties yedit: - src: "{{ mktemp.stdout }}/webconsole-config.yaml" - edits: "{{asset_config_edits}}" + src: "{{ mktemp_console.stdout }}/webconsole-config.yaml" + edits: "{{console_config_edits}}" + separator: '#' + state: present - name: Update web console config map oc_configmap: @@ -47,14 +49,15 @@ name: webconsole-config state: present from_file: - webconsole-config.yaml: "{{ mktemp.stdout }}/webconsole-config.yaml" + webconsole-config.yaml: "{{ mktemp_console.stdout }}/webconsole-config.yaml" - name: Remove temp directory file: state: absent - name: "{{ mktemp.stdout }}" + 
name: "{{ mktemp_console.stdout }}" changed_when: False +# TODO: Only rollout if config has changed. # There's currently no command to trigger a rollout for a k8s deployment # without changing the pod spec. Add an annotation to force a rollout after # the config map has been edited. diff --git a/roles/openshift_web_console/vars/main.yml b/roles/openshift_web_console/vars/main.yml index 80bc56a17..e91048e38 100644 --- a/roles/openshift_web_console/vars/main.yml +++ b/roles/openshift_web_console/vars/main.yml @@ -2,4 +2,5 @@ __console_files_location: "../../../files/origin-components/" __console_template_file: "console-template.yaml" +__console_rbac_file: "console-rbac-template.yaml" __console_config_file: "console-config.yaml" |