diff options
Diffstat (limited to 'roles')
68 files changed, 6347 insertions, 253 deletions
diff --git a/roles/ansible_service_broker/tasks/install.yml b/roles/ansible_service_broker/tasks/install.yml index 1bc1b5e43..f869b5fae 100644 --- a/roles/ansible_service_broker/tasks/install.yml +++ b/roles/ansible_service_broker/tasks/install.yml @@ -375,6 +375,11 @@ secret: secretName: etcd-auth-secret +- name: set auth name and type facts if needed + set_fact: + ansible_service_broker_registry_auth_type: "secret" + ansible_service_broker_registry_auth_name: "asb-registry-auth" + when: ansible_service_broker_registry_user != "" and ansible_service_broker_registry_password != "" # TODO: saw a oc_configmap in the library, but didn't understand how to get it to do the following: - name: Create config map for ansible-service-broker @@ -402,6 +407,8 @@ org: {{ ansible_service_broker_registry_organization }} tag: {{ ansible_service_broker_registry_tag }} white_list: {{ ansible_service_broker_registry_whitelist | to_yaml }} + auth_type: "{{ ansible_service_broker_registry_auth_type | default("") }}" + auth_name: "{{ ansible_service_broker_registry_auth_name | default("") }}" - type: local_openshift name: localregistry namespaces: ['openshift'] @@ -447,6 +454,7 @@ data: "{{ ansible_service_broker_registry_user }}" - path: password data: "{{ ansible_service_broker_registry_password }}" + when: ansible_service_broker_registry_user != "" and ansible_service_broker_registry_password != "" - name: Create the Broker resource in the catalog oc_obj: diff --git a/roles/calico_master/tasks/main.yml b/roles/calico_master/tasks/main.yml index 05415a4d6..834ebba64 100644 --- a/roles/calico_master/tasks/main.yml +++ b/roles/calico_master/tasks/main.yml @@ -23,7 +23,7 @@ -f {{ mktemp.stdout }}/calico-policy-controller.yml --config={{ openshift.common.config_base }}/master/admin.kubeconfig register: calico_create_output - failed_when: ('already exists' not in calico_create_output.stderr) and ('created' not in calico_create_output.stdout) + failed_when: "('already exists' not in calico_create_output.stderr) and ('created' not in calico_create_output.stdout) and calico_create_output.rc != 0" changed_when: ('created' in calico_create_output.stdout) - name: Calico Master | Delete temp directory diff --git a/roles/container_runtime/tasks/package_docker.yml b/roles/container_runtime/tasks/package_docker.yml index d6e7e7fed..ed9a2709b 100644 --- a/roles/container_runtime/tasks/package_docker.yml +++ b/roles/container_runtime/tasks/package_docker.yml @@ -1,6 +1,17 @@ --- - include_tasks: common/pre.yml +# In some cases, some services may be run as containers and docker may still +# be installed via rpm. +- include_tasks: common/atomic_proxy.yml + when: + - > + (openshift_use_system_containers | default(False)) | bool + or (openshift_use_etcd_system_container | default(False)) | bool + or (openshift_use_openvswitch_system_container | default(False)) | bool + or (openshift_use_node_system_container | default(False)) | bool + or (openshift_use_master_system_container | default(False)) | bool + - name: Get current installed Docker version command: "{{ repoquery_installed }} --qf '%{version}' docker" when: not openshift_is_atomic | bool diff --git a/roles/etcd/tasks/auxiliary/drop_etcdctl.yml b/roles/etcd/tasks/auxiliary/drop_etcdctl.yml index 881a8c270..cab835e20 100644 --- a/roles/etcd/tasks/auxiliary/drop_etcdctl.yml +++ b/roles/etcd/tasks/auxiliary/drop_etcdctl.yml @@ -1,7 +1,7 @@ --- - name: Install etcd for etcdctl package: name=etcd{{ '-' + etcd_version if etcd_version is defined else '' }} state=present - when: not openshift_is_atomic | bool + when: not openshift_is_containerized | bool register: result until: result is succeeded diff --git a/roles/lib_utils/filter_plugins/oo_filters.py b/roles/lib_utils/filter_plugins/oo_filters.py index fc14b5633..9f73510c4 100644 --- a/roles/lib_utils/filter_plugins/oo_filters.py +++ b/roles/lib_utils/filter_plugins/oo_filters.py @@ -21,13 +21,10 @@ import yaml from ansible import errors from ansible.parsing.yaml.dumper import AnsibleDumper -# ansible.compat.six goes away with Ansible 2.4 -try: - from ansible.compat.six import string_types, u - from ansible.compat.six.moves.urllib.parse import urlparse -except ImportError: - from ansible.module_utils.six import string_types, u - from ansible.module_utils.six.moves.urllib.parse import urlparse +# pylint: disable=import-error,no-name-in-module +from ansible.module_utils.six import string_types, u +# pylint: disable=import-error,no-name-in-module +from ansible.module_utils.six.moves.urllib.parse import urlparse HAS_OPENSSL = False try: diff --git a/roles/lib_utils/filter_plugins/openshift_master.py b/roles/lib_utils/filter_plugins/openshift_master.py index ff15f693b..e67b19c28 100644 --- a/roles/lib_utils/filter_plugins/openshift_master.py +++ b/roles/lib_utils/filter_plugins/openshift_master.py @@ -10,11 +10,7 @@ from ansible import errors from ansible.parsing.yaml.dumper import AnsibleDumper from ansible.plugins.filter.core import to_bool as ansible_bool -# ansible.compat.six goes away with Ansible 2.4 -try: - from ansible.compat.six import string_types, u -except ImportError: - from ansible.module_utils.six import string_types, u +from ansible.module_utils.six import string_types, u import yaml diff --git a/roles/openshift_aws/tasks/accept_nodes.yml b/roles/openshift_aws/tasks/accept_nodes.yml index c2a2cea30..db30fe5c9 100644 --- a/roles/openshift_aws/tasks/accept_nodes.yml +++ b/roles/openshift_aws/tasks/accept_nodes.yml @@ -1,4 +1,6 @@ --- +- include_tasks: setup_master_group.yml + - name: fetch masters ec2_instance_facts: region: "{{ openshift_aws_region | default('us-east-1') }}" @@ -36,4 +38,4 @@ nodes: "{{ instancesout.instances|map(attribute='private_dns_name') | list }}" timeout: 60 register: nodeout - delegate_to: "{{ mastersout.instances[0].public_ip_address }}" + delegate_to: "{{ groups.masters.0 }}" diff --git a/roles/openshift_aws/tasks/provision_nodes.yml b/roles/openshift_aws/tasks/provision_nodes.yml index d82f18574..9105b5b4c 100644 --- a/roles/openshift_aws/tasks/provision_nodes.yml +++ b/roles/openshift_aws/tasks/provision_nodes.yml @@ -2,25 +2,12 @@ # Get bootstrap config token # bootstrap should be created on first master # need to fetch it and shove it into cloud data -- name: fetch master instances - ec2_instance_facts: - region: "{{ openshift_aws_region }}" - filters: - "tag:clusterid": "{{ openshift_aws_clusterid }}" - "tag:host-type": master - instance-state-name: running - register: instancesout - retries: 20 - delay: 3 - until: - - "'instances' in instancesout" - - instancesout.instances|length > 0 +- include_tasks: setup_master_group.yml - name: slurp down the bootstrap.kubeconfig slurp: src: /etc/origin/master/bootstrap.kubeconfig - delegate_to: "{{ instancesout.instances[0].public_ip_address }}" - remote_user: root + delegate_to: "{{ groups.masters.0 }}" register: bootstrap - name: set_fact for kubeconfig token diff --git a/roles/openshift_ca/tasks/main.yml b/roles/openshift_ca/tasks/main.yml index b94cd9fba..9c8534c74 100644 --- a/roles/openshift_ca/tasks/main.yml +++ b/roles/openshift_ca/tasks/main.yml @@ -19,7 +19,8 @@ - name: Reload generated facts openshift_facts: - when: hostvars[openshift_ca_host].install_result is changed + when: + - hostvars[openshift_ca_host].install_result | default({'changed':false}) is changed - name: Create openshift_ca_config_dir if it does not exist file: diff --git a/roles/openshift_examples/meta/main.yml b/roles/openshift_examples/meta/main.yml index 1a34c85fc..9f46a4683 100644 --- a/roles/openshift_examples/meta/main.yml +++ b/roles/openshift_examples/meta/main.yml @@ -13,3 +13,4 @@ galaxy_info: - cloud dependencies: - role: lib_utils +- role: openshift_facts diff --git a/roles/openshift_excluder/tasks/verify_excluder.yml b/roles/openshift_excluder/tasks/verify_excluder.yml index 4f5277fa2..22a3fcd3b 100644 --- a/roles/openshift_excluder/tasks/verify_excluder.yml +++ b/roles/openshift_excluder/tasks/verify_excluder.yml @@ -3,7 +3,7 @@ # - excluder - name: Get available excluder version repoquery: - name: "{{ excluder }}" + name: "{{ excluder }}{{ '-' ~ r_openshift_excluder_upgrade_target.split('.')[0:2] | join('.') ~ '*' if r_openshift_excluder_upgrade_target is defined else '' }}" ignore_excluders: true register: repoquery_out diff --git a/roles/openshift_expand_partition/tasks/main.yml b/roles/openshift_expand_partition/tasks/main.yml index 5ae863871..b38ebdfb4 100644 --- a/roles/openshift_expand_partition/tasks/main.yml +++ b/roles/openshift_expand_partition/tasks/main.yml @@ -8,7 +8,7 @@ - name: Determine if growpart is installed command: "rpm -q cloud-utils-growpart" register: has_growpart - failed_when: has_growpart.cr != 0 and 'package cloud-utils-growpart is not installed' not in has_growpart.stdout + failed_when: has_growpart.rc != 0 and 'package cloud-utils-growpart is not installed' not in has_growpart.stdout changed_when: false when: openshift_is_containerized | bool diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index d7c358a2f..26f0525e9 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -1465,6 +1465,11 @@ class OpenShiftFacts(object): if metadata: metadata['project']['attributes'].pop('sshKeys', None) metadata['instance'].pop('serviceAccounts', None) + elif bios_vendor == 'Amazon EC2': + # Adds support for Amazon EC2 C5 instance types + provider = 'aws' + metadata_url = 'http://169.254.169.254/latest/meta-data/' + metadata = get_provider_metadata(metadata_url) elif virt_type == 'xen' and virt_role == 'guest' and re.match(r'.*\.amazon$', product_version): provider = 'aws' metadata_url = 'http://169.254.169.254/latest/meta-data/' diff --git a/roles/openshift_grafana/defaults/main.yml b/roles/openshift_grafana/defaults/main.yml new file mode 100644 index 000000000..7fd7a085d --- /dev/null +++ b/roles/openshift_grafana/defaults/main.yml @@ -0,0 +1,12 @@ +--- +gf_body_tmp: + name: grafana_name + type: prometheus + typeLogoUrl: '' + access: proxy + url: prometheus_url + basicAuth: false + withCredentials: false + jsonData: + tlsSkipVerify: true + token: satoken diff --git a/roles/openshift_grafana/files/grafana-ocp-oauth.yml b/roles/openshift_grafana/files/grafana-ocp-oauth.yml new file mode 100644 index 000000000..82fa89004 --- /dev/null +++ b/roles/openshift_grafana/files/grafana-ocp-oauth.yml @@ -0,0 +1,661 @@ +--- +kind: Template +apiVersion: v1 +metadata: + name: grafana-ocp + annotations: + "openshift.io/display-name": Grafana ocp + description: | + Grafana server with patched Prometheus datasource. + iconClass: icon-cogs + tags: "metrics,monitoring,grafana,prometheus" +parameters: +- description: The location of the proxy image + name: IMAGE_GF + value: mrsiano/grafana-ocp:latest +- description: The location of the proxy image + name: IMAGE_PROXY + value: openshift/oauth-proxy:v1.0.0 +- description: External URL for the grafana route + name: ROUTE_URL + value: "" +- description: The namespace to instantiate heapster under. Defaults to 'grafana'. + name: NAMESPACE + value: grafana +- description: The session secret for the proxy + name: SESSION_SECRET + generate: expression + from: "[a-zA-Z0-9]{43}" +objects: +- apiVersion: v1 + kind: ServiceAccount + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"grafana-ocp"}}' +- apiVersion: authorization.openshift.io/v1 + kind: ClusterRoleBinding + metadata: + name: gf-cluster-reader + roleRef: + name: cluster-reader + subjects: + - kind: ServiceAccount + name: grafana-ocp + namespace: "${NAMESPACE}" +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + host: "${ROUTE_URL}" + to: + name: grafana-ocp + tls: + termination: Reencrypt +- apiVersion: v1 + kind: Service + metadata: + name: grafana-ocp + annotations: + prometheus.io/scrape: "true" + prometheus.io/scheme: https + service.alpha.openshift.io/serving-cert-secret-name: gf-tls + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + ports: + - name: grafana-ocp + port: 443 + protocol: TCP + targetPort: 8443 + selector: + app: grafana-ocp +- apiVersion: v1 + kind: Secret + metadata: + name: gf-proxy + namespace: "${NAMESPACE}" + stringData: + session_secret: "${SESSION_SECRET}=" +# Deploy Prometheus behind an oauth proxy +- apiVersion: extensions/v1beta1 + kind: Deployment + metadata: + labels: + app: grafana-ocp + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + replicas: 1 + selector: + matchLabels: + app: grafana-ocp + template: + metadata: + labels: + app: grafana-ocp + name: grafana-ocp-app + spec: + serviceAccountName: grafana-ocp + containers: + - name: oauth-proxy + image: ${IMAGE_PROXY} + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8443 + name: web + args: + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -client-id=system:serviceaccount:${NAMESPACE}:grafana-ocp + - -upstream=http://localhost:3000 + - -provider=openshift +# - '-openshift-delegate-urls={"/api/datasources": {"resource": "namespace", "verb": "get", "resourceName": "grafana-ocp", "namespace": "${NAMESPACE}"}}' + - '-openshift-sar={"namespace": "${NAMESPACE}", "verb": "list", "resource": "services"}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -skip-auth-regex=^/metrics,/api/datasources,/api/dashboards + volumeMounts: + - mountPath: /etc/tls/private + name: gf-tls + - mountPath: /etc/proxy/secrets + name: secrets + + - name: grafana-ocp + image: ${IMAGE_GF} + ports: + - name: grafana-http + containerPort: 3000 + volumeMounts: + - mountPath: "/root/go/src/github.com/grafana/grafana/data" + name: gf-data + - mountPath: "/root/go/src/github.com/grafana/grafana/conf" + name: gfconfig + - mountPath: /etc/tls/private + name: gf-tls + - mountPath: /etc/proxy/secrets + name: secrets + command: + - "./bin/grafana-server" + + volumes: + - name: gfconfig + configMap: + name: gf-config + - name: secrets + secret: + secretName: gf-proxy + - name: gf-tls + secret: + secretName: gf-tls + - emptyDir: {} + name: gf-data +- apiVersion: v1 + kind: ConfigMap + metadata: + name: gf-config + namespace: "${NAMESPACE}" + data: + defaults.ini: |- + ##################### Grafana Configuration Defaults ##################### + # + # Do not modify this file in grafana installs + # + + # possible values : production, development + app_mode = production + + # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty + instance_name = ${HOSTNAME} + + #################################### Paths ############################### + [paths] + # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) + # + data = data + # + # Directory where grafana can store logs + # + logs = data/log + # + # Directory where grafana will automatically scan and look for plugins + # + plugins = data/plugins + + #################################### Server ############################## + [server] + # Protocol (http, https, socket) + protocol = http + + # The ip address to bind to, empty will bind to all interfaces + http_addr = + + # The http port to use + http_port = 3000 + + # The public facing domain name used to access grafana from a browser + domain = localhost + + # Redirect to correct domain if host header does not match domain + # Prevents DNS rebinding attacks + enforce_domain = false + + # The full public facing url + root_url = %(protocol)s://%(domain)s:%(http_port)s/ + + # Log web requests + router_logging = false + + # the path relative working path + static_root_path = public + + # enable gzip + enable_gzip = false + + # https certs & key file + cert_file = /etc/tls/private/tls.crt + cert_key = /etc/tls/private/tls.key + + # Unix socket path + socket = /tmp/grafana.sock + + #################################### Database ############################ + [database] + # You can configure the database connection by specifying type, host, name, user and password + # as separate properties or as on string using the url property. + + # Either "mysql", "postgres" or "sqlite3", it's your choice + type = sqlite3 + host = 127.0.0.1:3306 + name = grafana + user = root + # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" + password = + # Use either URL or the previous fields to configure the database + # Example: mysql://user:secret@host:port/database + url = + + # Max idle conn setting default is 2 + max_idle_conn = 2 + + # Max conn setting default is 0 (mean not set) + max_open_conn = + + # For "postgres", use either "disable", "require" or "verify-full" + # For "mysql", use either "true", "false", or "skip-verify". + ssl_mode = disable + + ca_cert_path = + client_key_path = + client_cert_path = + server_cert_name = + + # For "sqlite3" only, path relative to data_path setting + path = grafana.db + + #################################### Session ############################# + [session] + # Either "memory", "file", "redis", "mysql", "postgres", "memcache", default is "file" + provider = file + + # Provider config options + # memory: not have any config yet + # file: session dir path, is relative to grafana data_path + # redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana` + # postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable + # mysql: go-sql-driver/mysql dsn config string, examples: + # `user:password@tcp(127.0.0.1:3306)/database_name` + # `user:password@unix(/var/run/mysqld/mysqld.sock)/database_name` + # memcache: 127.0.0.1:11211 + + + provider_config = sessions + + # Session cookie name + cookie_name = grafana_sess + + # If you use session in https only, default is false + cookie_secure = false + + # Session life time, default is 86400 + session_life_time = 86400 + gc_interval_time = 86400 + + #################################### Data proxy ########################### + [dataproxy] + + # This enables data proxy logging, default is false + logging = false + + #################################### Analytics ########################### + [analytics] + # Server reporting, sends usage counters to stats.grafana.org every 24 hours. + # No ip addresses are being tracked, only simple counters to track + # running instances, dashboard and error counts. It is very helpful to us. + # Change this option to false to disable reporting. + reporting_enabled = true + + # Set to false to disable all checks to https://grafana.com + # for new versions (grafana itself and plugins), check is used + # in some UI views to notify that grafana or plugin update exists + # This option does not cause any auto updates, nor send any information + # only a GET request to https://grafana.com to get latest versions + check_for_updates = true + + # Google Analytics universal tracking code, only enabled if you specify an id here + google_analytics_ua_id = + + # Google Tag Manager ID, only enabled if you specify an id here + google_tag_manager_id = + + #################################### Security ############################ + [security] + # default admin user, created on startup + admin_user = admin + + # default admin password, can be changed before first start of grafana, or in profile settings + admin_password = admin + + # used for signing + secret_key = SW2YcwTIb9zpOOhoPsMm + + # Auto-login remember days + login_remember_days = 7 + cookie_username = grafana_user + cookie_remember_name = grafana_remember + + # disable gravatar profile images + disable_gravatar = false + + # data source proxy whitelist (ip_or_domain:port separated by spaces) + data_source_proxy_whitelist = + + [snapshots] + # snapshot sharing options + external_enabled = true + external_snapshot_url = https://snapshots-origin.raintank.io + external_snapshot_name = Publish to snapshot.raintank.io + + # remove expired snapshot + snapshot_remove_expired = true + + # remove snapshots after 90 days + snapshot_TTL_days = 90 + + #################################### Users #################################### + [users] + # disable user signup / registration + allow_sign_up = true + + # Allow non admin users to create organizations + allow_org_create = true + + # Set to true to automatically assign new users to the default organization (id 1) + auto_assign_org = true + + # Default role new users will be automatically assigned (if auto_assign_org above is set to true) + auto_assign_org_role = Admin + + # Require email validation before sign up completes + verify_email_enabled = false + + # Background text for the user field on the login page + login_hint = email or username + + # Default UI theme ("dark" or "light") + default_theme = dark + + # External user management + external_manage_link_url = + external_manage_link_name = + external_manage_info = + + [auth] + # Set to true to disable (hide) the login form, useful if you use OAuth + disable_login_form = true + + # Set to true to disable the signout link in the side menu. useful if you use auth.proxy + disable_signout_menu = true + + #################################### Anonymous Auth ###################### + [auth.anonymous] + # enable anonymous access + enabled = true + + # specify organization name that should be used for unauthenticated users + org_name = Main Org. + + # specify role for unauthenticated users + org_role = Admin + + #################################### Github Auth ######################### + [auth.github] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + auth_url = https://github.com/login/oauth/authorize + token_url = https://github.com/login/oauth/access_token + api_url = https://api.github.com/user + team_ids = + allowed_organizations = + + #################################### Google Auth ######################### + [auth.google] + enabled = false + allow_sign_up = true + client_id = some_client_id + client_secret = some_client_secret + scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email + auth_url = https://accounts.google.com/o/oauth2/auth + token_url = https://accounts.google.com/o/oauth2/token + api_url = https://www.googleapis.com/oauth2/v1/userinfo + allowed_domains = + hosted_domain = + + #################################### Grafana.com Auth #################### + # legacy key names (so they work in env variables) + [auth.grafananet] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + allowed_organizations = + + [auth.grafana_com] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + allowed_organizations = + + #################################### Generic OAuth ####################### + [auth.generic_oauth] + name = OAuth + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + auth_url = + token_url = + api_url = + team_ids = + allowed_organizations = + + #################################### Basic Auth ########################## + [auth.basic] + enabled = false + + #################################### Auth Proxy ########################## + [auth.proxy] + enabled = true + header_name = X-WEBAUTH-USER + header_property = username + auto_sign_up = true + ldap_sync_ttl = 60 + whitelist = + + #################################### Auth LDAP ########################### + [auth.ldap] + enabled = false + config_file = /etc/grafana/ldap.toml + allow_sign_up = true + + #################################### SMTP / Emailing ##################### + [smtp] + enabled = false + host = localhost:25 + user = + # If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;""" + password = + cert_file = + key_file = + skip_verify = false + from_address = admin@grafana.localhost + from_name = Grafana + ehlo_identity = + + [emails] + welcome_email_on_sign_up = false + templates_pattern = emails/*.html + + #################################### Logging ########################## + [log] + # Either "console", "file", "syslog". Default is console and file + # Use space to separate multiple modes, e.g. "console file" + mode = console file + + # Either "debug", "info", "warn", "error", "critical", default is "info" + level = error + + # optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug + filters = + + # For "console" mode only + [log.console] + level = + + # log line format, valid options are text, console and json + format = console + + # For "file" mode only + [log.file] + level = + + # log line format, valid options are text, console and json + format = text + + # This enables automated log rotate(switch of following options), default is true + log_rotate = true + + # Max line number of single file, default is 1000000 + max_lines = 1000000 + + # Max size shift of single file, default is 28 means 1 << 28, 256MB + max_size_shift = 28 + + # Segment log daily, default is true + daily_rotate = true + + # Expired days of log file(delete after max days), default is 7 + max_days = 7 + + [log.syslog] + level = + + # log line format, valid options are text, console and json + format = text + + # Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. + network = + address = + + # Syslog facility. user, daemon and local0 through local7 are valid. + facility = + + # Syslog tag. By default, the process' argv[0] is used. + tag = + + + #################################### AMQP Event Publisher ################ + [event_publisher] + enabled = false + rabbitmq_url = amqp://localhost/ + exchange = grafana_events + + #################################### Dashboard JSON files ################ + [dashboards.json] + enabled = false + path = /var/lib/grafana/dashboards + + #################################### Usage Quotas ######################## + [quota] + enabled = false + + #### set quotas to -1 to make unlimited. #### + # limit number of users per Org. + org_user = 10 + + # limit number of dashboards per Org. + org_dashboard = 100 + + # limit number of data_sources per Org. + org_data_source = 10 + + # limit number of api_keys per Org. + org_api_key = 10 + + # limit number of orgs a user can create. + user_org = 10 + + # Global limit of users. + global_user = -1 + + # global limit of orgs. + global_org = -1 + + # global limit of dashboards + global_dashboard = -1 + + # global limit of api_keys + global_api_key = -1 + + # global limit on number of logged in users. + global_session = -1 + + #################################### Alerting ############################ + [alerting] + # Disable alerting engine & UI features + enabled = true + # Makes it possible to turn off alert rule execution but alerting UI is visible + execute_alerts = true + + #################################### Internal Grafana Metrics ############ + # Metrics available at HTTP API Url /api/metrics + [metrics] + enabled = true + interval_seconds = 10 + + # Send internal Grafana metrics to graphite + [metrics.graphite] + # Enable by setting the address setting (ex localhost:2003) + address = + prefix = prod.grafana.%(instance_name)s. + + [grafana_net] + url = https://grafana.com + + [grafana_com] + url = https://grafana.com + + #################################### Distributed tracing ############ + [tracing.jaeger] + # jaeger destination (ex localhost:6831) + address = + # tag that will always be included in when creating new spans. ex (tag1:value1,tag2:value2) + always_included_tag = + # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote + sampler_type = const + # jaeger samplerconfig param + # for "const" sampler, 0 or 1 for always false/true respectively + # for "probabilistic" sampler, a probability between 0 and 1 + # for "rateLimiting" sampler, the number of spans per second + # for "remote" sampler, param is the same as for "probabilistic" + # and indicates the initial sampling rate before the actual one + # is received from the mothership + sampler_param = 1 + + #################################### External Image Storage ############## + [external_image_storage] + # You can choose between (s3, webdav, gcs) + provider = + + [external_image_storage.s3] + bucket_url = + bucket = + region = + path = + access_key = + secret_key = + + [external_image_storage.webdav] + url = + username = + password = + public_url = + + [external_image_storage.gcs] + key_file = + bucket = diff --git a/roles/openshift_grafana/files/grafana-ocp.yml b/roles/openshift_grafana/files/grafana-ocp.yml new file mode 100644 index 000000000..bc7b4b286 --- /dev/null +++ b/roles/openshift_grafana/files/grafana-ocp.yml @@ -0,0 +1,76 @@ +--- +kind: Template +apiVersion: v1 +metadata: + name: grafana-ocp + annotations: + "openshift.io/display-name": Grafana ocp + description: | + Grafana server with patched Prometheus datasource. + iconClass: icon-cogs + tags: "metrics,monitoring,grafana,prometheus" +parameters: +- description: External URL for the grafana route + name: ROUTE_URL + value: "" +- description: The namespace to instantiate heapster under. Defaults to 'grafana'. + name: NAMESPACE + value: grafana +objects: +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + host: "${ROUTE_URL}" + to: + name: grafana-ocp +- apiVersion: v1 + kind: Service + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + selector: + name: grafana-ocp + ports: + - port: 8082 + protocol: TCP + targetPort: grafana-http +- apiVersion: v1 + kind: ReplicationController + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + selector: + name: grafana-ocp + replicas: 1 + template: + version: v1 + metadata: + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + volumes: + - name: data + emptyDir: {} + containers: + - image: "mrsiano/grafana-ocp:latest" + name: grafana-ocp + ports: + - name: grafana-http + containerPort: 3000 + volumeMounts: + - name: data + mountPath: "/root/go/src/github.com/grafana/grafana/data" + command: + - "./bin/grafana-server" diff --git a/roles/openshift_grafana/files/openshift-cluster-monitoring.json b/roles/openshift_grafana/files/openshift-cluster-monitoring.json new file mode 100644 index 000000000..f59ca997f --- /dev/null +++ b/roles/openshift_grafana/files/openshift-cluster-monitoring.json @@ -0,0 +1,5138 @@ +{ + "dashboard": { + "description": "Monitors Openshift cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only.", + "editable": true, + "gnetId": 315, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "rows": [ + { + "collapse": false, + "height": "200px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "height": "200px", + "id": 32, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "instant": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "Received", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "Sent", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O pressure", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Network I/O pressure", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster memory usage", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster CPU usage ", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster filesystem usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "20%", + "prefix": "", + "prefixFontSize": "20%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 13, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Total usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) ", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "overall cpu usage", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 17, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods CPU usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "% Usage", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers Cores Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 23, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System services CPU usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "System services CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 411, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 34, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_memory_usage_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes Memory usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 26, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_rss{systemd_service_name=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (systemd_service_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ systemd_service_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System services memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "System services memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 27, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + }, + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "container_memory_usage:sort_desc", + "refId": "B", + "step": 10 + }, + { + "expr": "sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "500px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> pod: {{ pod_name }} | {{ container_name }}", + "metric": "network", + "refId": "B", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- pod: {{ pod_name }} | {{ container_name }}", + "metric": "network", + "refId": "D", + "step": 1 + }, + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "network", + "refId": "C", + "step": 1 + }, + { + "expr": "sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "network", + "refId": "E", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "network", + "refId": "F", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 277, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> {{ pod_name }}", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- {{ pod_name }}", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "500px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 29, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "instant": true, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> {{ id }}", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- {{ id }}", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 35, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total) by (phase,reason)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ phase }} | {{ reason }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_build_total", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 54, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(openshift_build_active_time_seconds{phase=\"running\"} offset 10m)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of builds that have been running for more than 10 minutes (600 seconds).", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 55, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(openshift_build_active_time_seconds{phase=\"pending\"} offset 10m)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of build that have been waiting at least 10 minutes (600 seconds) to start.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 56, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total{phase=\"Failed\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of failed builds, regardless of the failure reason.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 57, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_build_total{phase=\"Failed\",reason=\"FetchSourceFailed\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of failed builds because of problems retrieving source from the associated Git repository.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 58, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total{phase=\"Complete\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of successfully completed builds.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 0, + "id": 59, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_build_total{phase=\"Failed\"} offset 5m", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ reason }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the failed builds totals, per failure reason, from 5 minutes ago.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift Builds", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_sdn_pod_setup_latency_sum)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_setup_latency_sum", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_sdn_pod_teardown_latency{quantile=\"0.9\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_teardown_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 50, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, (sum by (pod_name) (irate(container_network_receive_bytes_total{pod_name!=\"\"}[5m]))))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top 10 pods doing the most receive network traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 37, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_pod_ips", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }} | {{ role }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_ips", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 42, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_arp_cache_entries", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ role }} | {{ instance }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_arp_cache_entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 40, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_arp_cache_entries", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_arp_cache_entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift SDN", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 44, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(kubelet_pleg_relist_latency_microseconds{kubernetes_io_hostname=~\"$Node\",quantile=\"0.9\"}[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ role }} | {{ instance }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "kubelet_pleg_relist", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 51, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 52, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kubelet_docker_operations_timeout", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns a running count (not a rate) of docker operations that have timed out since the kubelet was started.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 53, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kubelet_docker_operations_errors", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns a running count (not a rate) of docker operations that have failed since the kubelet was started.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Kubelet", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 46, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(scrape_samples_scraped[2m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ kubernetes_name }} | {{ instance }} ", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "scrape_samples_scraped", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (cpu) (irate(container_cpu_usage_seconds_total{container_name=\"prometheus\"}[5m])))", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU per instance of Prometheus container.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Prometheus", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 48, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb!~\"GET|LIST|WATCH\"}[2m]))) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ resource }} || {{ verb }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of mutating API requests being made to the control plane.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 49, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb=~\"GET|LIST|WATCH\"}[2m]))) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ resource }} || {{ pod }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of non-mutating API requests being made to the control plane.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 74, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "endpoint_queue_latency", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": " quantile {{ quantile }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "endpoint_queue_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "API Server", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 61, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_disk_wal_fsync_duration_seconds_count", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "etcd_disk_wal_fsync_duration_seconds_count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "etcd", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 62, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(changes(container_start_time_seconds[10m]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "The number of containers that start or restart over the last ten minutes.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Changes in your cluster", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 63, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(machine_cpu_cores)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total number of cores in the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 64, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(sort_desc(irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total number of consumed cores.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (kubernetes_io_hostname,type) (irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumed per node in the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (cpu,id,pod_name,container_name) (irate(container_cpu_usage_seconds_total{role=\"infra\"}[5m])))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumption per system service or container on the infrastructure nodes.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (namespace) (irate(container_cpu_usage_seconds_total[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumed per namespace on the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_cpu_usage_seconds_total{id=\"/\"}[3m])) / sum(machine_cpu_cores)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Percentage of total cluster CPU in use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 69, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss) / sum(machine_memory_bytes)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Percentage of total cluster memory in use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 70, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (kubernetes_io_hostname) (irate(container_cpu_usage_seconds_total{id=~\"/system.slice/(docker|etcd).service\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Aggregate CPU usage (seconds total) of etcd+docker", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "System and container CPU", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 71, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "volumes_queue_latency", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "volumes_queue_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 72, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(cloudprovider_gce_api_request_duration_seconds_count[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ request }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "cloudprovider_aws_api_request_duration_seconds_count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 73, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(storage_operation_duration_seconds_sum{kubernetes_io_hostname=~\"$Node\"}[2m])) by (operation_name,kubernetes_io_hostname)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ operation_name }} || {{ kubernetes_io_hostname }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "storage_operation_duration_seconds_sum", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift Volumes", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes", + "openshift" + ], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PR}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "Node", + "options": [], + "query": "label_values(kubernetes_io_hostname)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "1s", + "2m", + "20s", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "openshift cluster monitoring", + "version": 6 + } +} diff --git a/roles/openshift_grafana/meta/main.yml b/roles/openshift_grafana/meta/main.yml new file mode 100644 index 000000000..8dea6f197 --- /dev/null +++ b/roles/openshift_grafana/meta/main.yml @@ -0,0 +1,13 @@ +--- +galaxy_info: + author: Eldad Marciano + description: Setup grafana pod + company: Red Hat, Inc. + license: Apache License, Version 2.0 + min_ansible_version: 2.3 + platforms: + - name: EL + versions: + - 7 + categories: + - metrics diff --git a/roles/openshift_grafana/tasks/gf-permissions.yml b/roles/openshift_grafana/tasks/gf-permissions.yml new file mode 100644 index 000000000..9d3c741ee --- /dev/null +++ b/roles/openshift_grafana/tasks/gf-permissions.yml @@ -0,0 +1,12 @@ +--- +- name: Create gf user on htpasswd + command: htpasswd -c /etc/origin/master/htpasswd gfadmin + +- name: Make sure master config use HTPasswdPasswordIdentityProvider + command: "sed -ie 's|AllowAllPasswordIdentityProvider|HTPasswdPasswordIdentityProvider\n file: /etc/origin/master/htpasswd|' /etc/origin/master/master-config.yaml" + +- name: Grant permission for gfuser + command: oc adm policy add-cluster-role-to-user cluster-reader gfadmin + +- name: Restart mater api + command: systemctl restart atomic-openshift-master-api.service diff --git a/roles/openshift_grafana/tasks/main.yml b/roles/openshift_grafana/tasks/main.yml new file mode 100644 index 000000000..6a06d40a9 --- /dev/null +++ b/roles/openshift_grafana/tasks/main.yml @@ -0,0 +1,122 @@ +--- +- name: Create grafana namespace + oc_project: + state: present + name: grafana + +- name: Configure Grafana Permissions + include_tasks: tasks/gf-permissions.yml + when: gf_oauth | default(false) | bool == true + +# TODO: we should grab this yaml file from openshift/origin +- name: Templatize grafana yaml + template: src=grafana-ocp.yaml dest=/tmp/grafana-ocp.yaml + register: + cl_file: /tmp/grafana-ocp.yaml + when: gf_oauth | default(false) | bool == false + +# TODO: we should grab this yaml file from openshift/origin +- name: Templatize grafana yaml + template: src=grafana-ocp-oauth.yaml dest=/tmp/grafana-ocp-oauth.yaml + register: + cl_file: /tmp/grafana-ocp-oauth.yaml + when: gf_oauth | default(false) | bool == true + +- name: Process the grafana file + oc_process: + namespace: grafana + template_name: "{{ cl_file }}" + create: True + when: gf_oauth | default(false) | bool == true + +- name: Wait to grafana be running + command: oc rollout status deployment/grafana-ocp + +- name: oc adm policy add-role-to-user view -z grafana-ocp -n {{ gf_prometheus_namespace }} + oc_adm_policy_user: + user: grafana-ocp + resource_kind: cluster-role + resource_name: view + state: present + role_namespace: "{{ gf_prometheus_namespace }}" + +- name: Get grafana route + oc_obj: + kind: route + name: grafana + namespace: grafana + register: route + +- name: Get prometheus route + oc_obj: + kind: route + name: prometheus + namespace: "{{ gf_prometheus_namespace }}" + register: route + +- name: Get the prometheus SA + oc_serviceaccount_secret: + state: list + service_account: prometheus + namespace: "{{ gf_prometheus_namespace }}" + register: sa + +- name: Get the management SA bearer token + set_fact: + management_token: "{{ sa.results | oo_filter_sa_secrets }}" + +- name: Ensure the SA bearer token value is read + oc_secret: + state: list + name: "{{ management_token }}" + namespace: "{{ gf_prometheus_namespace }}" + no_log: True + register: sa_secret + +- name: Get the SA bearer token for prometheus + set_fact: + token: "{{ sa_secret.results.encoded.token }}" + +- name: Convert to json + var: + ds_json: "{{ gf_body_tmp }} | to_json }}" + +- name: Set protocol type + var: + protocol: "{{ 'https' if {{ gf_oauth }} == true else 'http' }}" + +- name: Add gf datasrouce + uri: + url: "{{ protocol }}://{{ route }}/api/datasources" + user: admin + password: admin + method: POST + body: "{{ ds_json | regex_replace('grafana_name', {{ gf_datasource_name }}) | regex_replace('prometheus_url', 'https://'{{ prometheus }} ) | regex_replace('satoken', {{ token }}) }}" + headers: + Content-Type: "Content-Type: application/json" + register: add_ds + +- name: Regex setup ds name + replace: + path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + regexp: '${DS_PR}' + replace: '{{ gf_datasource_name }}' + backup: yes + +- name: Add new dashboard + uri: + url: "{{ protocol }}://{{ route }}/api/dashboards/db" + user: admin + password: admin + method: POST + body: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + headers: + Content-Type: "Content-Type: application/json" + register: add_ds + +- name: Regex json tear down + replace: + path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + regexp: '${DS_PR}' + replace: '{{ gf_datasource_name }}' + backup: yes diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py index 83e551b5d..b9c41d1b4 100644 --- a/roles/openshift_health_checker/openshift_checks/__init__.py +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -5,6 +5,7 @@ Health checks for OpenShift clusters. import json import operator import os +import re import time import collections @@ -309,28 +310,38 @@ class OpenShiftCheck(object): name_list = name_list.split(',') return [name.strip() for name in name_list if name.strip()] - @staticmethod - def get_major_minor_version(openshift_image_tag): + def get_major_minor_version(self, openshift_image_tag=None): """Parse and return the deployed version of OpenShift as a tuple.""" - if openshift_image_tag and openshift_image_tag[0] == 'v': - openshift_image_tag = openshift_image_tag[1:] - # map major release versions across releases - # to a common major version - openshift_major_release_version = { - "1": "3", - } + version = openshift_image_tag or self.get_var("openshift_image_tag") + components = [int(component) for component in re.findall(r'\d+', version)] - components = openshift_image_tag.split(".") - if not components or len(components) < 2: + if len(components) < 2: msg = "An invalid version of OpenShift was found for this host: {}" - raise OpenShiftCheckException(msg.format(openshift_image_tag)) + raise OpenShiftCheckException(msg.format(version)) + + # map major release version across releases to OCP major version + components[0] = {1: 3}.get(components[0], components[0]) + + return tuple(int(x) for x in components[:2]) + + def get_required_version(self, name, version_map): + """Return the correct required version(s) for the current (or nearest) OpenShift version.""" + openshift_version = self.get_major_minor_version() + + earliest = min(version_map) + latest = max(version_map) + if openshift_version < earliest: + return version_map[earliest] + if openshift_version > latest: + return version_map[latest] - if components[0] in openshift_major_release_version: - components[0] = openshift_major_release_version[components[0]] + required_version = version_map.get(openshift_version) + if not required_version: + msg = "There is no recommended version of {} for the current version of OpenShift ({})" + raise OpenShiftCheckException(msg.format(name, ".".join(str(comp) for comp in openshift_version))) - components = tuple(int(x) for x in components[:2]) - return components + return required_version def find_ansible_mount(self, path): """Return the mount point for path from ansible_mounts.""" diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 7afb8f730..ac6ffbbad 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -56,7 +56,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): # ordered list of registries (according to inventory vars) that docker will try for unscoped images regs = self.ensure_list("openshift_docker_additional_registries") # currently one of these registries is added whether the user wants it or not. - deployment_type = self.get_var("openshift_deployment_type") + deployment_type = self.get_var("openshift_deployment_type", default="") if deployment_type == "origin" and "docker.io" not in regs: regs.append("docker.io") elif deployment_type == 'openshift-enterprise' and "registry.access.redhat.com" not in regs: diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py index 986a01f38..7f8c6ebdc 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py +++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py @@ -170,7 +170,7 @@ class Elasticsearch(LoggingCheck): """ errors = [] for pod_name in pods_by_name.keys(): - df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name) + df_cmd = '-c elasticsearch exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name) disk_output = self.exec_oc(df_cmd, [], save_as_name='get_pv_diskspace.json') lines = disk_output.splitlines() # expecting one header looking like 'IUse% Use%' and one body line diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py index 3b1cf8baa..16ec3a7f6 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py +++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py @@ -5,12 +5,11 @@ Module for performing checks on a Kibana logging deployment import json import ssl -try: - from urllib2 import HTTPError, URLError - import urllib2 -except ImportError: - from urllib.error import HTTPError, URLError - import urllib.request as urllib2 +# pylint can't find the package when its installed in virtualenv +# pylint: disable=import-error,no-name-in-module +from ansible.module_utils.six.moves.urllib import request +# pylint: disable=import-error,no-name-in-module +from ansible.module_utils.six.moves.urllib.error import HTTPError, URLError from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException @@ -65,7 +64,7 @@ class Kibana(LoggingCheck): # Verify that the url is returning a valid response try: # We only care if the url connects and responds - return_code = urllib2.urlopen(url, context=ctx).getcode() + return_code = request.urlopen(url, context=ctx).getcode() except HTTPError as httperr: return httperr.reason except URLError as urlerr: diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index 0cad19842..58a2692bd 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -3,7 +3,7 @@ Ansible module for determining if an installed version of Open vSwitch is incomp currently installed version of OpenShift. """ -from openshift_checks import OpenShiftCheck, OpenShiftCheckException +from openshift_checks import OpenShiftCheck from openshift_checks.mixins import NotContainerizedMixin @@ -16,10 +16,12 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): tags = ["health"] openshift_to_ovs_version = { - "3.7": ["2.6", "2.7", "2.8"], - "3.6": ["2.6", "2.7", "2.8"], - "3.5": ["2.6", "2.7"], - "3.4": "2.4", + (3, 4): "2.4", + (3, 5): ["2.6", "2.7"], + (3, 6): ["2.6", "2.7", "2.8"], + (3, 7): ["2.6", "2.7", "2.8"], + (3, 8): ["2.6", "2.7", "2.8"], + (3, 9): ["2.6", "2.7", "2.8"], } def is_active(self): @@ -40,16 +42,5 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): return self.execute_module("rpm_version", args) def get_required_ovs_version(self): - """Return the correct Open vSwitch version for the current OpenShift version""" - openshift_version_tuple = self.get_major_minor_version(self.get_var("openshift_image_tag")) - - if openshift_version_tuple < (3, 5): - return self.openshift_to_ovs_version["3.4"] - - openshift_version = ".".join(str(x) for x in openshift_version_tuple) - ovs_version = self.openshift_to_ovs_version.get(openshift_version) - if ovs_version: - return self.openshift_to_ovs_version[openshift_version] - - msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}" - raise OpenShiftCheckException(msg.format(openshift_version)) + """Return the correct Open vSwitch version(s) for the current OpenShift version.""" + return self.get_required_version("Open vSwitch", self.openshift_to_ovs_version) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index f3a628e28..28aee8b35 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -1,8 +1,6 @@ """Check that available RPM packages match the required versions.""" -import re - -from openshift_checks import OpenShiftCheck, OpenShiftCheckException +from openshift_checks import OpenShiftCheck from openshift_checks.mixins import NotContainerizedMixin @@ -18,6 +16,8 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): (3, 5): ["2.6", "2.7"], (3, 6): ["2.6", "2.7", "2.8"], (3, 7): ["2.6", "2.7", "2.8"], + (3, 8): ["2.6", "2.7", "2.8"], + (3, 9): ["2.6", "2.7", "2.8"], } openshift_to_docker_version = { @@ -27,11 +27,9 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): (3, 4): "1.12", (3, 5): "1.12", (3, 6): "1.12", - } - - # map major OpenShift release versions across releases to a common major version - map_major_release_version = { - 1: 3, + (3, 7): "1.12", + (3, 8): "1.12", + (3, 9): ["1.12", "1.13"], } def is_active(self): @@ -83,48 +81,8 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): def get_required_ovs_version(self): """Return the correct Open vSwitch version(s) for the current OpenShift version.""" - openshift_version = self.get_openshift_version_tuple() - - earliest = min(self.openshift_to_ovs_version) - latest = max(self.openshift_to_ovs_version) - if openshift_version < earliest: - return self.openshift_to_ovs_version[earliest] - if openshift_version > latest: - return self.openshift_to_ovs_version[latest] - - ovs_version = self.openshift_to_ovs_version.get(openshift_version) - if not ovs_version: - msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}" - raise OpenShiftCheckException(msg.format(".".join(str(comp) for comp in openshift_version))) - - return ovs_version + return self.get_required_version("Open vSwitch", self.openshift_to_ovs_version) def get_required_docker_version(self): """Return the correct Docker version(s) for the current OpenShift version.""" - openshift_version = self.get_openshift_version_tuple() - - earliest = min(self.openshift_to_docker_version) - latest = max(self.openshift_to_docker_version) - if openshift_version < earliest: - return self.openshift_to_docker_version[earliest] - if openshift_version > latest: - return self.openshift_to_docker_version[latest] - - docker_version = self.openshift_to_docker_version.get(openshift_version) - if not docker_version: - msg = "There is no recommended version of Docker for the current version of OpenShift: {}" - raise OpenShiftCheckException(msg.format(".".join(str(comp) for comp in openshift_version))) - - return docker_version - - def get_openshift_version_tuple(self): - """Return received image tag as a normalized (X, Y) minor version tuple.""" - version = self.get_var("openshift_image_tag") - comps = [int(component) for component in re.findall(r'\d+', version)] - - if len(comps) < 2: - msg = "An invalid version of OpenShift was found for this host: {}" - raise OpenShiftCheckException(msg.format(version)) - - comps[0] = self.map_major_release_version.get(comps[0], comps[0]) - return tuple(comps[0:2]) + return self.get_required_version("Docker", self.openshift_to_docker_version) diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py index 04a5e89c4..750d4b9e9 100644 --- a/roles/openshift_health_checker/test/kibana_test.py +++ b/roles/openshift_health_checker/test/kibana_test.py @@ -1,12 +1,10 @@ import pytest import json -try: - import urllib2 - from urllib2 import HTTPError, URLError -except ImportError: - from urllib.error import HTTPError, URLError - import urllib.request as urllib2 +# pylint can't find the package when its installed in virtualenv +from ansible.module_utils.six.moves.urllib import request # pylint: disable=import-error +# pylint: disable=import-error +from ansible.module_utils.six.moves.urllib.error import HTTPError, URLError from openshift_checks.logging.kibana import Kibana, OpenShiftCheckException @@ -202,7 +200,7 @@ def test_verify_url_external_failure(lib_result, expect, monkeypatch): if type(lib_result) is int: return _http_return(lib_result) raise lib_result - monkeypatch.setattr(urllib2, 'urlopen', urlopen) + monkeypatch.setattr(request, 'urlopen', urlopen) check = Kibana() check._get_kibana_url = lambda: 'url' diff --git a/roles/openshift_health_checker/test/ovs_version_test.py b/roles/openshift_health_checker/test/ovs_version_test.py index 0238f49d5..80c7a0541 100644 --- a/roles/openshift_health_checker/test/ovs_version_test.py +++ b/roles/openshift_health_checker/test/ovs_version_test.py @@ -1,26 +1,7 @@ import pytest -from openshift_checks.ovs_version import OvsVersion, OpenShiftCheckException - - -def test_openshift_version_not_supported(): - def execute_module(*_): - return {} - - openshift_release = '111.7.0' - - task_vars = dict( - openshift=dict(common=dict()), - openshift_release=openshift_release, - openshift_image_tag='v' + openshift_release, - openshift_deployment_type='origin', - openshift_service_type='origin' - ) - - with pytest.raises(OpenShiftCheckException) as excinfo: - OvsVersion(execute_module, task_vars).run() - - assert "no recommended version of Open vSwitch" in str(excinfo.value) +from openshift_checks.ovs_version import OvsVersion +from openshift_checks import OpenShiftCheckException def test_invalid_openshift_release_format(): diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py index d2916f617..868b4bd12 100644 --- a/roles/openshift_health_checker/test/package_version_test.py +++ b/roles/openshift_health_checker/test/package_version_test.py @@ -1,6 +1,7 @@ import pytest -from openshift_checks.package_version import PackageVersion, OpenShiftCheckException +from openshift_checks.package_version import PackageVersion +from openshift_checks import OpenShiftCheckException def task_vars_for(openshift_release, deployment_type): @@ -18,7 +19,7 @@ def task_vars_for(openshift_release, deployment_type): def test_openshift_version_not_supported(): check = PackageVersion(None, task_vars_for("1.2.3", 'origin')) - check.get_openshift_version_tuple = lambda: (3, 4, 1) # won't be in the dict + check.get_major_minor_version = lambda: (3, 4, 1) # won't be in the dict with pytest.raises(OpenShiftCheckException) as excinfo: check.get_required_ovs_version() diff --git a/roles/openshift_hosted/tasks/storage/glusterfs_endpoints.yml b/roles/openshift_hosted/tasks/storage/glusterfs_endpoints.yml index 77f020357..fef945d51 100644 --- a/roles/openshift_hosted/tasks/storage/glusterfs_endpoints.yml +++ b/roles/openshift_hosted/tasks/storage/glusterfs_endpoints.yml @@ -1,4 +1,10 @@ --- +- name: Create temp directory for doing work in + command: mktemp -d /tmp/openshift-hosted-ansible-XXXXXX + register: mktempHosted + changed_when: False + check_mode: no + - name: Generate GlusterFS registry endpoints template: src: "{{ openshift.common.examples_content_version }}/glusterfs-registry-endpoints.yml.j2" @@ -14,3 +20,10 @@ with_items: - "{{ mktempHosted.stdout }}/glusterfs-registry-service.yml" - "{{ mktempHosted.stdout }}/glusterfs-registry-endpoints.yml" + +- name: Delete temp directory + file: + name: "{{ mktempHosted.stdout }}" + state: absent + changed_when: False + check_mode: no diff --git a/roles/openshift_hosted_templates/files/v3.6/enterprise/registry-console.yaml b/roles/openshift_hosted_templates/files/v3.6/enterprise/registry-console.yaml index cc3159a32..0786e2d2f 100644 --- a/roles/openshift_hosted_templates/files/v3.6/enterprise/registry-console.yaml +++ b/roles/openshift_hosted_templates/files/v3.6/enterprise/registry-console.yaml @@ -102,7 +102,7 @@ objects: parameters: - description: 'Specify "registry/repository" prefix for container image; e.g. for "registry.access.redhat.com/openshift3/registry-console:latest", set prefix "registry.access.redhat.com/openshift3/"' name: IMAGE_PREFIX - value: "openshift3/" + value: "registry.access.redhat.com/openshift3/" - description: 'Specify component name for container image; e.g. for "registry.access.redhat.com/openshift3/registry-console:latest", use base name "registry-console"' name: IMAGE_BASENAME value: "registry-console" diff --git a/roles/openshift_hosted_templates/files/v3.7/enterprise/registry-console.yaml b/roles/openshift_hosted_templates/files/v3.7/enterprise/registry-console.yaml index 9f2e6125d..ccea54aaf 100644 --- a/roles/openshift_hosted_templates/files/v3.7/enterprise/registry-console.yaml +++ b/roles/openshift_hosted_templates/files/v3.7/enterprise/registry-console.yaml @@ -102,7 +102,7 @@ objects: parameters: - description: 'Specify "registry/repository" prefix for container image; e.g. for "registry.access.redhat.com/openshift3/registry-console:latest", set prefix "registry.access.redhat.com/openshift3/"' name: IMAGE_PREFIX - value: "openshift3/" + value: "registry.access.redhat.com/openshift3/" - description: 'Specify component name for container image; e.g. for "registry.access.redhat.com/openshift3/registry-console:latest", use base name "registry-console"' name: IMAGE_BASENAME value: "registry-console" diff --git a/roles/openshift_hosted_templates/files/v3.8/enterprise/registry-console.yaml b/roles/openshift_hosted_templates/files/v3.8/enterprise/registry-console.yaml index f04ce06d3..15ad4e9af 100644 --- a/roles/openshift_hosted_templates/files/v3.8/enterprise/registry-console.yaml +++ b/roles/openshift_hosted_templates/files/v3.8/enterprise/registry-console.yaml @@ -102,7 +102,7 @@ objects: parameters: - description: 'Specify "registry/repository" prefix for container image; e.g. for "registry.access.redhat.com/openshift3/registry-console:latest", set prefix "registry.access.redhat.com/openshift3/"' name: IMAGE_PREFIX - value: "openshift3/" + value: "registry.access.redhat.com/openshift3/" - description: 'Specify component name for container image; e.g. for "registry.access.redhat.com/openshift3/registry-console:latest", use base name "registry-console"' name: IMAGE_BASENAME value: "registry-console" diff --git a/roles/openshift_hosted_templates/files/v3.9/enterprise/registry-console.yaml b/roles/openshift_hosted_templates/files/v3.9/enterprise/registry-console.yaml index c178cf432..7acefa0f0 100644 --- a/roles/openshift_hosted_templates/files/v3.9/enterprise/registry-console.yaml +++ b/roles/openshift_hosted_templates/files/v3.9/enterprise/registry-console.yaml @@ -102,7 +102,7 @@ objects: parameters: - description: 'Specify "registry/repository" prefix for container image; e.g. for "registry.access.redhat.com/openshift3/registry-console:latest", set prefix "registry.access.redhat.com/openshift3/"' name: IMAGE_PREFIX - value: "openshift3/" + value: "registry.access.redhat.com/openshift3/" - description: 'Specify component name for container image; e.g. for "registry.access.redhat.com/openshift3/registry-console:latest", use base name "registry-console"' name: IMAGE_BASENAME value: "registry-console" diff --git a/roles/openshift_logging/tasks/annotate_ops_projects.yaml b/roles/openshift_logging/tasks/annotate_ops_projects.yaml index 4a2ee64f0..6fdba6580 100644 --- a/roles/openshift_logging/tasks/annotate_ops_projects.yaml +++ b/roles/openshift_logging/tasks/annotate_ops_projects.yaml @@ -12,6 +12,7 @@ separator: '#' content: metadata#annotations#openshift.io/logging.ui.hostname: "{{ openshift_logging_kibana_ops_hostname }}" + metadata#annotations#openshift.io/logging.data.prefix: ".operations" with_items: "{{ __logging_ops_projects.stdout.split(' ') }}" loop_control: loop_var: project diff --git a/roles/openshift_logging/tasks/delete_logging.yaml b/roles/openshift_logging/tasks/delete_logging.yaml index fbc3e3fd1..ced7397b5 100644 --- a/roles/openshift_logging/tasks/delete_logging.yaml +++ b/roles/openshift_logging/tasks/delete_logging.yaml @@ -131,13 +131,13 @@ when: not openshift_logging_install_eventrouter | default(false) | bool -# Update asset config in openshift-web-console namespace -- name: Remove Kibana route information from web console asset config +# Update console config in openshift-web-console namespace +- name: Remove Kibana route information from the web console config include_role: name: openshift_web_console - tasks_from: update_asset_config.yml + tasks_from: update_console_config.yml vars: - asset_config_edits: - - key: loggingPublicURL + console_config_edits: + - key: clusterInfo#loggingPublicURL value: "" when: openshift_web_console_install | default(true) | bool diff --git a/roles/openshift_logging/tasks/generate_certs.yaml b/roles/openshift_logging/tasks/generate_certs.yaml index 0d7f8c056..a40449bf6 100644 --- a/roles/openshift_logging/tasks/generate_certs.yaml +++ b/roles/openshift_logging/tasks/generate_certs.yaml @@ -19,7 +19,7 @@ command: > {{ openshift_client_binary }} adm --config={{ mktemp.stdout }}/admin.kubeconfig ca create-signer-cert --key={{generated_certs_dir}}/ca.key --cert={{generated_certs_dir}}/ca.crt - --serial={{generated_certs_dir}}/ca.serial.txt --name=logging-signer-test + --serial={{generated_certs_dir}}/ca.serial.txt --name=logging-signer-test --overwrite=false check_mode: no when: - not ca_key_file.stat.exists diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml index ebd2d747b..3afd8680f 100644 --- a/roles/openshift_logging/tasks/install_logging.yaml +++ b/roles/openshift_logging/tasks/install_logging.yaml @@ -87,7 +87,7 @@ openshift_logging_elasticsearch_storage_type: "{{ elasticsearch_storage_type }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" - openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name }}" + openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name | default() }}" openshift_logging_elasticsearch_nodeselector: "{{ openshift_logging_es_nodeselector if outer_item.0.nodeSelector | default(None) is none else outer_item.0.nodeSelector }}" openshift_logging_elasticsearch_storage_group: "{{ [openshift_logging_es_storage_group] if outer_item.0.storageGroups | default([]) | length == 0 else outer_item.0.storageGroups }}" _es_containers: "{{ outer_item.0.containers}}" @@ -114,7 +114,7 @@ openshift_logging_elasticsearch_storage_type: "{{ elasticsearch_storage_type }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" - openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name }}" + openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name | default() }}" with_sequence: count={{ openshift_logging_es_cluster_size | int - openshift_logging_facts.elasticsearch.deploymentconfigs.keys() | count }} loop_control: @@ -151,7 +151,7 @@ openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_ops_pvc_size }}" openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_ops_pvc_dynamic }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_ops_pv_selector }}" - openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name }}" + openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name | default() }}" openshift_logging_elasticsearch_memory_limit: "{{ openshift_logging_es_ops_memory_limit }}" openshift_logging_elasticsearch_cpu_limit: "{{ openshift_logging_es_ops_cpu_limit }}" openshift_logging_elasticsearch_cpu_request: "{{ openshift_logging_es_ops_cpu_request }}" @@ -193,7 +193,7 @@ openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_ops_pvc_size }}" openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_ops_pvc_dynamic }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_ops_pv_selector }}" - openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name }}" + openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_ops_pvc_storage_class_name | default() }}" openshift_logging_elasticsearch_memory_limit: "{{ openshift_logging_es_ops_memory_limit }}" openshift_logging_elasticsearch_cpu_limit: "{{ openshift_logging_es_ops_cpu_limit }}" openshift_logging_elasticsearch_cpu_request: "{{ openshift_logging_es_ops_cpu_request }}" @@ -321,9 +321,9 @@ - name: Add Kibana route information to web console asset config include_role: name: openshift_web_console - tasks_from: update_asset_config.yml + tasks_from: update_console_config.yml vars: - asset_config_edits: - - key: loggingPublicURL + console_config_edits: + - key: clusterInfo#loggingPublicURL value: "https://{{ openshift_logging_kibana_hostname }}" when: openshift_web_console_install | default(true) | bool diff --git a/roles/openshift_logging/tasks/procure_server_certs.yaml b/roles/openshift_logging/tasks/procure_server_certs.yaml index bc817075d..d28d1d160 100644 --- a/roles/openshift_logging/tasks/procure_server_certs.yaml +++ b/roles/openshift_logging/tasks/procure_server_certs.yaml @@ -30,7 +30,7 @@ {{ openshift_client_binary }} adm --config={{ mktemp.stdout }}/admin.kubeconfig ca create-server-cert --key={{generated_certs_dir}}/{{cert_info.procure_component}}.key --cert={{generated_certs_dir}}/{{cert_info.procure_component}}.crt --hostnames={{cert_info.hostnames|quote}} --signer-cert={{generated_certs_dir}}/ca.crt --signer-key={{generated_certs_dir}}/ca.key - --signer-serial={{generated_certs_dir}}/ca.serial.txt + --signer-serial={{generated_certs_dir}}/ca.serial.txt --overwrite=false check_mode: no when: - cert_info.hostnames is defined diff --git a/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml b/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml index 9182bddb2..16de6f252 100644 --- a/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml +++ b/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml @@ -1,6 +1,6 @@ --- - command: > - oc get pod -l component=es,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} + oc get pod -l component=es,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name} register: _cluster_pods - name: "Getting ES version for logging-es cluster" @@ -10,7 +10,7 @@ when: _cluster_pods.stdout_lines | count > 0 - command: > - oc get pod -l component=es-ops,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} + oc get pod -l component=es-ops,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name} register: _ops_cluster_pods - name: "Getting ES version for logging-es-ops cluster" diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml index d55beec86..6bce13d1d 100644 --- a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml +++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml @@ -19,7 +19,7 @@ ## get all pods for the cluster - command: > - oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} + oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name} register: _cluster_pods - name: "Disable shard balancing for logging-{{ _cluster_component }} cluster" @@ -64,7 +64,7 @@ ## we may need a new first pod to run against -- fetch them all again - command: > - oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} + oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name} register: _cluster_pods - name: "Enable shard balancing for logging-{{ _cluster_component }} cluster" diff --git a/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml b/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml index 96079884e..4564f33dd 100644 --- a/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml +++ b/roles/openshift_master/tasks/upgrade/rpm_upgrade.yml @@ -8,8 +8,10 @@ # TODO: If the sdn package isn't already installed this will install it, we # should fix that -- name: Upgrade master packages - package: name={{ master_pkgs | join(',') }} state=present +- name: Upgrade master packages - yum + command: + yum install -y {{ master_pkgs | join(' ') }} \ + {{ ' --exclude *' ~ openshift_service_type ~ '*3.9*' if openshift_release | version_compare('3.9','<') else '' }} vars: master_pkgs: - "{{ openshift_service_type }}{{ openshift_pkg_version | default('') }}" @@ -19,3 +21,19 @@ - "{{ openshift_service_type }}-clients{{ openshift_pkg_version | default('') }}" register: result until: result is succeeded + when: ansible_pkg_mgr == 'yum' + +- name: Upgrade master packages - dnf + dnf: + name: "{{ master_pkgs | join(',') }}" + state: present + vars: + master_pkgs: + - "{{ openshift_service_type }}{{ openshift_pkg_version }}" + - "{{ openshift_service_type }}-master{{ openshift_pkg_version }}" + - "{{ openshift_service_type }}-node{{ openshift_pkg_version }}" + - "{{ openshift_service_type }}-sdn-ovs{{ openshift_pkg_version }}" + - "{{ openshift_service_type }}-clients{{ openshift_pkg_version }}" + register: result + until: result is succeeded + when: ansible_pkg_mgr == 'dnf' diff --git a/roles/openshift_metrics/tasks/install_metrics.yaml b/roles/openshift_metrics/tasks/install_metrics.yaml index 0866fe0d2..0dd5d1621 100644 --- a/roles/openshift_metrics/tasks/install_metrics.yaml +++ b/roles/openshift_metrics/tasks/install_metrics.yaml @@ -74,10 +74,10 @@ - name: Add metrics route information to web console asset config include_role: name: openshift_web_console - tasks_from: update_asset_config.yml + tasks_from: update_console_config.yml vars: - asset_config_edits: - - key: metricsPublicURL + console_config_edits: + - key: clusterInfo#metricsPublicURL value: "https://{{ openshift_metrics_hawkular_hostname}}/hawkular/metrics" when: openshift_web_console_install | default(true) | bool diff --git a/roles/openshift_metrics/tasks/oc_apply.yaml b/roles/openshift_metrics/tasks/oc_apply.yaml index 8ccfb7192..057963c1a 100644 --- a/roles/openshift_metrics/tasks/oc_apply.yaml +++ b/roles/openshift_metrics/tasks/oc_apply.yaml @@ -16,7 +16,9 @@ apply -f {{ file_name }} -n {{namespace}} register: generation_apply - failed_when: "'error' in generation_apply.stderr" + failed_when: + - "'error' in generation_apply.stderr" + - "generation_apply.rc != 0" changed_when: no - name: Determine change status of {{file_content.kind}} {{file_content.metadata.name}} @@ -28,5 +30,7 @@ register: version_changed vars: init_version: "{{ (generation_init is defined) | ternary(generation_init.stdout, '0') }}" - failed_when: "'error' in version_changed.stderr" + failed_when: + - "'error' in version_changed.stderr" + - "version_changed.rc != 0" changed_when: version_changed.stdout | int > init_version | int diff --git a/roles/openshift_metrics/tasks/uninstall_metrics.yaml b/roles/openshift_metrics/tasks/uninstall_metrics.yaml index 610c7b4e5..1664e9975 100644 --- a/roles/openshift_metrics/tasks/uninstall_metrics.yaml +++ b/roles/openshift_metrics/tasks/uninstall_metrics.yaml @@ -19,13 +19,13 @@ clusterrolebinding/hawkular-metrics changed_when: delete_metrics.stdout != 'No resources found' -# Update asset config in openshift-web-console namespace -- name: Remove metrics route information from web console asset config +# Update the web config in openshift-web-console namespace +- name: Remove metrics route information from the web console config include_role: name: openshift_web_console - tasks_from: update_asset_config.yml + tasks_from: update_console_config.yml vars: - asset_config_edits: - - key: metricsPublicURL + console_config_edits: + - key: clusterInfo#metricsPublicURL value: "" when: openshift_web_console_install | default(true) | bool diff --git a/roles/openshift_node/defaults/main.yml b/roles/openshift_node/defaults/main.yml index c1fab4382..0b10413c5 100644 --- a/roles/openshift_node/defaults/main.yml +++ b/roles/openshift_node/defaults/main.yml @@ -48,6 +48,12 @@ openshift_node_kubelet_args_dict: cloud-config: - "{{ openshift_config_base ~ '/cloudprovider/gce.conf' }}" node-labels: "{{ l_node_kubelet_node_labels }}" + azure: + cloud-provider: + - azure + cloud-config: + - "{{ openshift_config_base ~ '/cloudprovider/azure.conf' }}" + node-labels: "{{ l_node_kubelet_node_labels }}" undefined: node-labels: "{{ l_node_kubelet_node_labels }}" diff --git a/roles/openshift_openstack/templates/heat_stack.yaml.j2 b/roles/openshift_openstack/templates/heat_stack.yaml.j2 index 1be5d3a62..8e7c6288a 100644 --- a/roles/openshift_openstack/templates/heat_stack.yaml.j2 +++ b/roles/openshift_openstack/templates/heat_stack.yaml.j2 @@ -523,7 +523,7 @@ resources: floating_network: if: - no_floating - - null + - '' - {{ openshift_openstack_external_network_name }} {% if openshift_openstack_provider_network_name %} attach_float_net: false @@ -589,8 +589,13 @@ resources: secgrp: - { get_resource: lb-secgrp } - { get_resource: common-secgrp } -{% if not openshift_openstack_provider_network_name %} - floating_network: {{ openshift_openstack_external_network_name }} + floating_network: + if: + - no_floating + - '' + - {{ openshift_openstack_external_network_name }} +{% if openshift_openstack_provider_network_name %} + attach_float_net: false {% endif %} volume_size: {{ openshift_openstack_lb_volume_size }} {% if not openshift_openstack_provider_network_name %} @@ -655,7 +660,7 @@ resources: floating_network: if: - no_floating - - null + - '' - {{ openshift_openstack_external_network_name }} {% if openshift_openstack_provider_network_name %} attach_float_net: false @@ -725,7 +730,7 @@ resources: floating_network: if: - no_floating - - null + - '' - {{ openshift_openstack_external_network_name }} {% if openshift_openstack_provider_network_name %} attach_float_net: false @@ -792,8 +797,13 @@ resources: {% endif %} - { get_resource: infra-secgrp } - { get_resource: common-secgrp } -{% if not openshift_openstack_provider_network_name %} - floating_network: {{ openshift_openstack_external_network_name }} + floating_network: + if: + - no_floating + - '' + - {{ openshift_openstack_external_network_name }} +{% if openshift_openstack_provider_network_name %} + attach_float_net: false {% endif %} volume_size: {{ openshift_openstack_infra_volume_size }} {% if openshift_openstack_infra_server_group_policies|length > 0 %} diff --git a/roles/openshift_openstack/templates/heat_stack_server.yaml.j2 b/roles/openshift_openstack/templates/heat_stack_server.yaml.j2 index 1e73c9e1c..29b09f3c9 100644 --- a/roles/openshift_openstack/templates/heat_stack_server.yaml.j2 +++ b/roles/openshift_openstack/templates/heat_stack_server.yaml.j2 @@ -102,13 +102,11 @@ parameters: label: Attach-float-net description: A switch for floating network port connection -{% if not openshift_openstack_provider_network_name %} floating_network: type: string default: '' label: Floating network description: Network to allocate floating IP from -{% endif %} availability_zone: type: string diff --git a/roles/openshift_persistent_volumes/tasks/pv.yml b/roles/openshift_persistent_volumes/tasks/pv.yml index ef9ab7f5f..865269b7a 100644 --- a/roles/openshift_persistent_volumes/tasks/pv.yml +++ b/roles/openshift_persistent_volumes/tasks/pv.yml @@ -13,5 +13,5 @@ --config={{ mktemp.stdout }}/admin.kubeconfig register: pv_create_output when: persistent_volumes | length > 0 - failed_when: ('already exists' not in pv_create_output.stderr) and ('created' not in pv_create_output.stdout) + failed_when: "('already exists' not in pv_create_output.stderr) and ('created' not in pv_create_output.stdout) and pv_create_output.rc != 0" changed_when: ('created' in pv_create_output.stdout) diff --git a/roles/openshift_persistent_volumes/tasks/pvc.yml b/roles/openshift_persistent_volumes/tasks/pvc.yml index 2c5519192..6c12d128c 100644 --- a/roles/openshift_persistent_volumes/tasks/pvc.yml +++ b/roles/openshift_persistent_volumes/tasks/pvc.yml @@ -13,5 +13,5 @@ --config={{ mktemp.stdout }}/admin.kubeconfig register: pvc_create_output when: persistent_volume_claims | length > 0 - failed_when: ('already exists' not in pvc_create_output.stderr) and ('created' not in pvc_create_output.stdout) + failed_when: "('already exists' not in pvc_create_output.stderr) and ('created' not in pvc_create_output.stdout) and pvc_create_output.rc != 0" changed_when: ('created' in pvc_create_output.stdout) diff --git a/roles/openshift_provisioners/tasks/oc_apply.yaml b/roles/openshift_provisioners/tasks/oc_apply.yaml index a4ce53eae..239e1f1cc 100644 --- a/roles/openshift_provisioners/tasks/oc_apply.yaml +++ b/roles/openshift_provisioners/tasks/oc_apply.yaml @@ -15,7 +15,9 @@ apply -f {{ file_name }} -n {{ namespace }} register: generation_apply - failed_when: "'error' in generation_apply.stderr" + failed_when: + - "'error' in generation_apply.stderr" + - "generation_apply.rc != 0" changed_when: no - name: Determine change status of {{file_content.kind}} {{file_content.metadata.name}} @@ -36,7 +38,9 @@ delete -f {{ file_name }} -n {{ namespace }} register: generation_delete - failed_when: "'error' in generation_delete.stderr" + failed_when: + - "'error' in generation_delete.stderr" + - "generation_delete.rc != 0" changed_when: generation_delete.rc == 0 when: generation_apply.rc != 0 @@ -46,6 +50,8 @@ apply -f {{ file_name }} -n {{ namespace }} register: generation_apply - failed_when: "'error' in generation_apply.stderr" + failed_when: + - "'error' in generation_apply.stderr" + - "generation_apply.rc != 0" changed_when: generation_apply.rc == 0 when: generation_apply.rc != 0 diff --git a/roles/openshift_version/tasks/check_available_rpms.yml b/roles/openshift_version/tasks/check_available_rpms.yml index bdbc63d27..fea0daf77 100644 --- a/roles/openshift_version/tasks/check_available_rpms.yml +++ b/roles/openshift_version/tasks/check_available_rpms.yml @@ -1,7 +1,7 @@ --- - name: Get available {{ openshift_service_type}} version repoquery: - name: "{{ openshift_service_type}}" + name: "{{ openshift_service_type}}{{ '-' ~ openshift_release ~ '*' if openshift_release is defined else '' }}" ignore_excluders: true register: rpm_results diff --git a/roles/openshift_version/tasks/first_master_containerized_version.yml b/roles/openshift_version/tasks/first_master_containerized_version.yml index e02a75eab..3ed1d2cfe 100644 --- a/roles/openshift_version/tasks/first_master_containerized_version.yml +++ b/roles/openshift_version/tasks/first_master_containerized_version.yml @@ -7,6 +7,7 @@ when: - openshift_image_tag is defined - openshift_version is not defined + - not (openshift_version_reinit | default(false)) - name: Set containerized version to configure if openshift_release specified set_fact: @@ -20,7 +21,7 @@ docker run --rm {{ openshift_cli_image }}:latest version register: cli_image_version when: - - openshift_version is not defined + - openshift_version is not defined or openshift_version_reinit | default(false) - not openshift_use_crio_only # Origin latest = pre-release version (i.e. v1.3.0-alpha.1-321-gb095e3a) @@ -34,7 +35,7 @@ - set_fact: openshift_version: "{{ cli_image_version.stdout_lines[0].split(' ')[1].split('-')[0][1:] }}" - when: openshift_version is not defined + when: openshift_version is not defined or openshift_version_reinit | default(false) # If we got an openshift_version like "3.2", lookup the latest 3.2 container version # and use that value instead. diff --git a/roles/openshift_version/tasks/first_master_rpm_version.yml b/roles/openshift_version/tasks/first_master_rpm_version.yml index 264baca65..5d92f90c6 100644 --- a/roles/openshift_version/tasks/first_master_rpm_version.yml +++ b/roles/openshift_version/tasks/first_master_rpm_version.yml @@ -6,6 +6,7 @@ when: - openshift_pkg_version is defined - openshift_version is not defined + - not (openshift_version_reinit | default(false)) # These tasks should only be run against masters and nodes - name: Set openshift_version for rpm installation @@ -13,4 +14,7 @@ - set_fact: openshift_version: "{{ rpm_results.results.versions.available_versions.0 }}" - when: openshift_version is not defined + when: openshift_version is not defined or ( openshift_version_reinit | default(false) ) +- set_fact: + openshift_pkg_version: "-{{ rpm_results.results.versions.available_versions.0 }}" + when: openshift_version_reinit | default(false) diff --git a/roles/openshift_version/tasks/masters_and_nodes.yml b/roles/openshift_version/tasks/masters_and_nodes.yml index fbeb22d8b..eddd5ff42 100644 --- a/roles/openshift_version/tasks/masters_and_nodes.yml +++ b/roles/openshift_version/tasks/masters_and_nodes.yml @@ -6,9 +6,12 @@ include_tasks: check_available_rpms.yml - name: Fail if rpm version and docker image version are different fail: - msg: "OCP rpm version {{ openshift_rpm_version }} is different from OCP image version {{ openshift_version }}" + msg: "OCP rpm version {{ rpm_results.results.versions.available_versions.0 }} is different from OCP image version {{ openshift_version }}" # Both versions have the same string representation - when: rpm_results.results.versions.available_versions.0 != openshift_version + when: + - openshift_version not in rpm_results.results.versions.available_versions.0 + - openshift_version_reinit | default(false) + # block when when: not openshift_is_atomic | bool diff --git a/roles/openshift_web_console/defaults/main.yml b/roles/openshift_web_console/defaults/main.yml index 4f395398c..c747f73a8 100644 --- a/roles/openshift_web_console/defaults/main.yml +++ b/roles/openshift_web_console/defaults/main.yml @@ -1,3 +1,2 @@ --- -# TODO: This is temporary and will be updated to use taints and tolerations so that the console runs on the masters -openshift_web_console_nodeselector: {"region":"infra"} +openshift_web_console_nodeselector: "{{ openshift_hosted_infra_selector | default('region=infra') | map_from_pairs }}" diff --git a/roles/openshift_web_console/tasks/install.yml b/roles/openshift_web_console/tasks/install.yml index 287d8973d..de852e80b 100644 --- a/roles/openshift_web_console/tasks/install.yml +++ b/roles/openshift_web_console/tasks/install.yml @@ -26,6 +26,11 @@ register: mktemp changed_when: False +- name: Copy admin client config + command: > + cp {{ openshift.common.config_base }}/master//admin.kubeconfig {{ mktemp.stdout }}/admin.kubeconfig + changed_when: false + - name: Copy the web console config template to temp directory copy: src: "{{ __console_files_location }}/{{ item }}" @@ -39,13 +44,23 @@ yedit: src: "{{ mktemp.stdout }}/{{ __console_config_file }}" edits: - - key: logoutURL - value: "{{ openshift.master.logout_url | default('') }}" - - key: publicURL + - key: clusterInfo#consolePublicURL # Must have a trailing slash value: "{{ openshift.master.public_console_url }}/" - - key: masterPublicURL + - key: clusterInfo#masterPublicURL value: "{{ openshift.master.public_api_url }}" + - key: clusterInfo#logoutPublicURL + value: "{{ openshift.master.logout_url | default('') }}" + - key: features#inactivityTimeoutMinutes + value: "{{ openshift_web_console_inactivity_timeout_minutes | default(0) }}" + - key: extensions#scriptURLs + value: "{{ openshift_web_console_extension_script_urls | default([]) }}" + - key: extensions#stylesheetURLs + value: "{{ openshift_web_console_extension_stylesheet_urls | default([]) }}" + - key: extensions#properties + value: "{{ openshift_web_console_extension_properties | default({}) }}" + separator: '#' + state: present - slurp: src: "{{ mktemp.stdout }}/{{ __console_config_file }}" @@ -53,7 +68,8 @@ - name: Reconcile with the web console RBAC file shell: > - {{ openshift_client_binary }} process -f "{{ mktemp.stdout }}/{{ __console_rbac_file }}" | {{ openshift_client_binary }} auth reconcile -f - + {{ openshift_client_binary }} process -f "{{ mktemp.stdout }}/{{ __console_rbac_file }}" --config={{ mktemp.stdout }}/admin.kubeconfig + | {{ openshift_client_binary }} auth reconcile --config={{ mktemp.stdout }}/admin.kubeconfig -f - - name: Apply the web console template file shell: > @@ -62,7 +78,8 @@ --param IMAGE="{{ openshift_web_console_prefix }}{{ openshift_web_console_image_name }}:{{ openshift_web_console_version }}" --param NODE_SELECTOR={{ openshift_web_console_nodeselector | to_json | quote }} --param REPLICA_COUNT="{{ openshift_web_console_replica_count }}" - | {{ openshift_client_binary }} apply -f - + --config={{ mktemp.stdout }}/admin.kubeconfig + | {{ openshift_client_binary }} apply --config={{ mktemp.stdout }}/admin.kubeconfig -f - - name: Verify that the web console is running command: > diff --git a/roles/openshift_web_console/tasks/rollout_console.yml b/roles/openshift_web_console/tasks/rollout_console.yml new file mode 100644 index 000000000..75682ba1d --- /dev/null +++ b/roles/openshift_web_console/tasks/rollout_console.yml @@ -0,0 +1,20 @@ +--- +- name: Check if console deployment exists + oc_obj: + kind: deployments + name: webconsole + namespace: openshift-web-console + state: list + register: console_deployment + +# There's currently no command to trigger a rollout for a k8s deployment +# without changing the pod spec. Add an annotation to force a rollout. +- name: Rollout updated web console deployment + oc_edit: + kind: deployments + name: webconsole + namespace: openshift-web-console + separator: '#' + content: + spec#template#metadata#annotations#installer-triggered-rollout: "{{ ansible_date_time.iso8601_micro }}" + when: console_deployment.results.results.0 | length > 0 diff --git a/roles/openshift_web_console/tasks/update_asset_config.yml b/roles/openshift_web_console/tasks/update_console_config.yml index 0992b32e1..4d2957977 100644 --- a/roles/openshift_web_console/tasks/update_asset_config.yml +++ b/roles/openshift_web_console/tasks/update_console_config.yml @@ -1,9 +1,9 @@ --- # This task updates asset config values in the webconsole-config config map in # the openshift-web-console namespace. The values to set are pased in the -# variable `asset_config_edits`, which is an array of objects with `key` and +# variable `console_config_edits`, which is an array of objects with `key` and # `value` properties in the same format as `yedit` module `edits`. Only -# properties passed are updated. +# properties passed are updated. The separator for nested properties is `#`. # # Note that this triggers a redeployment on the console and a brief downtime # since it uses a `Recreate` strategy. @@ -12,10 +12,10 @@ # # - include_role: # name: openshift_web_console -# tasks_from: update_asset_config.yml +# tasks_from: update_console_config.yml # vars: -# asset_config_edits: -# - key: loggingPublicURL +# console_config_edits: +# - key: clusterInfo#loggingPublicURL # value: "https://{{ openshift_logging_kibana_hostname }}" # when: openshift_web_console_install | default(true) | bool @@ -28,18 +28,20 @@ - name: Make temp directory command: mktemp -d /tmp/console-ansible-XXXXXX - register: mktemp + register: mktemp_console changed_when: False -- name: Copy asset config to temp file +- name: Copy web console config to temp file copy: content: "{{webconsole_config.results.results[0].data['webconsole-config.yaml']}}" - dest: "{{ mktemp.stdout }}/webconsole-config.yaml" + dest: "{{ mktemp_console.stdout }}/webconsole-config.yaml" -- name: Change asset config properties +- name: Change web console config properties yedit: - src: "{{ mktemp.stdout }}/webconsole-config.yaml" - edits: "{{asset_config_edits}}" + src: "{{ mktemp_console.stdout }}/webconsole-config.yaml" + edits: "{{console_config_edits}}" + separator: '#' + state: present - name: Update web console config map oc_configmap: @@ -47,22 +49,13 @@ name: webconsole-config state: present from_file: - webconsole-config.yaml: "{{ mktemp.stdout }}/webconsole-config.yaml" + webconsole-config.yaml: "{{ mktemp_console.stdout }}/webconsole-config.yaml" - name: Remove temp directory file: state: absent - name: "{{ mktemp.stdout }}" + name: "{{ mktemp_console.stdout }}" changed_when: False -# There's currently no command to trigger a rollout for a k8s deployment -# without changing the pod spec. Add an annotation to force a rollout after -# the config map has been edited. -- name: Rollout updated web console deployment - oc_edit: - kind: deployments - name: webconsole - namespace: openshift-web-console - separator: '#' - content: - spec#template#metadata#annotations#installer-triggered-rollout: "{{ ansible_date_time.iso8601_micro }}" +# TODO: Only rollout if config has changed. +- include_tasks: rollout_console.yml diff --git a/roles/openshift_web_console/vars/default_images.yml b/roles/openshift_web_console/vars/default_images.yml index 7adb8a0d0..42d331ac5 100644 --- a/roles/openshift_web_console/vars/default_images.yml +++ b/roles/openshift_web_console/vars/default_images.yml @@ -1,4 +1,4 @@ --- -__openshift_web_console_prefix: "docker.io/openshift/" +__openshift_web_console_prefix: "docker.io/openshift/origin-" __openshift_web_console_version: "latest" -__openshift_web_console_image_name: "origin-web-console" +__openshift_web_console_image_name: "web-console" diff --git a/roles/openshift_web_console/vars/openshift-enterprise.yml b/roles/openshift_web_console/vars/openshift-enterprise.yml index 721ac1d27..375c22067 100644 --- a/roles/openshift_web_console/vars/openshift-enterprise.yml +++ b/roles/openshift_web_console/vars/openshift-enterprise.yml @@ -1,4 +1,4 @@ --- -__openshift_web_console_prefix: "registry.access.redhat.com/openshift3/" +__openshift_web_console_prefix: "registry.access.redhat.com/openshift3/ose-" __openshift_web_console_version: "v3.9" -__openshift_web_console_image_name: "ose-web-console" +__openshift_web_console_image_name: "web-console" diff --git a/roles/os_firewall/tasks/firewalld.yml b/roles/os_firewall/tasks/firewalld.yml index 4eae31596..fa933da51 100644 --- a/roles/os_firewall/tasks/firewalld.yml +++ b/roles/os_firewall/tasks/firewalld.yml @@ -2,7 +2,9 @@ - name: Fail - Firewalld is not supported on Atomic Host fail: msg: "Firewalld is not supported on Atomic Host" - when: r_os_firewall_is_atomic | bool + when: + - r_os_firewall_is_atomic | bool + - not openshift_enable_unsupported_configurations | default(false) - name: Install firewalld packages package: @@ -10,6 +12,7 @@ state: present register: result until: result is succeeded + when: not r_os_firewall_is_atomic | bool - name: Ensure iptables services are not enabled systemd: diff --git a/roles/template_service_broker/defaults/main.yml b/roles/template_service_broker/defaults/main.yml index c32872d24..3465832cc 100644 --- a/roles/template_service_broker/defaults/main.yml +++ b/roles/template_service_broker/defaults/main.yml @@ -3,4 +3,4 @@ template_service_broker_remove: False template_service_broker_install: True openshift_template_service_broker_namespaces: ['openshift'] -template_service_broker_selector: { "region": "infra" } +template_service_broker_selector: "{{ openshift_hosted_infra_selector | default('region=infra') | map_from_pairs }}" diff --git a/roles/template_service_broker/tasks/install.yml b/roles/template_service_broker/tasks/install.yml index 604e94602..82b211032 100644 --- a/roles/template_service_broker/tasks/install.yml +++ b/roles/template_service_broker/tasks/install.yml @@ -85,3 +85,9 @@ state: absent name: "{{ mktemp.stdout }}" changed_when: False + +- name: Rollout console so it discovers the template service broker is installed + include_role: + name: openshift_web_console + tasks_from: rollout_console.yml + when: openshift_web_console_install | default(true) | bool diff --git a/roles/template_service_broker/tasks/remove.yml b/roles/template_service_broker/tasks/remove.yml index db1b558e4..767e8ddc1 100644 --- a/roles/template_service_broker/tasks/remove.yml +++ b/roles/template_service_broker/tasks/remove.yml @@ -31,3 +31,9 @@ state: absent name: "{{ mktemp.stdout }}" changed_when: False + +- name: Rollout console so it discovers the template service broker is removed + include_role: + name: openshift_web_console + tasks_from: rollout_console.yml + when: openshift_web_console_install | default(true) | bool diff --git a/roles/template_service_broker/vars/default_images.yml b/roles/template_service_broker/vars/default_images.yml index 662d65d9f..dc164a4db 100644 --- a/roles/template_service_broker/vars/default_images.yml +++ b/roles/template_service_broker/vars/default_images.yml @@ -1,4 +1,4 @@ --- -__template_service_broker_prefix: "docker.io/openshift/" +__template_service_broker_prefix: "docker.io/openshift/origin-" __template_service_broker_version: "latest" -__template_service_broker_image_name: "origin-template-service-broker" +__template_service_broker_image_name: "template-service-broker" diff --git a/roles/template_service_broker/vars/openshift-enterprise.yml b/roles/template_service_broker/vars/openshift-enterprise.yml index 16a08e72f..b65b97691 100644 --- a/roles/template_service_broker/vars/openshift-enterprise.yml +++ b/roles/template_service_broker/vars/openshift-enterprise.yml @@ -1,4 +1,4 @@ --- -__template_service_broker_prefix: "registry.access.redhat.com/openshift3/" +__template_service_broker_prefix: "registry.access.redhat.com/openshift3/ose-" __template_service_broker_version: "v3.7" -__template_service_broker_image_name: "ose-template-service-broker" +__template_service_broker_image_name: "template-service-broker" diff --git a/roles/tuned/tasks/main.yml b/roles/tuned/tasks/main.yml index 4a28d47b2..5129f4471 100644 --- a/roles/tuned/tasks/main.yml +++ b/roles/tuned/tasks/main.yml @@ -28,7 +28,12 @@ when: item.state == 'file' - name: Make tuned use the recommended tuned profile on restart - file: path=/etc/tuned/active_profile state=absent + file: + path: '{{ item }}' + state: absent + with_items: + - /etc/tuned/active_profile + - /etc/tuned/profile_mode - name: Restart tuned service systemd: |