diff options
-rw-r--r-- | docs/performance/gluster.txt | 45 | ||||
-rw-r--r-- | docs/projects/epics.txt | 1 | ||||
-rw-r--r-- | docs/samples/access/externalip.yaml | 28 | ||||
-rw-r--r-- | docs/samples/mysql/templates/01-sds-secrets.yml.j2 | 26 | ||||
-rw-r--r-- | docs/samples/mysql/vars/sds.yml | 24 | ||||
-rw-r--r-- | docs/troubleshooting.txt | 11 | ||||
-rw-r--r-- | docs/vision.txt | 4 | ||||
-rw-r--r-- | docs/webservices.txt | 8 | ||||
-rw-r--r-- | group_vars/OSEv3.yml | 1 | ||||
-rw-r--r-- | group_vars/production.yml | 1 | ||||
-rw-r--r-- | group_vars/staging.yml | 1 | ||||
-rw-r--r-- | group_vars/testing.yml | 1 | ||||
-rw-r--r-- | setup/configs/openshift.yml | 19 | ||||
-rw-r--r-- | setup/configs/secrets.yml | 19 | ||||
-rw-r--r-- | setup/configs/security.yml | 1 | ||||
-rw-r--r-- | setup/external_ip.txt | 8 | ||||
-rw-r--r-- | setup/users/htpasswd | 2 |
17 files changed, 182 insertions, 18 deletions
diff --git a/docs/performance/gluster.txt b/docs/performance/gluster.txt new file mode 100644 index 0000000..ada9a9c --- /dev/null +++ b/docs/performance/gluster.txt @@ -0,0 +1,45 @@ +# Changed (for katrin_data) +gluster volume set katrin_data server.event-threads 8 # 1 +gluster volume set katrin_data client.event-threads 8 # 2 +gluster volume set katrin_data performance.io-thread-count 32 # 16 +gluster volume set katrin_data cluster.lookup-optimize on # off +gluster volume set katrin_data cluster.readdir-optimize on # off + +# Already set +performance.stat-prefetch: on +performance.readdir-ahead: on +performance.io-cache: on +cluster.choose-local: true +performance.flush-behind: on +performance.write-behind-window-size: 1MB + +# Shall we ? +gluster volume set katrin_data server.outstanding-rpc-limit # 64 + +# Does this cache (per-client size) make sense with 1-10s delay? +gluster volume set katrin_data performance.cache-refresh-timeout 10 # 1 (seconds) +gluster volume set katrin_data performance.cache-size 1GB # 32MB +gluster volume set katrin_data performance.write-behind-window-size # 1MB +gluster volume set katrin_data performance.cache-max-file-size 2MB # 0 (unlimited) + +# Major, but seems to affect only Samba/NFS in 3.x +gluster volume set katrin_data features.cache-invalidation on # off +gluster volume set katrin_data performance.cache-invalidation on # off +gluster volume set katrin_data features.cache-invalidation-timeout 600 # 60 +gluster volume set katrin_data performance.md-cache-timeout 600 # 1 + +# Not recommended +gluster volume set katrin_data performance.client-io-threads on # off + + +Sysctl (not applied as we use rdma anyway) +========= +sysctl -w net.ipv4.tcp_congestion_control=htcp # cubic +sysctl -w net.ipv4.tcp_mtu_probing=1 # recommended for hosts with jumbo frames enabled + +# Optimal value unclear (big value may harm small file performance) +sysctl -w net.ipv4.tcp_rmem="4096 87380 33554432" # increase Linux autotuning TCP buffer limit 
to 32MB +sysctl -w net.ipv4.tcp_wmem="4096 87380 33554432" +sysctl -w net.core.rmem_max=67108864 # allow testing with buffers up to 64MB +sysctl -w net.core.wmem_max=67108864 +sysctl -w net.core.netdev_max_backlog=30000 # increase the length of the processor input queue diff --git a/docs/projects/epics.txt b/docs/projects/epics.txt new file mode 100644 index 0000000..6190dbd --- /dev/null +++ b/docs/projects/epics.txt @@ -0,0 +1 @@ +EPICS_CA_ADDR_LIST="172.30.14.13" caget -w 3 -t darwin:ist:ts1 diff --git a/docs/samples/access/externalip.yaml b/docs/samples/access/externalip.yaml new file mode 100644 index 0000000..3827968 --- /dev/null +++ b/docs/samples/access/externalip.yaml @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: Service +metadata: + name: epics-softioc-external + namespace: epics +spec: + type: ClusterIP + selector: + deploymentconfig: epics-softioc + externalIPs: + - 192.168.130.1 + ports: + - name: 5064-tcp + port: 5064 + protocol: TCP + targetPort: 5064 + - name: 5064-udp + port: 5064 + protocol: UDP + targetPort: 5064 + - name: 5065-tcp + port: 5065 + protocol: TCP + targetPort: 5065 + - name: 5065-udp + port: 5065 + protocol: UDP + targetPort: 5065 diff --git a/docs/samples/mysql/templates/01-sds-secrets.yml.j2 b/docs/samples/mysql/templates/01-sds-secrets.yml.j2 new file mode 100644 index 0000000..2922118 --- /dev/null +++ b/docs/samples/mysql/templates/01-sds-secrets.yml.j2 @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: Template +metadata: + name: sds-secrets + labels: + app: sds + annotations: + descriptions: "SymmetricDS Secrets" +objects: +- apiVersion: v1 + kind: Secret + metadata: + annotations: + template.openshift.io/expose-root_password: '{.data[''root-password'']}' + template.openshift.io/expose-database_password: '{.data[''database-password'']}' + name: sds + stringData: + root-password: "${DATABASE_PASSWORD}" + database-password: "${DATABASE_PASSWORD}" +parameters: +- description: SymmetricDS Database Password + displayName: SymmetricDS 
Database Password + from: '[a-zA-Z0-9]{16}' + generate: expression + name: DATABASE_PASSWORD + required: true diff --git a/docs/samples/mysql/vars/sds.yml b/docs/samples/mysql/vars/sds.yml new file mode 100644 index 0000000..abe0f4f --- /dev/null +++ b/docs/samples/mysql/vars/sds.yml @@ -0,0 +1,24 @@ +sds: + pods: + sds-mysql: + service: { ports: [ 3306 ] } + sched: { replicas: 1, strategy: "Recreate" } + groups: [ "services_sds" ] + images: + - stream: "openshift/mysql:5.7" + env: + - { name: "MYSQL_USER", value: "sds" } + - { name: "MYSQL_PASSWORD", value: "secret@sds/database-password" } + - { name: "MYSQL_ROOT_PASSWORD", value: "secret@sds/root-password" } + - { name: "MYSQL_DATABASE", value: "sds" } + - { name: "MYSQL_MAX_CONNECTIONS", value: "50" } + mappings: + - { name: "db", path: "sds", mount: "/var/lib/mysql/data" } + resources: { limit: { cpu: 1000m, mem: 2Gi } } +# probes: +# - { port: 3306 } + probes: + - { type: "liveness", port: 3306 } + - { type: "readiness", command: [ /bin/sh, -i, -c, MYSQL_PWD="$MYSQL_PASSWORD" mysql -h 127.0.0.1 -u $MYSQL_USER -D $MYSQL_DATABASE -e 'SELECT 1' ], delay: "15", timeout: "5" } + hooks: + - { type: "postStart", command: [ /bin/sh, -i, -c, sleep 10; MYSQL_PWD="$MYSQL_ROOT_PASSWORD" mysql -h 127.0.0.1 -u root -D $MYSQL_DATABASE -e "GRANT ALL ON *.* TO 'sds'@'%'; UPDATE mysql.user SET Super_Priv='Y' WHERE user='sds' AND host='%'; FLUSH PRIVILEGES;" ] } diff --git a/docs/troubleshooting.txt b/docs/troubleshooting.txt index 1f52fe9..5eb0cc7 100644 --- a/docs/troubleshooting.txt +++ b/docs/troubleshooting.txt @@ -263,6 +263,17 @@ pods: very slow scheduling (normal start time in seconds range), failed pods, ro and the pods should be allowed to access files. 
Possible errors: unable to create pods: pods "mongodb-2-" is forbidden: no providers available to validate pod request +Pod Networking +============== +- Run commands in the pod network, particularly execute packet sniffers (which would not work in the container due to missing capabilities) + * Get container cid + docker ps -f label=io.kubernetes.pod.name=epics-archappl-46-h6j62 -f label=io.kubernetes.pod.namespace=epics -f label=io.kubernetes.docker.type=podsandbox -q + * Run command with container networking, e.g. tcpdump + nsenter -n -t $(docker inspect --format "{{ .State.Pid }}" "f5a0ad4f5793") tcpdump -nv -i eth0 + +- Check if the service properly exposes ports + * 'nc' will not reliably tell whether a UDP port is open due to the underlying firewall. + iptables -n -L -t nat | grep 5064 | grep 172 Builds diff --git a/docs/vision.txt b/docs/vision.txt index bf6de57..fdc921d 100644 --- a/docs/vision.txt +++ b/docs/vision.txt @@ -2,12 +2,14 @@ Ands v.2 ======== - Try overlay2 storage driver (LVM is used in Ands v.1). Check also further docker configuration options: 'cgroup-driver', ... * This actually seems problematic in CentOS-8. Something, like 'rsync portage portage/.tmp' is EXREMELY slow (<1 MB/s). Just check eix-sync. - - Integrate fast Ethernet and use conteiner native networking. OpenVSwitch is slow and causes problems. + - Integrate fast Ethernet and use container native networking. OpenVSwitch is slow and causes problems. Alternatively, can we rely on some hardware + features of novel network cards, e.g. Mellanox ASAP2 (Accelerated Switch and Packet Processing)? - Do not run pods on Master nodes, but Gluster and a few databases pods (MySQL) are OK (multiple reasons, especially mounting a lot of Gluster Volumes) * Restrict all periodic jobs to a specific node: easy to re-install (non-master), fast SSD storage, ...? 
- Object Storage should be integrated, either Gluster Block is ready for production or we have to use Ceph as well - Automatic provisioning would be much better then handling volumes trough Ands. Basically, this will render Ands redundant. We can switch to Helm, etc. But, we need ability to easily understand which volume belong to which pod/namespace and automatically kill redundant volumes. + - Avoid conflicts with SCC private vlans (KIT WiFi, VPN, ...?) Questions ========= diff --git a/docs/webservices.txt b/docs/webservices.txt index 2545bd5..0edfdeb 100644 --- a/docs/webservices.txt +++ b/docs/webservices.txt @@ -10,12 +10,18 @@ Architecture by setting 'haproxy.router.openshift.io/balance' to 'source' in route metadata. Then, the destination replica will be determined based on the client IP. * HAProxy has configured a default timeout. If replica does not send data within '30s' the connection - will be terminated. It can be increased with 'haproxy.router.openshift.io/timeout' + will be terminated. It can be increased with 'haproxy.router.openshift.io/timeout' in route metadata. * There is a several ways to configure certiciates for HTTPS services defined by type of tls termination in the route specification. With 'passthrough' the container is expected to handle certificates itself. In the edge termination mode, the certificates are configured in the route and HAProxy manages secure communication with clients and provides unencrypted data to the service in the cluster. 
+ - Sample metadata configuration for route: + kind: Route + metadata: + annotations: + haproxy.router.openshift.io/balance: 'source' + haproxy.router.openshift.io/timeout: 300s Updating/Generating certificates for the router =============================================== diff --git a/group_vars/OSEv3.yml b/group_vars/OSEv3.yml index 4f99f5a..a92487f 100644 --- a/group_vars/OSEv3.yml +++ b/group_vars/OSEv3.yml @@ -46,6 +46,7 @@ openshift_master_cluster_hostname: "{{ ands_use_inner_lb | ternary(ands_inner_lb openshift_master_cluster_public_hostname: "{{ ands_openshift_lb }}" openshift_master_default_subdomain: "{{ ands_openshift_subdomain | default(ands_openshift_lb) }}" openshift_master_ingress_ip_network_cidr: "{{ ands_openshift_ingress_network }}" +openshift_master_external_ip_network_cidrs: "{{ ands_openshift_external_network }}" #openshift_portal_net: #osm_host_subnet_length: diff --git a/group_vars/production.yml b/group_vars/production.yml index c731873..e661c58 100644 --- a/group_vars/production.yml +++ b/group_vars/production.yml @@ -8,6 +8,7 @@ ands_openshift_subdomain: kaas.kit.edu ands_openshift_network: 192.168.13.0/24 ands_openshift_public_network: 192.168.26.0/24 ands_openshift_ingress_network: 192.168.16.0/24 +ands_openshift_external_network: [192.168.128.0/18] ands_global_network: 141.52.64.0/23 #ands_inner_domain: "" diff --git a/group_vars/staging.yml b/group_vars/staging.yml index 00ec146..94c13fd 100644 --- a/group_vars/staging.yml +++ b/group_vars/staging.yml @@ -9,6 +9,7 @@ ands_openshift_subdomain: openshift.suren.me ands_openshift_network: 192.168.213.0/24 ands_openshift_public_network: 192.168.226.0/24 ands_openshift_ingress_network: 192.168.216.0/24 +ands_openshift_external_network: [192.168.128.0/18] ands_inner_domain: "" ands_use_inner_lb: true diff --git a/group_vars/testing.yml b/group_vars/testing.yml index f7e04cf..af3ba4b 100644 --- a/group_vars/testing.yml +++ b/group_vars/testing.yml @@ -7,6 +7,7 @@ ands_openshift_subdomain: 
kaas.kit.edu ands_openshift_network: 192.168.13.0/24 ands_openshift_public_network: 192.168.26.0/24 ands_openshift_ingress_network: 192.168.16.0/24 +ands_openshift_external_network: [192.168.128.0/18] #ands_inner_domain: "" ands_openshift_set_hostname: false diff --git a/setup/configs/openshift.yml b/setup/configs/openshift.yml index fc1743a..878d687 100644 --- a/setup/configs/openshift.yml +++ b/setup/configs/openshift.yml @@ -8,6 +8,7 @@ ands_openshift_projects: kaas: KaaS router and common resources katrin: KArlsruhe TRItium Neutrino status: KATRIN status display + epics: EPICS control system adei: ADEI adai: ADAI bora: Build Once Run Always @@ -24,6 +25,9 @@ ands_openshift_users: kopmann: { name: "Andreas Kopmann", email: "kopmann@kit.edu" } ntj: { name: "Nicholas Tan Jerome", email: "nicholas.jerome@kit.edu" } jonasteufel: { name: "Jonas Teufel", email: "jonseb1998@gmail.com" } + jalal: { name: "Jalal Mostafa", email: "jalal.mostapha@outlook.com" } + gil: { name: "Woosik Gil", email: "gil@kit.edu" } + ands_openshift_roles: cluster-admin: csa @@ -31,14 +35,15 @@ ands_openshift_roles: katrin/admin: katrin status/admin: katrin adei/admin: csa - adei/view: pdv, kopmann - adei/kaas-maintain: pdv, kopmann - adai/admin: csa, kopmann - bora/admin: csa, ntj, katrin, kopmann + adei/view: pdv, kopmann, jalal + adei/kaas-maintain: pdv, kopmann, jalal + adai/admin: csa, kopmann, jalal + bora/admin: csa, ntj, gil, jalal, katrin, kopmann + epics/admin: csa, jalal wave/admin: csa, ntj - services/admin: csa - web/admin: kopmann, jonasteufel - mon/admin: csa + services/admin: csa, jalal, katrin + web/admin: kopmann, jonasteufel, jalal + mon/admin: csa, jalal test/admin: csa, ntj, kopmann, katrin ands_repos: diff --git a/setup/configs/secrets.yml b/setup/configs/secrets.yml index 5005be0..0d04ece 100644 --- a/setup/configs/secrets.yml +++ b/setup/configs/secrets.yml @@ -1,10 +1,11 @@ $ANSIBLE_VAULT;1.1;AES256 
-30383738386265633133306363326639656331333736303966633133333661383561373533303966 -6361396564626437656237663035616461656661316265610a306336373231616136393330616632 -39376265346133303332363235303635383239336463633234616261643161643639313732313536 -3264636131353136640a623238663037336261303063313564303665386561643030373064356536 -61633136393138656533336563346635656531376161376639656436343437326538366336643734 -35363464646131316366626234613737366633626166376339313832646239626265333637613261 -32353535356537323533373831396138326239643937623865613731373165393633626331623839 -66323039393136313431383264633731653965386261613336376263396531333862306562313337 -38346465613831613566353233346634373032663537353633643330363136343264 +65363962636236613634613837653134633264656465386635633664366365316666393163646130 +3566376130643337626562393361646565396465396661300a333031313732373061393036616366 +62633866616631316261373135666164356161303332613862643132333230376532356237393539 +3263663537613631350a633965366532333361613164346162626138336565303935393862633933 +32653639336436623563666438343230376563643730636635653239393034393663383333643032 +63363330356165666266623463666137393861643930366632373763613236633661643166633439 +30633638393864343430366538343933333039366565326231633233643263383832356361633439 +62666661643963346431643138663538313761316639623065333437386463313231626532323561 +66616531346566366663393632643533636236633464353162613965316634666535313530623337 +3435386536643263316131303736396238366630393037346664 diff --git a/setup/configs/security.yml b/setup/configs/security.yml index a35b33e..171fdf6 100644 --- a/setup/configs/security.yml +++ b/setup/configs/security.yml @@ -20,6 +20,7 @@ ands_openshift_gid_ranges: kaas: "4000/10" katrin: "5000/10" status: "5100/10" + epics: "5200/10" adei: "6000/10" adai: "6050/10" bora: "6100/10" diff --git a/setup/external_ip.txt b/setup/external_ip.txt new file mode 100644 index 0000000..e9e6826 --- /dev/null +++ b/setup/external_ip.txt @@ -0,0 
+1,8 @@ +External networks, ranges 192.168.128.x - 192.168.191.x +================= + - 192.168.128.0/24 KaaS VMs (reserved, unused) + - 192.168.129.0/24 ADEI VMs (reserved, unused) + - 192.168.130.0/24 EPICS VMs (managed by Jalal) + + +
\ No newline at end of file diff --git a/setup/users/htpasswd b/setup/users/htpasswd index 8965caf..4386b9e 100644 --- a/setup/users/htpasswd +++ b/setup/users/htpasswd @@ -4,3 +4,5 @@ katrin:$apr1$94lAgTxt$LVOWdwye92nsZVqVT7VaG1 ntj:$apr1$G5/ThWdp$kFLsj/hO9jIYYP.Zab9kC/ kopmann:$apr1$jU8jCdPh$u7ZUBiT3gzxlf1xPJl6FI. jonasteufel:$apr1$2dsiiZ1p$Us/5i8DEt9fxeliGy7L6h/ +jalal:$apr1$hwKRrL2x$RbtSQbfZZqPuvHL9YhCKp. +gil:$apr1$p2khs49v$7poH4dUbTpCyhEO5JmgLx0 |