Diffstat (limited to 'scripts')
12 files changed, 476 insertions, 0 deletions
diff --git a/scripts/disaster/gluster_endpoints/add_endpoints.sh b/scripts/disaster/gluster_endpoints/add_endpoints.sh
new file mode 100644
index 0000000..4badee9
--- /dev/null
+++ b/scripts/disaster/gluster_endpoints/add_endpoints.sh
@@ -0,0 +1,17 @@
+[[ $# -ne 1 ]] && { echo "Usage: $0 <NEW_NODE_IP>"; exit 1; }
+
+NEW_IP="$1"
+
+oc get namespaces -o name | sed 's/namespaces\///' | \
+while read NS; do
+  if oc -n "$NS" get endpoints gfs &>/dev/null; then
+    echo "✓ Patching $NS/gfs with $NEW_IP"
+#    echo oc -n "$NS" patch endpoints gfs --type=strategic --patch="{\"subsets\":[{\"addresses\":[{\"ip\":\"$NEW_IP\"}]}]}"
+#    echo oc -n "$NS" patch ep gfs --type=strategic --patch='{"subsets":[{"addresses":[{"ip":"'"$NEW_IP"'"}]}]}'
+    oc -n "$NS" patch ep gfs --type=json -p='[{"op": "add", "path": "/subsets/0/addresses/-", "value": {"ip": "'"$NEW_IP"'"}}]'
+  else
+    echo "✗ No gfs endpoint in $NS (skipping)"
+  fi
+done
+
+echo "Done. Verify: oc get ep gfs -A -o wide"
\ No newline at end of file
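
Note: the JSON patch above appends to /subsets/0/addresses and therefore assumes the gfs Endpoints object already carries at least one subset; on an object with no subsets the "add" op fails. A hedged fallback sketch, not part of the commit (the "port: 1" placeholder is an assumption, following the common GlusterFS endpoints convention):

    # Sketch: fall back to a merge patch when no subsets exist yet.
    # The merge patch replaces the whole subsets array, so it is only
    # safe in the fallback branch where the array was absent or empty.
    oc -n "$NS" patch ep gfs --type=json \
      -p='[{"op": "add", "path": "/subsets/0/addresses/-", "value": {"ip": "'"$NEW_IP"'"}}]' \
    || oc -n "$NS" patch ep gfs --type=merge \
      -p='{"subsets":[{"addresses":[{"ip":"'"$NEW_IP"'"}],"ports":[{"port":1}]}]}'
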
diff --git a/scripts/disaster/gluster_endpoints/backups/ipekatrin1-edited.yaml b/scripts/disaster/gluster_endpoints/backups/ipekatrin1-edited.yaml
new file mode 100644
index 0000000..6a8dc63
--- /dev/null
+++ b/scripts/disaster/gluster_endpoints/backups/ipekatrin1-edited.yaml
@@ -0,0 +1,85 @@
+apiVersion: v1
+kind: Node
+metadata:
+  annotations:
+    alpha.kubernetes.io/provided-node-ip: 192.168.13.1
+    volumes.kubernetes.io/controller-managed-attach-detach: "true"
+  creationTimestamp: 2018-03-23T04:20:04Z
+  labels:
+    beta.kubernetes.io/arch: amd64
+    beta.kubernetes.io/os: linux
+    compute_node: "0"
+    fat_memory: "0"
+    fqdn: ipekatrin1.ipe.kit.edu
+    gpu_node: "0"
+    hostid: "1"
+    hostname: ipekatrin1
+    kubernetes.io/hostname: ipekatrin1.ipe.kit.edu
+    master: "1"
+    node-role.kubernetes.io/master: "true"
+    openshift-infra: apiserver
+    permanent: "1"
+    pod_node: "1"
+    production: "1"
+    region: infra
+    server: "1"
+    zone: default
+  name: ipekatrin1.ipe.kit.edu
+  resourceVersion: "1138908753"
+  selfLink: /api/v1/nodes/ipekatrin1.ipe.kit.edu
+  uid: 7616a958-2e51-11e8-969e-0cc47adef108
+spec:
+  externalID: ipekatrin1.ipe.kit.edu
+status:
+  addresses:
+  - address: 192.168.13.1
+    type: InternalIP
+  - address: ipekatrin1.ipe.kit.edu
+    type: Hostname
+  allocatable:
+    cpu: "40"
+    memory: 263757760Ki
+    pods: "250"
+  capacity:
+    cpu: "40"
+    memory: 263860160Ki
+    pods: "250"
+  conditions:
+  - lastHeartbeatTime: 2025-10-23T19:01:20Z
+    lastTransitionTime: 2025-10-23T19:02:02Z
+    message: Kubelet stopped posting node status.
+    reason: NodeStatusUnknown
+    status: Unknown
+    type: OutOfDisk
+  - lastHeartbeatTime: 2025-10-23T19:01:20Z
+    lastTransitionTime: 2025-10-23T19:02:02Z
+    message: Kubelet stopped posting node status.
+    reason: NodeStatusUnknown
+    status: Unknown
+    type: MemoryPressure
+  - lastHeartbeatTime: 2025-10-23T19:01:20Z
+    lastTransitionTime: 2025-10-23T19:02:02Z
+    message: Kubelet stopped posting node status.
+    reason: NodeStatusUnknown
+    status: Unknown
+    type: DiskPressure
+  - lastHeartbeatTime: 2025-10-23T19:01:20Z
+    lastTransitionTime: 2025-10-23T19:02:02Z
+    message: Kubelet stopped posting node status.
+    reason: NodeStatusUnknown
+    status: Unknown
+    type: Ready
+  daemonEndpoints:
+    kubeletEndpoint:
+      Port: 10250
+  nodeInfo:
+    architecture: amd64
+    bootID: a87a0b63-abf8-4b1d-9a1a-49197b26817e
+    containerRuntimeVersion: docker://1.12.6
+    kernelVersion: 3.10.0-693.21.1.el7.x86_64
+    kubeProxyVersion: v1.7.6+a08f5eeb62
+    kubeletVersion: v1.7.6+a08f5eeb62
+    machineID: 73b3f7f0088b44adb16582623d7747b1
+    operatingSystem: linux
+    osImage: CentOS Linux 7 (Core)
+    systemUUID: 00000000-0000-0000-0000-0CC47ADEF108
diff --git a/scripts/disaster/gluster_endpoints/backups/ipekatrin1.yaml b/scripts/disaster/gluster_endpoints/backups/ipekatrin1.yaml
new file mode 100644
index 0000000..5e45f12
--- /dev/null
+++ b/scripts/disaster/gluster_endpoints/backups/ipekatrin1.yaml
@@ -0,0 +1,87 @@
+apiVersion: v1
+kind: Node
+metadata:
+  annotations:
+    alpha.kubernetes.io/provided-node-ip: 192.168.13.1
+    volumes.kubernetes.io/controller-managed-attach-detach: "true"
+  creationTimestamp: 2018-03-23T04:20:04Z
+  labels:
+    beta.kubernetes.io/arch: amd64
+    beta.kubernetes.io/os: linux
+    compute_node: "0"
+    fat_memory: "0"
+    fat_storage: "1"
+    fqdn: ipekatrin1.ipe.kit.edu
+    glusterfs: storage-host
+    gpu_node: "0"
+    hostid: "1"
+    hostname: ipekatrin1
+    kubernetes.io/hostname: ipekatrin1.ipe.kit.edu
+    master: "1"
+    node-role.kubernetes.io/master: "true"
+    openshift-infra: apiserver
+    permanent: "1"
+    pod_node: "1"
+    production: "1"
+    region: infra
+    server: "1"
+    zone: default
+  name: ipekatrin1.ipe.kit.edu
+  resourceVersion: "1137118496"
+  selfLink: /api/v1/nodes/ipekatrin1.ipe.kit.edu
+  uid: 7616a958-2e51-11e8-969e-0cc47adef108
+spec:
+  externalID: ipekatrin1.ipe.kit.edu
+status:
+  addresses:
+  - address: 192.168.13.1
+    type: InternalIP
+  - address: ipekatrin1.ipe.kit.edu
+    type: Hostname
+  allocatable:
+    cpu: "40"
+    memory: 263757760Ki
+    pods: "250"
+  capacity:
+    cpu: "40"
+    memory: 263860160Ki
+    pods: "250"
+  conditions:
+  - lastHeartbeatTime: 2025-10-23T19:01:20Z
+    lastTransitionTime: 2025-10-23T19:02:02Z
+    message: Kubelet stopped posting node status.
+    reason: NodeStatusUnknown
+    status: Unknown
+    type: OutOfDisk
+  - lastHeartbeatTime: 2025-10-23T19:01:20Z
+    lastTransitionTime: 2025-10-23T19:02:02Z
+    message: Kubelet stopped posting node status.
+    reason: NodeStatusUnknown
+    status: Unknown
+    type: MemoryPressure
+  - lastHeartbeatTime: 2025-10-23T19:01:20Z
+    lastTransitionTime: 2025-10-23T19:02:02Z
+    message: Kubelet stopped posting node status.
+    reason: NodeStatusUnknown
+    status: Unknown
+    type: DiskPressure
+  - lastHeartbeatTime: 2025-10-23T19:01:20Z
+    lastTransitionTime: 2025-10-23T19:02:02Z
+    message: Kubelet stopped posting node status.
+    reason: NodeStatusUnknown
+    status: Unknown
+    type: Ready
+  daemonEndpoints:
+    kubeletEndpoint:
+      Port: 10250
+  nodeInfo:
+    architecture: amd64
+    bootID: a87a0b63-abf8-4b1d-9a1a-49197b26817e
+    containerRuntimeVersion: docker://1.12.6
+    kernelVersion: 3.10.0-693.21.1.el7.x86_64
+    kubeProxyVersion: v1.7.6+a08f5eeb62
+    kubeletVersion: v1.7.6+a08f5eeb62
+    machineID: 73b3f7f0088b44adb16582623d7747b1
+    operatingSystem: linux
+    osImage: CentOS Linux 7 (Core)
+    systemUUID: 00000000-0000-0000-0000-0CC47ADEF108
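
Note: apart from resourceVersion, ipekatrin1-edited.yaml differs from the ipekatrin1.yaml backup only in dropping the fat_storage: "1" and glusterfs: storage-host labels, i.e. it takes the node out of GlusterFS storage duty. A sketch of the equivalent live change, assuming you edit the node in place rather than replace it from the backup (a trailing dash removes a label):

    # Remove the two storage labels directly on the live node object.
    oc label node ipekatrin1.ipe.kit.edu glusterfs- fat_storage-
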
diff --git a/scripts/disaster/gluster_endpoints/backups/storageclasses_backup_2025-10-29.yaml b/scripts/disaster/gluster_endpoints/backups/storageclasses_backup_2025-10-29.yaml
new file mode 100644
index 0000000..77e3452
--- /dev/null
+++ b/scripts/disaster/gluster_endpoints/backups/storageclasses_backup_2025-10-29.yaml
@@ -0,0 +1,38 @@
+apiVersion: v1
+items:
+- apiVersion: storage.k8s.io/v1
+  kind: StorageClass
+  metadata:
+    creationTimestamp: 2018-03-23T04:24:52Z
+    name: glusterfs-storage
+    namespace: ""
+    resourceVersion: "6403"
+    selfLink: /apis/storage.k8s.io/v1/storageclasses/glusterfs-storage
+    uid: 219550a3-2e52-11e8-969e-0cc47adef108
+  parameters:
+    resturl: http://heketi-storage.glusterfs.svc.cluster.local:8080
+    restuser: admin
+    secretName: heketi-storage-admin-secret
+    secretNamespace: glusterfs
+  provisioner: kubernetes.io/glusterfs
+- apiVersion: storage.k8s.io/v1
+  kind: StorageClass
+  metadata:
+    creationTimestamp: 2018-03-23T04:25:31Z
+    name: glusterfs-storage-block
+    namespace: ""
+    resourceVersion: "6528"
+    selfLink: /apis/storage.k8s.io/v1/storageclasses/glusterfs-storage-block
+    uid: 38ff5088-2e52-11e8-969e-0cc47adef108
+  parameters:
+    chapauthenabled: "true"
+    hacount: "3"
+    restsecretname: heketi-storage-admin-secret-block
+    restsecretnamespace: glusterfs
+    resturl: http://heketi-storage.glusterfs.svc.cluster.local:8080
+    restuser: admin
+  provisioner: gluster.org/glusterblock
+kind: List
+metadata:
+  resourceVersion: ""
+  selfLink: ""
diff --git a/scripts/disaster/gluster_endpoints/check_pv.sh b/scripts/disaster/gluster_endpoints/check_pv.sh
new file mode 100644
index 0000000..1f2a7e4
--- /dev/null
+++ b/scripts/disaster/gluster_endpoints/check_pv.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Only the PV name is emitted here: the for-loop below word-splits $pvs,
+# so any decorated output would break the "oc get pv" calls.
+pvs=$(oc get pv -o json | jq -r '
+    .items[]
+    | select(.spec.glusterfs?)
+    | select(.spec.glusterfs.endpoints != "gfs")
+    | .metadata.name')
+
+echo "PV usage:"
+echo
+
+#pvs=$(oc get pv --no-headers | awk '{print $1}')
+
+for pv in $pvs; do
+  # Extract PVC and namespace bound to PV
+  pvc=$(oc get pv "$pv" -o jsonpath='{.spec.claimRef.name}' 2>/dev/null)
+  ns=$(oc get pv "$pv" -o jsonpath='{.spec.claimRef.namespace}' 2>/dev/null)
+
+  if [[ -z "$pvc" || -z "$ns" ]]; then
+    echo "$pv → UNUSED"
+    echo
+    continue
+  fi
+
+  echo "$pv → PVC: $ns/$pvc"
+
+  # jq instead of a JSONPath filter (safer with unusual pod names)
+  pods=$(oc get pods -n "$ns" -o name \
+    | while read -r pod; do
+        oc get "$pod" -n "$ns" -o json \
+          | jq -r --arg pvc "$pvc" '
+              . as $pod |
+              .spec.volumes[]?
+              | select(.persistentVolumeClaim?
+                       and .persistentVolumeClaim.claimName == $pvc)
+              | $pod.metadata.name
+            ' 2>/dev/null
+      done \
+    | sort -u
+  )
+
+  if [[ -z "$pods" ]]; then
+    echo "  → PVC bound but no running Pod is using it"
+  else
+    echo "  → Pods:"
+    echo "$pods" | sed 's/^/    - /'
+  fi
+
+  echo
+done
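
Note: check_pv.sh only reports gluster PVs whose endpoints deviate from the shared "gfs" object; it does not change them. A hedged sketch of the corresponding fix for one flagged PV (the PV source is treated as immutable by newer API servers, so whether this patch is accepted on a given cluster is an assumption):

    # Sketch, not part of the commit: repoint one PV at the shared endpoints.
    oc patch pv "$pv" --type=json \
      -p='[{"op": "replace", "path": "/spec/glusterfs/endpoints", "value": "gfs"}]'
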
diff --git a/scripts/disaster/gluster_endpoints/find_inline_gluster_in_pods.sh b/scripts/disaster/gluster_endpoints/find_inline_gluster_in_pods.sh
new file mode 100644
index 0000000..e116fb7
--- /dev/null
+++ b/scripts/disaster/gluster_endpoints/find_inline_gluster_in_pods.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+for p in $(oc get pods --all-namespaces --no-headers | awk '{print $2":"$1}'); do
+  pod=${p%:*}; ns=${p#*:}
+  echo "=== $ns/$pod ==="
+  oc -n "$ns" get pod "$pod" -o json | grep gluster
+done
diff --git a/scripts/disaster/gluster_endpoints/remove_endpoints.sh b/scripts/disaster/gluster_endpoints/remove_endpoints.sh
new file mode 100644
index 0000000..f4623f6
--- /dev/null
+++ b/scripts/disaster/gluster_endpoints/remove_endpoints.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+TARGET_IP="192.168.12.1"
+
+for ns in $(oc get ns --no-headers | awk '{print $1}'); do
+  for epname in gfs glusterfs-dynamic-etcd glusterfs-dynamic-metrics-cassandra-1 glusterfs-dynamic-mongodb glusterfs-dynamic-registry-claim glusterfs-dynamic-sharelatex-docker; do
+    ep=$(oc get endpoints "$epname" -n "$ns" -o json 2>/dev/null) || continue
+
+    modified="$(printf '%s' "$ep" | jq \
+      --arg ip "$TARGET_IP" \
+      '(.subsets[]?.addresses |= map(select(.ip != $ip)))'
+    )"
+
+    if diff <(echo "$ep") <(echo "$modified") >/dev/null; then
+      continue
+    fi
+
+    echo -n "Namespace: $ns/$epname:"
+    echo -n "$ep" | jq '.subsets[].addresses'
+    echo -n " ===> "
+    echo -n "$modified" | jq '.subsets[].addresses'
+    echo
+
+    # NOTE: this line is live and APPLIES the change; comment it out for a dry run:
+    echo "$modified" | oc replace -f - -n "$ns"
+  done
+done
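
Note: stripping the last address from a subset leaves an empty addresses list, which endpoint validation rejects on replace. A hedged refinement of the jq filter above that also prunes emptied subsets (adjust if you rely on notReadyAddresses, which this sketch would drop along with the subset):

    # Sketch: same filter as the script, plus removal of subsets whose
    # address list became empty after the target IP was filtered out.
    jq --arg ip "$TARGET_IP" \
      '(.subsets[]?.addresses |= map(select(.ip != $ip)))
       | .subsets |= map(select((.addresses // []) | length > 0))'
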
diff --git a/scripts/disaster/gluster_endpoints/remove_storageclasses.sh b/scripts/disaster/gluster_endpoints/remove_storageclasses.sh
new file mode 100644
index 0000000..063650d
--- /dev/null
+++ b/scripts/disaster/gluster_endpoints/remove_storageclasses.sh
@@ -0,0 +1,7 @@
+# Backups provided in backups/storageclasses_backup_2025-10-29.yaml
+oc delete sc glusterfs-storage
+oc delete sc glusterfs-storage-block
+
+# Each ran as a single replica
+oc scale dc/glusterblock-storage-provisioner-dc -n glusterfs --replicas=0
+oc scale dc/heketi-storage -n glusterfs --replicas=0
diff --git a/scripts/disaster/walker.sh b/scripts/disaster/walker.sh
new file mode 100644
index 0000000..0211105
--- /dev/null
+++ b/scripts/disaster/walker.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+
+#find /mnt/provision/kaas/adei -type f -print0 | xargs -0 -I{} -n 1 sh -c ' dd if="$1" of=/dev/null bs=1M status=none || true; sleep .5' _ "{}"
+
+#find /mnt/ands/glusterfs/brick-provision/kaas/bora -type f -size 0 -print0 | \
+#while IFS= read -r -d '' f; do
+#  echo "Removing $f"
+#  setfattr -x trusted.glusterfs.mdata "$f" 2>/dev/null || true
+#  for a in $(getfattr -d -m trusted.afr -e hex "$f" 2>/dev/null | awk -F= '/trusted\.afr/{print $1}'); do
+#    setfattr -x "$a" "$f" 2>/dev/null || true
+#  done
+#done
+
+#echo 3 | sudo tee /proc/sys/vm/drop_caches
+#find /mnt/wave/ -type f -print0 | xargs -0 -I{} -n 1 -P 8 sh -c '
+#  f="$1"
+#  dd if="$f" of=/dev/null bs=1M status=none || true
+#  sz=$(stat -c%s "$f" 2>/dev/null || echo 0)
+#  echo "$f $sz"
+#  if [ "$sz" -eq 0 ]; then
+#    # give gluster a breath and try again, like you do manually
+#    sleep 0.5
+#    dd if="$f" of=/dev/null bs=1M status=none 2>/dev/null || true
+##    sz=$(stat -c%s "$f" 2>/dev/null || echo 0)
+#  fi
+#  ' _ "{}"
+
+#find /mnt/datastore/services/gogs -type f -print0 | xargs -0 -n200 -P16 rm -
+#find /mnt/datastore/services/gogs -depth -type d -empty -delete
+#find /mnt/datastore/services/gogs/repositories -maxdepth 1 -mindepth 1 -type d -print0 | xargs -0 -I{} -n1 -P200 sh -c 'rm -rf "$1"' _ "{}"
+
+
+#echo 3 | sudo tee /proc/sys/vm/drop_caches
+#find /mnt/ands/glusterfs/brick-katrin_data -name .glusterfs -prune -o -type f -size 0 -print0 | xargs -0 -I{} -n 1 -P 8 sh -c '
+#  fbrick="$1"
+#  brick_prefix="/mnt/ands/glusterfs/brick-katrin_data"
+#  mount_prefix="/mnt/katrin"
+#  fmount="${fbrick/#$brick_prefix/$mount_prefix}"
+#  dd if="$fmount" of=/dev/null bs=1M status=none || true
+#  sz=$(stat -c%s "$fbrick" 2>/dev/null || echo 0)
+#  echo "$fmount $sz"
+#  if [ "$sz" -eq 0 ]; then
+#    # give gluster a breath and try again, like you do manually
+#    sleep 0.5
+#    dd if="$fmount" of=/dev/null bs=1M status=none 2>/dev/null || true
+##    sz=$(stat -c%s "$fbrick" 2>/dev/null || echo 0)
+#  fi
+#  ' _ "{}"
+#
+
+echo 3 | sudo tee /proc/sys/vm/drop_caches
+find /mnt/ands/glusterfs/brick-katrin_data -name .glusterfs -prune -o -type f -print0 | xargs -0 -I{} -n 1 -P 8 sh -c '
+  fbrick="$1"
+  mount_prefix="/mnt/katrin"
+  brick_prefix="/mnt/ands/glusterfs/brick-katrin_data"
+  fmount="${fbrick/#$brick_prefix/$mount_prefix}"
+  szbrick=$(stat -c%s "$fbrick" 2>/dev/null || echo 0)
+  szmount=$(stat -c%s "$fmount" 2>/dev/null || echo 0)
+  if [ "$szbrick" -ne "$szmount" ]; then
+    dd if="$fmount" of=/dev/null bs=1M status=none 2>/dev/null || true
+    sz=$(stat -c%s "$fbrick" 2>/dev/null || echo 0)
+    while [ "$sz" -ne "$szmount" ]; do
+      echo "* $fmount $szmount $szbrick => $sz"
+      sleep 1
+      dd if="$fmount" of=/dev/null bs=1M status=none 2>/dev/null || true
+      sz=$(stat -c%s "$fbrick" 2>/dev/null || echo 0)
+    done
+    echo "$fmount $szmount $szbrick => $sz"
+  fi
+  ' _ "{}"
+
+
diff --git a/scripts/maintain/gluster/bricks_move_heketi.sh b/scripts/maintain/gluster/bricks_move_heketi.sh
new file mode 100644
index 0000000..36b8602
--- /dev/null
+++ b/scripts/maintain/gluster/bricks_move_heketi.sh
@@ -0,0 +1,39 @@
+HOST="192.168.12.1"
+NEW_BASE="/mnt/ands/glusterfs/vg_ce3a7c1bb6da5c98ce4bb3e76aeacb8b"
+GLUSTER_BIN="gluster"
+DRYRUN=1        # set to 0 to actually run
+GLUSTER_UID=107 # adjust if your gluster user has a different uid/gid
+
+# get all volumes like vol_<uid>
+VOLS=$($GLUSTER_BIN volume list | grep '^vol_')
+
+for VOL in $VOLS; do
+  # find bricks on this host
+  # lines look like: "Brick2: 192.168.12.1:/var/lib/heketi/.../brick"
+  mapfile -t OLDBRICKS < <($GLUSTER_BIN volume info "$VOL" \
+    | grep "$HOST:" \
+    | awk '{print $2}')
+
+  # skip volumes that don't have a brick on this host
+  if [ ${#OLDBRICKS[@]} -eq 0 ]; then
+    continue
+  fi
+
+  for OLD in "${OLDBRICKS[@]}"; do
+    BRICKID=$(echo "$OLD" | sed -n 's#.*/\(brick_[^/]*\)/brick#\1#p')
+    if [ -z "$BRICKID" ]; then
+      echo "WARN: could not extract brick ID from $OLD"
+      continue
+    fi
+
+    NEW="$HOST:$NEW_BASE/$BRICKID"
+
+    echo "=== volume: $VOL ==="
+    echo "old brick: $OLD"
+    echo "new brick: $NEW"
+
+    # honor the DRYRUN flag declared above
+    [ "$DRYRUN" -eq 1 ] || $GLUSTER_BIN volume replace-brick "$VOL" "$OLD" "$NEW" commit force
+  done
+done
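
Note: after "replace-brick ... commit force" the new brick starts empty and is repopulated by self-heal. A short follow-up sketch per volume, using the same variables as the script (not part of the commit):

    # Check the rebuild of the replaced brick before moving to the next volume.
    $GLUSTER_BIN volume heal "$VOL" info
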
diff --git a/scripts/maintain/gluster/bricks_populate.sh b/scripts/maintain/gluster/bricks_populate.sh
new file mode 100644
index 0000000..15790a1
--- /dev/null
+++ b/scripts/maintain/gluster/bricks_populate.sh
@@ -0,0 +1,11 @@
+for brick in brick-*; do
+  [ -d "$brick/.glusterfs" ] && continue
+  name=${brick#brick-}
+
+  echo "$name - $brick"
+
+  setfattr -n trusted.gfid -v 0sAAAAAAAAAAAAAAAAAAAAAQ== "/mnt/ands/glusterfs/$brick"
+  setfattr -n trusted.glusterfs.volume-id -v 0x$(gluster volume info "$name" | grep 'Volume ID' | awk '{print $3}' | tr -d '-') "/mnt/ands/glusterfs/$brick"
+  mkdir -p "/mnt/ands/glusterfs/$brick"/.glusterfs/{indices,exports,xattrop,locks}
+
+done
diff --git a/scripts/maintain/gluster/heal-walk.sh b/scripts/maintain/gluster/heal-walk.sh
new file mode 100644
index 0000000..4c8d134
--- /dev/null
+++ b/scripts/maintain/gluster/heal-walk.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+
+#find /mnt/provision/kaas/adei -type f -print0 | xargs -0 -I{} -n 1 sh -c ' dd if="$1" of=/dev/null bs=1M status=none || true; sleep .5' _ "{}"
+
+#find /mnt/ands/glusterfs/brick-provision/kaas/bora -type f -size 0 -print0 | \
+#while IFS= read -r -d '' f; do
+#  echo "Removing $f"
+#  setfattr -x trusted.glusterfs.mdata "$f" 2>/dev/null || true
+#  for a in $(getfattr -d -m trusted.afr -e hex "$f" 2>/dev/null | awk -F= '/trusted\.afr/{print $1}'); do
+#    setfattr -x "$a" "$f" 2>/dev/null || true
+#  done
+#done
+
+#find /mnt/datastore/services/gogs -type f -print0 | xargs -0 -n200 -P16 rm -
+#find /mnt/datastore/services/gogs -depth -type d -empty -delete
+#find /mnt/datastore/services/gogs/repositories -maxdepth 1 -mindepth 1 -type d -print0 | xargs -0 -I{} -n1 -P200 sh -c 'rm -rf "$1"' _ "{}"
+
+
+echo 3 | sudo tee /proc/sys/vm/drop_caches
+find /mnt/wave/ -type f -print0 | xargs -0 -I{} -n 1 -P 8 sh -c '
+  f="$1"
+  dd if="$f" of=/dev/null bs=1M status=none || true
+  sz=$(stat -c%s "$f" 2>/dev/null || echo 0)
+  echo "$f $sz"
+  if [ "$sz" -eq 0 ]; then
+    # give gluster a breath and try again, like you do manually
+    sleep 0.5
+    dd if="$f" of=/dev/null bs=1M status=none 2>/dev/null || true
+#    sz=$(stat -c%s "$f" 2>/dev/null || echo 0)
+  fi
+  ' _ "{}"
+
+
+#find /mnt/wave/ -type f -print0 | xargs -0 -I{} -n 1 -P 8 sh -c 'echo $1; dd if="$1" of=/dev/null bs=1M status=none || true; sleep .5' _ {}
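
Note: walker.sh and heal-walk.sh trigger self-heal from the client side by reading every file through the FUSE mount, which still works when index-based healing is stuck. The server-side equivalent, assuming the self-heal daemon is running (<volname> is a placeholder):

    # Queue a full self-heal crawl and watch its progress on the servers.
    gluster volume heal <volname> full
    gluster volume heal <volname> info
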
