For anyone running the latest Talos: I couldn't find a way to run a
watchdog script in that cut-down environment, so I've used a privileged
pod in the kube-system namespace instead. You can unbind the driver from
the device and then rebind it, but I've found that bouncing the interface
is sufficient. I've included the unbind/rebind steps, commented out, in
the DaemonSet YAML. If you run a hybrid cluster, use node affinity with
generic-device-plugin, or just node labels, so that the pods only run on
devices with macb drivers.
---
# DaemonSet manifest for the macb-watchdog pod described above.
# NOTE(review): as shown here every line starts at column 0, so the YAML
# nesting is missing; restore the indentation before applying this manifest.
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: macb-watchdog
namespace: kube-system
spec:
selector:
matchLabels:
name: macb-watchdog
template:
metadata:
labels:
name: macb-watchdog
spec:
priorityClassName: system-node-critical
# Use the node's network namespace; the script below pings peers and
# toggles the host interface with `ip link`.
hostNetwork: true
containers:
- name: watchdog
env:
# NODE_IP is filled from the pod's status.hostIP; the script compares it
# against each ping target and skips the one that matches.
- name: NODE_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
# NOTE(review): `latest` is a floating tag; consider pinning a version.
image: alpine:latest
command: ["/bin/sh", "-c"]
# The single argument is the watchdog shell script (literal block scalar).
args:
- |
# Fetch the full-featured `ip` and `ping` tools; a failure here is not fatal
# and the script carries on regardless.
if apk update; then
  apk add --no-cache iproute2 iputils-ping
fi

# Leave cleanly when the container is asked to stop (signal 15 is SIGTERM).
trap 'echo "Received SIGTERM, exiting..."; exit 0' TERM

# Driver path and platform device for the optional unbind/bind reset, and the
# network interface that gets bounced.
DRV_PATH="/sys/bus/platform/drivers/macb"
DEVICE="1f00100000.ethernet"
INTERFACE="end0"
#######################################
# Probe a fixed list of peer addresses and report whether any of them answers.
# Globals:   NODE_IP (read) - this node's own address; a matching peer is
#            skipped so the node never counts a ping to itself.
# Arguments: none
# Outputs:   "<ip> ping failed" on stdout for every peer that did not answer,
#            followed by "All pings failed..." when none did.
# Returns:   0 as soon as one peer replies, 1 if no peer replies.
#######################################
pingList() {
  # Keep the whole word list on the `for` line: a bare newline inside the list
  # ends it and turns the remaining address into a syntax error.
  for ip in 192.168.33.8 192.168.33.1 192.168.33.5 192.168.33.6 192.168.33.7; do
    # Plain `[ ... = ... ]` because the script is started with /bin/sh.
    [ "${NODE_IP}" = "${ip}" ] && continue
    # Adjust the -W ping timeout if needed.
    # `> /dev/null 2>&1` instead of `&>`: a strict POSIX shell reads `&>` as
    # "run in background, then redirect", which would make this always succeed.
    ping -q -c1 -W.1 "${ip}" > /dev/null 2>&1 && return 0
    echo "${ip} ping failed"
  done
  echo "All pings failed..."
  return 1
}
# Recovery action: bounce the interface, then wait 4 seconds before the
# caller resumes polling.
resetLink() {
  echo "Attempting driver reset..."
  # Heavier alternative, left disabled: unbind and rebind the platform device
  # from the driver, then bring the interface up.
  # if [[ -e "${DRV_PATH}/${DEVICE}" ]]
  # then
  # echo "${DEVICE}" > "${DRV_PATH}/unbind"
  # sleep 1
  # echo "${DEVICE}" > "${DRV_PATH}/bind"
  # sleep 1
  # fi
  # ip link set "${INTERFACE}" up
  ip link set "$INTERFACE" down && ip link set "$INTERFACE" up
  sleep 4
}

# Poll forever, one second between rounds; run the recovery action whenever
# a round ends with no peer reachable.
while :; do
  pingList || resetLink
  sleep 1
done
# Privileged container with the node's /sys mounted read-write: the script's
# commented-out unbind/bind path writes under /sys/bus/platform/drivers/macb.
# NOTE(review): presumably the `ip link` bounce also relies on the privileged
# setting - confirm before tightening this.
securityContext:
privileged: true
volumeMounts:
- name: host-sys
mountPath: /sys
readOnly: false
volumes:
# hostPath volume exposing the node's /sys to the container.
- name: host-sys
hostPath:
path: /sys
--
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/2133877
Title:
Complete network hang on Raspberry Pi 5 with kernel 6.17 under load -
possibly related to CPU frequency scaling
To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/linux-raspi/+bug/2133877/+subscriptions
--
ubuntu-bugs mailing list
[email protected]
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs