Fixing watchdog

This commit is contained in:
2026-05-10 13:35:16 -06:00
parent 8b6b4d582d
commit a51fd2beff

View File

@@ -106,14 +106,52 @@ prev=""
curr="$(current_addr)" curr="$(current_addr)"
# Always update the state file so the next run sees a fresh baseline. # Only update the state file when the device is present. If we overwrote with
# an empty string while the device was absent (e.g. mid-KVM-swap), the very
# next run would see prev="" and miss the address change on return.
if [[ -n "$curr" ]]; then
printf '%s' "$curr" > "$STATE_FILE" printf '%s' "$curr" > "$STATE_FILE"
fi
# No device present — nothing to do. Don't touch the service. # No device present — nothing to do. Don't touch the service.
if [[ -z "$curr" ]]; then if [[ -z "$curr" ]]; then
exit 0 exit 0
fi fi
# Linux-only: detect a stale hidraw fd held by the daemon. When the device
# unplugs, hidraw's open fd survives but its /dev node is removed; procfs
# marks the symlink "(deleted)". hid_read_timeout on this fd silently returns
# zero bytes, so the daemon's 3-error reconnect path never trips.
#
# Globals:   OS (read)
# Arguments: none
# Returns:   0 if the daemon's MainPID holds an fd whose link target matches
#            "hidraw<N> (deleted)"; 1 otherwise (non-Linux, daemon not
#            running, /proc entry missing, or no such fd).
stale_fd_detected() {
  [[ "$OS" != "Linux" ]] && return 1
  local pid
  pid="$(systemctl --user show -p MainPID --value streamdeck-go.service 2>/dev/null || true)"
  # systemd reports MainPID=0 when the unit has no running main process.
  [[ -z "$pid" || "$pid" == "0" ]] && return 1
  [[ ! -d "/proc/$pid/fd" ]] && return 1

  # Resolve each fd symlink directly instead of parsing `ls -la` output.
  # If the glob matches nothing (or the directory is unreadable) the literal
  # pattern is passed to readlink, which fails and yields an empty target.
  local -r stale_re='hidraw[0-9]+ \(deleted\)'
  local fd target
  for fd in "/proc/$pid/fd"/*; do
    # An fd can be closed between the glob and the readlink; ignore that.
    target="$(readlink -- "$fd" 2>/dev/null || true)"
    if [[ "$target" =~ $stale_re ]]; then
      return 0
    fi
  done
  return 1
}
# Linux-only: detect that the system resumed from suspend after the daemon
# started. On resume, the xhci controller may reset the deck's USB device
# in place (same bus address, same hidraw node, fd not deleted). The kernel
# reset leaves the existing fd's input queue dead — buttons no longer reach
# userspace — but no externally visible signal flags the failure. Restarting
# the daemon is cheap and reliably fixes it.
#
# Idempotent by construction: once we restart, the daemon's ActiveEnterTimestamp
# moves past the resume event, so this check stops firing until the next sleep.
#
# Globals:   OS (read)
# Arguments: none
# Returns:   0 if the kernel journal since the service's ActiveEnterTimestamp
#            contains "PM: suspend exit" or "PM: Finishing wakeup"; 1
#            otherwise (non-Linux, timestamp unavailable or unparsable, or
#            no resume message found).
resumed_since_start() {
  [[ "$OS" != "Linux" ]] && return 1
  local started
  started="$(systemctl --user show -p ActiveEnterTimestamp --value streamdeck-go.service 2>/dev/null || true)"
  [[ -z "$started" || "$started" == "n/a" ]] && return 1
  local started_epoch
  started_epoch="$(date -d "$started" +%s 2>/dev/null || true)"
  [[ -z "$started_epoch" ]] && return 1

  # Capture the log before matching rather than piping into `grep -q`.
  # grep -q exits on the first hit; if journalctl still has output to write
  # it is killed by SIGPIPE, and with `set -o pipefail` in effect the whole
  # pipeline would then report failure even though a resume line was found.
  local kernel_log
  kernel_log="$(journalctl -k --since "@$started_epoch" --no-pager 2>/dev/null || true)"
  local -r resume_re='PM: suspend exit|PM: Finishing wakeup'
  [[ "$kernel_log" =~ $resume_re ]]
}
reason="" reason=""
if [[ -z "$prev" ]]; then if [[ -z "$prev" ]]; then
@@ -127,6 +165,10 @@ elif [[ "$curr" != "$prev" ]]; then
reason="device address changed: $prev$curr (likely unplug/replug)" reason="device address changed: $prev$curr (likely unplug/replug)"
elif ! service_active; then elif ! service_active; then
reason="device present at $curr but service is not active" reason="device present at $curr but service is not active"
elif stale_fd_detected; then
reason="daemon holds a deleted hidraw fd (post-unplug stale handle)"
elif resumed_since_start; then
reason="system resumed from suspend since daemon started (USB reset may have invalidated input queue)"
fi fi
if [[ -n "$reason" ]]; then if [[ -n "$reason" ]]; then