diff --git a/systemd/streamdeck-go-watchdog.sh b/systemd/streamdeck-go-watchdog.sh
index f98738f..b00924e 100644
--- a/systemd/streamdeck-go-watchdog.sh
+++ b/systemd/streamdeck-go-watchdog.sh
@@ -106,14 +106,52 @@ prev=""
 
 curr="$(current_addr)"
 
-# Always update the state file so the next run sees a fresh baseline.
-printf '%s' "$curr" > "$STATE_FILE"
+# Only update the state file when the device is present. If we overwrote with
+# an empty string while the device was absent (e.g. mid-KVM-swap), the very
+# next run would see prev="" and miss the address change on return.
+if [[ -n "$curr" ]]; then
+  printf '%s' "$curr" > "$STATE_FILE"
+fi
 
 # No device present — nothing to do. Don't touch the service.
 if [[ -z "$curr" ]]; then
   exit 0
 fi
 
+# Linux-only: spot a stale hidraw fd still held by the daemon. After an unplug
+# the daemon's open hidraw fd survives while its /dev node is removed, and
+# procfs marks the fd symlink "(deleted)". hid_read_timeout on that fd silently
+# returns zero bytes, so the daemon's 3-error reconnect path never trips.
+stale_fd_detected() {
+  [[ "$OS" == "Linux" ]] || return 1
+  local daemon_pid
+  daemon_pid="$(systemctl --user show -p MainPID --value streamdeck-go.service 2>/dev/null || true)"
+  # No usable MainPID, or no fd directory for it: nothing to inspect.
+  [[ -n "$daemon_pid" && "$daemon_pid" != "0" && -d "/proc/$daemon_pid/fd" ]] || return 1
+  ls -la "/proc/$daemon_pid/fd/" 2>/dev/null | grep -qE 'hidraw[0-9]+ \(deleted\)'
+}
+
+# Linux-only: detect that the system resumed from suspend after the daemon
+# started. On resume, the xhci controller may reset the deck's USB device
+# in place (same bus address, same hidraw node, fd not deleted). The kernel
+# reset leaves the existing fd's input queue dead — buttons no longer reach
+# userspace — but no externally visible signal flags the failure. Restarting
+# the daemon is cheap and reliably fixes it.
+#
+# Idempotent by construction: once we restart, the daemon's ActiveEnterTimestamp
+# moves past the resume event, so this check stops firing until the next sleep.
+resumed_since_start() {
+  [[ "$OS" == "Linux" ]] || return 1
+  local started started_epoch
+  started="$(systemctl --user show -p ActiveEnterTimestamp --value streamdeck-go.service 2>/dev/null || true)"
+  [[ -n "$started" && "$started" != "n/a" ]] || return 1
+  started_epoch="$(date -d "$started" +%s 2>/dev/null || true)"
+  [[ -n "$started_epoch" ]] || return 1
+  # grep reads to EOF (no -q) so journalctl is never SIGPIPE'd; matters under pipefail.
+  journalctl -k --since "@$started_epoch" --no-pager 2>/dev/null \
+    | grep -E 'PM: suspend exit|PM: Finishing wakeup' >/dev/null
+}
+
 reason=""
 
 if [[ -z "$prev" ]]; then
@@ -127,6 +165,10 @@ elif [[ "$curr" != "$prev" ]]; then
   reason="device address changed: $prev → $curr (likely unplug/replug)"
 elif ! service_active; then
   reason="device present at $curr but service is not active"
+elif stale_fd_detected; then
+  reason="daemon holds a deleted hidraw fd (post-unplug stale handle)"
+elif resumed_since_start; then
+  reason="system resumed from suspend since daemon started (USB reset may have invalidated input queue)"
 fi
 
 if [[ -n "$reason" ]]; then