#!/usr/bin/env bash
# streamdeck-go watchdog — runs every 30s via systemd timer (Linux) or launchd
# StartInterval (macOS).
#
# Why this exists: when the Stream Deck is unplugged and replugged, the daemon's
# in-process reconnect logic does not always notice. On Linux, hidraw can keep
# returning read timeouts on the now-stale fd instead of surfacing an error, so
# the "3 consecutive errors → reconnect" path never triggers, and the service
# manager still reports the service as active even though the device is
# unreachable.
#
# Strategy: track the device's transient USB address (Linux: bus:device,
# macOS: Location ID). When it changes (unplug/replug) or the service is
# inactive while a device is present, restart the service.
#
# Note on pipelines: this script runs with `pipefail`, so every pipeline
# consumer below reads its input to EOF. A consumer that exits on the first
# match (`grep -q`, awk `exit`) can kill the producer with SIGPIPE, which
# `pipefail` then reports as a failed pipeline even though a match was found.

set -euo pipefail

# Stream Deck product IDs we support (see internal/device/streamdeck.go).
readonly PIDS_RE="00ba|006c|006d"

OS="$(uname -s)"
readonly OS

case "$OS" in
  Linux)
    STATE_DIR="${XDG_RUNTIME_DIR:-/tmp}"
    ;;
  Darwin)
    # No XDG_RUNTIME_DIR on macOS; use the user-private temp dir.
    STATE_DIR="${TMPDIR:-/tmp}"
    ;;
  *)
    echo "watchdog: unsupported OS: $OS" >&2
    exit 1
    ;;
esac
readonly STATE_DIR
readonly STATE_FILE="$STATE_DIR/streamdeck-go-watchdog.state"

#######################################
# Print a transient identifier for the first matching Stream Deck on the USB
# bus, or nothing if none is present. The identifier must change across
# unplug/replug so we can detect it.
# Globals:  OS, PIDS_RE (read)
# Outputs:  the identifier on stdout (Linux: "BUS:DEV", macOS: Location ID)
# Returns:  status of the listing pipeline; non-zero if the USB listing tool
#           itself fails (propagated by pipefail)
#######################################
current_addr() {
  case "$OS" in
    Linux)
      # "Bus 003 Device 052: ID 0fd9:00ba ..." → "003:052"
      # Only the first match is printed; the remaining lines are still read
      # so lsusb never sees a closed pipe.
      lsusb 2>/dev/null | awk -v pids="$PIDS_RE" '
        !found && $0 ~ ("ID 0fd9:(" pids ")") {
          gsub(":", "", $4)
          print $2 ":" $4
          found = 1
        }
      '
      ;;
    Darwin)
      # system_profiler entry per device:
      #     Stream Deck XL:
      #       Product ID: 0x00ba
      #       Vendor ID: 0x0fd9  (Elgato ...)
      #       ...
      #       Location ID: 0x14140000 / 5
      # The trailing "/ N" is the bus address — it changes on replug.
      # As on Linux, only the first match is printed and input is drained.
      system_profiler SPUSBDataType 2>/dev/null | awk -v pids="$PIDS_RE" '
        /^[[:space:]]*Product ID:/ { pid = $3 }
        /^[[:space:]]*Vendor ID:/  { vid = $3 }
        !found && /^[[:space:]]*Location ID:/ {
          sub(/^[[:space:]]*Location ID:[[:space:]]*/, "")
          if (vid == "0x0fd9" && pid ~ ("^0x(" pids ")$")) {
            print
            found = 1
          }
        }
      '
      ;;
  esac
}

#######################################
# Is the streamdeck-go service currently active?
# Globals:  OS (read)
# Returns:  0 if the service manager reports it running, non-zero otherwise
#######################################
service_active() {
  case "$OS" in
    Linux)
      systemctl --user is-active --quiet streamdeck-go.service
      ;;
    Darwin)
      # launchctl list prints "PID Status Label". A PID of "-" means
      # the agent is loaded but not running.
      local agent_pid
      agent_pid="$(launchctl list 2>/dev/null \
        | awk '$3 == "com.woodarddigital.streamdeck-go" { print $1 }')"
      [[ -n "$agent_pid" && "$agent_pid" != "-" ]]
      ;;
  esac
}

#######################################
# Restart the streamdeck-go service through the platform's service manager.
# Globals:  OS (read)
# Returns:  status of the service-manager command
#######################################
restart_service() {
  case "$OS" in
    Linux)
      systemctl --user restart streamdeck-go.service
      ;;
    Darwin)
      # kickstart -k stops and restarts; works whether or not it's running.
      launchctl kickstart -k "gui/$(id -u)/com.woodarddigital.streamdeck-go"
      ;;
  esac
}

#######################################
# Linux-only: detect a stale hidraw fd held by the daemon. When the device
# unplugs, hidraw's open fd survives but its /dev node is removed; procfs
# marks the symlink "(deleted)". hid_read_timeout on this fd silently returns
# zero bytes, so the daemon's 3-error reconnect path never trips.
# Globals:  OS (read)
# Returns:  0 if the daemon's fd table contains a deleted hidraw node,
#           1 otherwise (including: not Linux, no main PID, no /proc entry)
#######################################
stale_fd_detected() {
  [[ "$OS" == "Linux" ]] || return 1

  local pid
  pid="$(systemctl --user show -p MainPID --value streamdeck-go.service 2>/dev/null || true)"
  [[ -n "$pid" && "$pid" != "0" ]] || return 1
  [[ -d "/proc/$pid/fd" ]] || return 1

  # Resolve each fd symlink directly instead of parsing `ls -la`: ls exits
  # non-zero when an fd closes mid-listing, which would fail the pipeline
  # under pipefail even if a stale hidraw entry had been printed.
  local -r stale_re='hidraw[0-9]+ \(deleted\)'
  local fd target
  for fd in "/proc/$pid/fd"/*; do
    # An fd may vanish between the glob and the readlink (or the glob may
    # not have matched at all); just skip it.
    target="$(readlink "$fd" 2>/dev/null)" || continue
    if [[ "$target" =~ $stale_re ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Linux-only: detect that the system resumed from suspend after the daemon
# started. On resume, the xhci controller may reset the deck's USB device
# in place (same bus address, same hidraw node, fd not deleted). The kernel
# reset leaves the existing fd's input queue dead — buttons no longer reach
# userspace — but no externally visible signal flags the failure. Restarting
# the daemon is cheap and reliably fixes it.
#
# Idempotent by construction: once we restart, the daemon's
# ActiveEnterTimestamp moves past the resume event, so this check stops firing
# until the next sleep.
# Globals:  OS (read)
# Returns:  0 if the kernel log since service start contains a resume marker,
#           non-zero otherwise (including: not Linux, timestamp unavailable or
#           unparsable, journalctl failure)
#######################################
resumed_since_start() {
  [[ "$OS" == "Linux" ]] || return 1

  local started started_epoch
  started="$(systemctl --user show -p ActiveEnterTimestamp --value streamdeck-go.service 2>/dev/null || true)"
  [[ -n "$started" && "$started" != "n/a" ]] || return 1

  started_epoch="$(date -d "$started" +%s 2>/dev/null || true)"
  [[ -n "$started_epoch" ]] || return 1

  # awk drains the whole log and reports via its exit status. `grep -q` would
  # quit at the first match, journalctl would take SIGPIPE, and pipefail would
  # turn a positive detection into a failed pipeline.
  journalctl -k --since "@$started_epoch" --no-pager 2>/dev/null \
    | awk '/PM: suspend exit|PM: Finishing wakeup/ { hit = 1 } END { exit !hit }'
}

#######################################
# One watchdog pass: compare the current device address with the one recorded
# by the previous run, decide whether the service needs a restart, and
# restart it if so.
# Globals:  STATE_FILE (read; file is rewritten when a device is present)
# Outputs:  one log line on stdout when a restart is triggered
# Returns:  0 when nothing needed doing or the restart succeeded; the script
#           aborts (set -e) if device discovery or the restart fails
#######################################
main() {
  local prev="" curr reason=""

  if [[ -f "$STATE_FILE" ]]; then
    prev="$(cat "$STATE_FILE" 2>/dev/null || true)"
  fi
  curr="$(current_addr)"

  # No device present — nothing to do. Don't touch the service, and don't
  # touch the state file either: if we overwrote it with an empty string while
  # the device was absent (e.g. mid-KVM-swap), the very next run would see
  # prev="" and miss the address change on return.
  if [[ -z "$curr" ]]; then
    return 0
  fi
  printf '%s' "$curr" > "$STATE_FILE"

  if [[ -z "$prev" ]]; then
    # First observation (or state file was wiped). Only restart if the service
    # is also down — if it's already running, assume it's healthy and just
    # record the baseline.
    if ! service_active; then
      reason="device present at $curr but service is not active"
    fi
  elif [[ "$curr" != "$prev" ]]; then
    reason="device address changed: $prev → $curr (likely unplug/replug)"
  elif ! service_active; then
    reason="device present at $curr but service is not active"
  elif stale_fd_detected; then
    reason="daemon holds a deleted hidraw fd (post-unplug stale handle)"
  elif resumed_since_start; then
    reason="system resumed from suspend since daemon started (USB reset may have invalidated input queue)"
  fi

  if [[ -n "$reason" ]]; then
    echo "watchdog: $reason — restarting streamdeck-go"
    restart_service
  fi
}

main "$@"