From 54629aecad0651167dd1a74a72c957886f888f5e Mon Sep 17 00:00:00 2001 From: Levi Woodard Date: Sun, 10 May 2026 13:37:17 -0600 Subject: [PATCH] Initial push to gitea --- .gitignore | 9 + CLAUDE.md | 343 +++++++++++++++++++++ Makefile | 52 ++++ go.mod | 3 + internal/audio/audio.go | 42 +++ internal/clip/clip.go | 204 +++++++++++++ internal/clip/clip_test.go | 37 +++ internal/output/clipboard.go | 30 ++ internal/output/spotify.go | 154 ++++++++++ internal/output/spotify_test.go | 67 +++++ internal/summarize/anthropic.go | 123 ++++++++ internal/summarize/claudecli.go | 49 +++ internal/summarize/summarize.go | 13 + internal/transcribe/segments.go | 49 +++ internal/transcribe/transcribe.go | 10 + internal/transcribe/whispercpp.go | 213 +++++++++++++ main.go | 400 +++++++++++++++++++++++++ prompts/church-service.md | 58 ++++ prompts/clip-selector.md | 45 +++ scripts/install.sh | 480 ++++++++++++++++++++++++++++++ 20 files changed, 2381 insertions(+) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 Makefile create mode 100644 go.mod create mode 100644 internal/audio/audio.go create mode 100644 internal/clip/clip.go create mode 100644 internal/clip/clip_test.go create mode 100644 internal/output/clipboard.go create mode 100644 internal/output/spotify.go create mode 100644 internal/output/spotify_test.go create mode 100644 internal/summarize/anthropic.go create mode 100644 internal/summarize/claudecli.go create mode 100644 internal/summarize/summarize.go create mode 100644 internal/transcribe/segments.go create mode 100644 internal/transcribe/transcribe.go create mode 100644 internal/transcribe/whispercpp.go create mode 100644 main.go create mode 100644 prompts/church-service.md create mode 100644 prompts/clip-selector.md create mode 100755 scripts/install.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..82f74de --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +/publish +*.summary.md +*.spotify.html +*.transcript.txt 
+*.segments.json +*.16k.wav +*.clip.mp4 +*.clip.m4a +.env diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..445d555 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,343 @@ +# publish + +Go CLI that turns a local audio/video recording of a church service into: + +1. A markdown summary (`--summerize`) +2. A 60–90s social-media hook clip cut from the source (`--clip`) +3. (Future) A post to Spotify for Podcasters (`--post` — currently stubs out) + +The repo directory is still `summerize/` (historical), but the module and binary +are both `publish`. + +## Pipeline (one pass, shared by all modes) + +``` +input ──ffmpeg──► 16kHz mono WAV ──whisper.cpp -oj──► []Segment{Start,End,Text} + │ + ┌─────────────────┴─────────────────┐ + │ │ + PlainText(segs) FormatForLLM(segs) + │ │ + ▼ ▼ + Summarizer clip.Pick + (Anthropic API or (same Summarizer, + shelled-out claude CLI) different prompt → JSON) + │ │ + ▼ ▼ + markdown summary ffmpeg cut [start,end] + │ + └─► output.MarkdownToSpotifyHTML + (b/i/a/ul/ol/li/p subset + that Spotify show notes accept) +``` + +Whisper output is cached at `.segments.json`. Subsequent runs (different +modes, different prompt-clip params) skip whisper entirely. 
+ +## Layout + +``` +main.go flat flagset, mode dispatch, orchestration +prompts/church-service.md default --summerize prompt (go:embed) +prompts/clip-selector.md default --clip prompt; templated with + {{MIN_SECONDS}} / {{MAX_SECONDS}} (go:embed) +internal/audio/audio.go ffmpeg → 16kHz mono PCM WAV +internal/transcribe/ + transcribe.go Transcriber interface, Segment type + segments.go Segment, PlainText, FormatForLLM, mm:ss helper + whispercpp.go shells out to whisper-cli with -oj; parses JSON +internal/summarize/ + summarize.go Summarizer interface + anthropic.go direct Messages API via net/http + (no SDK dep; reads ANTHROPIC_API_KEY) + claudecli.go `claude -p ` with transcript on stdin +internal/clip/ + clip.go Selection, Pick (LLM JSON parse), Extract (ffmpeg) + clip_test.go JSON object extraction edge cases +internal/output/ + spotify.go markdown → Spotify-safe HTML + spotify_test.go + clipboard.go wl-copy / xclip / pbcopy +Makefile build/install/link/doctor/uninstall targets +scripts/install.sh interactive setup (OS + GPU detect → deps, + whisper.cpp build, model download, link) +``` + +**Zero external Go dependencies.** Stdlib only. + +## CLI surface + +``` +publish [mode...] [flags] + +modes (combine freely; defaults to --summerize): + --summerize write a markdown summary + --clip cut a 60-90s social hook clip + --post post to Spotify (not implemented yet) +``` + +Modes share whisper output, so `publish --summerize --clip sermon.mp4` only +transcribes once. 
+ +### Shared flags + +| flag | purpose | default | +|---|---|---| +| `--summarizer` | `claude-cli` or `claude-api` | `claude-cli` | +| `--model` | model name (Anthropic API path defaults to `claude-sonnet-4-6`) | empty | +| `--prompt-summary` | override summary prompt path | bundled | +| `--prompt-clip` | override clip-selector prompt path | bundled | +| `--whisper-bin` | whisper.cpp binary; auto-detects best backend (see "Backend auto-detect" below) | auto | +| `--whisper-model` | path to ggml model | `~/.cache/whisper.cpp/ggml-base.en.bin` | +| `--whisper-lang` | force language code | auto-detect | +| `--whisper-threads` | thread count | library default | +| `--segments` | segments JSON cache path | `.segments.json` | +| `--keep-transcript` | also write `.transcript.txt` | off | +| `--keep-wav` | keep the normalized WAV instead of tempdir | off | +| `-v` | verbose progress to stderr | off | + +### --summerize flags + +| flag | purpose | default | +|---|---|---| +| `--prompt` | producer's notes (any pre-written framing, title, key points) that anchor the summary | empty | +| `--md PATH` | markdown output; `-` = stdout, `""` = disable | `.summary.md` | +| `--spotify PATH` | Spotify HTML output; `-` = stdout | disabled | +| `--copy` | copy Spotify HTML to clipboard | off | + +When `--prompt` is set, the value is prepended to the user message as a "Producer's notes" block above the transcript. The bundled prompt instructs the LLM to treat producer's notes as authoritative for titles, speaker names, framing, and key points, then use the transcript to expand and enrich them. Use this when the Spotify show notes you've already drafted should drive the summary's framing rather than the LLM inferring everything from scratch. + +For longer notes, use shell expansion: `--prompt "$(cat notes.md)"`. + +Note: `--prompt-summary` (system prompt template path) and `--prompt` (user notes content) are different flags. 
The former overrides the *system* prompt; the latter feeds *user content* into it. + +### --clip flags + +| flag | purpose | default | +|---|---|---| +| `--min` | minimum clip length (seconds) | 60 | +| `--max` | maximum clip length (seconds) | 90 | +| `--out PATH` | clip output path | `.clip` (`.clip.m4a` for audio) | +| `--copy-codec` | ffmpeg `-c copy` (fast, keyframe-aligned) — **skips the 9:16 portrait crop**, since stream copy can't apply video filters | off | +| `--dry-run` | print the picked window but don't run ffmpeg | off | + +Video clips are always re-encoded as **1080×1920 portrait (9:16)** with a center +crop, capped at **1 GiB** via ffmpeg's `-fs`. The crop filter is +`crop=min(iw,ih*9/16):min(ih,iw*16/9)` so any source aspect (16:9, 4:3, 1:1, or +already-portrait) yields the largest 9:16 sub-rectangle without distortion. See +`portraitFilter` and `MaxClipBytes` in `internal/clip/clip.go`. + +## Conventions / non-obvious choices + +- **Spelling: `summerize` is intentional.** It's the original name of the project + and the user's preferred spelling. Use `summerize` (e.g. for `--summerize`) + rather than auto-correcting to `summarize` in user-facing surfaces. Internal + Go package `internal/summarize` keeps the standard spelling. +- **Pluggable Summarizer is shared between modes.** `--clip` reuses the same + Summarizer interface; the only difference is the prompt and the expectation + of JSON output. If you add a new mode that talks to an LLM, plug it in there. +- **Summarizer.Summarize takes the user content verbatim.** No implicit + `"Transcript:"` prefix or other framing. Callers (`doSummerize` in main.go, + `clip.Pick`) build the full user message themselves — that's how + `--prompt` (producer's notes) prepends a "Producer's notes:" block above + the transcript without the message getting mislabeled. +- **Whisper output is the source of truth.** All text-only consumers go through + `transcribe.PlainText(segs)`; we don't run whisper twice. 
+- **JSON parsing for clip selection is defensive.** `clip.extractJSONObject` + walks balanced braces (skipping strings) so the model can wrap its answer in + prose despite the prompt asking for raw JSON. +- **Clip extraction defaults to re-encode.** Frame-accurate cuts matter for + short social hooks; `--copy-codec` trades that for speed. +- **Anthropic API call uses net/http directly.** Adding the SDK was tempting, + but the request is one POST and avoiding the dep keeps go.sum empty. +- **`prepareWAV` cleanup is owned by the caller.** It returns a `func()` you + must `defer`. Don't call `os.RemoveAll` on the wav path yourself. +- **No subcommands.** The CLI is one flat flagset. Modes are boolean flags so + multiple can run in one invocation and share state. + +## Build / install + +**Fresh machine (recommended)** — clone, then run the interactive installer. +It detects OS + GPU, builds whisper.cpp with the right backend, downloads a +ggml model, and links `publish` + `whisper-cli-<backend>` into `~/.local/bin`: + +```bash +git clone <repo-url> ~/Git\ Repos/summerize +cd ~/Git\ Repos/summerize +make install # interactive +make doctor # just print detected platform/GPU/dependencies +``` + +Re-runnable; each step is idempotent and skippable. The script supports Arch +(`pacman`), Debian/Ubuntu (`apt`), Fedora (`dnf`), and macOS (`brew`); for +unknown distros it prints the package list and skips the install command. + +**Already built once** — just rebuild: + +```bash +go build -o publish . +# or +make link # rebuilds + (re)points ~/.local/bin/publish at the repo +``` + +The symlink at `~/.local/bin/publish` is the canonical install location; +rebuilds update in place via the symlink. 
+ +## External dependencies (runtime) + +| tool | required for | install | +|---|---|---| +| `ffmpeg` | always (audio extraction + clip cut) | `pacman -S ffmpeg` | +| `whisper-cli` (whisper.cpp) | transcription | `pacman -S whisper.cpp` for CPU; for GPU acceleration see the CUDA / ROCm / Vulkan / Metal build sections below | +| ggml whisper model | transcription | `curl -LO https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin` into `~/.cache/whisper.cpp/` | +| `claude` CLI | `--summarizer claude-cli` (default) | already installed (Claude Code) | +| `ANTHROPIC_API_KEY` | `--summarizer claude-api` | env var | +| `wl-copy` / `xclip` / `pbcopy` | `--copy` flag (Spotify HTML to clipboard) | wl-copy ships with wayland on omarchy | + +## Backend auto-detect + +When `--whisper-bin` is not set, `resolveBin` in +`internal/transcribe/whispercpp.go` picks a backend at runtime: + +1. **CUDA** — if `~/.local/bin/whisper-cli-cuda` (or `whisper-cli-cuda` on PATH) exists *and* `nvidia-smi -L` exits 0. +2. **ROCm** — if `whisper-cli-rocm` exists *and* `rocminfo` exits 0. +3. **Vulkan** — if `whisper-cli-vulkan` exists *and* `vulkaninfo --summary` exits 0. +4. **CPU fallback** — first of `whisper-cli` / `whisper-cpp` / `main` on PATH. + +Each probe is gated on a 5s timeout. The chosen backend is logged on a +single stderr line (`whisper: using CUDA backend (/path)`); `-v` adds +diagnostics about which probes were skipped or failed. The convention is +one whisper.cpp checkout per host with a per-backend symlink in +`~/.local/bin/whisper-cli-<backend>`, so the same `publish` binary works +across machines without machine-specific flags. 
+ +### CUDA build (RTX 3070 Ti / desktop) + +``` +sudo pacman -S --needed cuda # ~3GB; installs to /opt/cuda +git clone --depth=1 https://github.com/ggerganov/whisper.cpp ~/Git\ Repos/whisper.cpp +cd ~/Git\ Repos/whisper.cpp +PATH=/opt/cuda/bin:$PATH cmake -B build \ + -DGGML_CUDA=1 \ + -DCMAKE_CUDA_ARCHITECTURES=86 \ + -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++-15 +PATH=/opt/cuda/bin:$PATH cmake --build build -j$(nproc) --config Release +ln -sf "$PWD/build/bin/whisper-cli" ~/.local/bin/whisper-cli-cuda +``` + +CUDA 13.2 caps the host compiler at GCC 15; system gcc is 16, so the +`-DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++-15` line is required (the `gcc15` +package ships `g++-15` alongside the default toolchain). `sm_86` matches +the RTX 3070 Ti compute capability — adjust if the GPU changes. + +CUDA smoke test — these stderr lines should appear in any run: + +``` +whisper_init_with_params_no_state: use gpu = 1 +ggml_cuda_init: found 1 CUDA devices ... +whisper_backend_init_gpu: using CUDA0 backend +``` + +### ROCm build (Framework 16 / Radeon RX 7700S) + +The 7700S is RDNA3 (gfx1102). ROCm 6.x supports it. + +``` +sudo pacman -S --needed rocm-hip-sdk rocm-hip-runtime hipblas rocblas +git clone --depth=1 https://github.com/ggerganov/whisper.cpp ~/Git\ Repos/whisper.cpp +cd ~/Git\ Repos/whisper.cpp +HIPCXX=/opt/rocm/llvm/bin/clang++ cmake -B build-rocm \ + -DGGML_HIP=1 \ + -DAMDGPU_TARGETS=gfx1102 \ + -DCMAKE_BUILD_TYPE=Release +cmake --build build-rocm -j$(nproc) +ln -sf "$PWD/build-rocm/bin/whisper-cli" ~/.local/bin/whisper-cli-rocm +``` + +If ROCm doesn't recognize gfx1102 (older ROCm releases), set +`HSA_OVERRIDE_GFX_VERSION=11.0.0` in the shell before invoking `publish` +to spoof gfx1100 — same RDNA3 ISA, supported kernels. + +ROCm smoke test — look for `ggml_cuda_init` (HIP reuses the CUDA backend +naming in whisper.cpp) plus a ROCm device line on stderr. 
+ +### Vulkan build (universal GPU fallback) + +Vulkan is the easiest cross-vendor path; uses any GPU with a working +Vulkan driver (Mesa RADV for AMD/Intel, Nvidia proprietary, etc.). + +``` +sudo pacman -S --needed vulkan-headers vulkan-icd-loader shaderc +cd ~/Git\ Repos/whisper.cpp +cmake -B build-vulkan -DGGML_VULKAN=1 -DCMAKE_BUILD_TYPE=Release +cmake --build build-vulkan -j$(nproc) +ln -sf "$PWD/build-vulkan/bin/whisper-cli" ~/.local/bin/whisper-cli-vulkan +``` + +Slower than native CUDA/ROCm but works on machines where the vendor +toolchain is too painful to install. Useful as a portable fallback for +laptops with iGPUs. + +### Metal build (Apple Silicon) + +`make install` handles this automatically; the manual recipe is short +because cmake on macOS picks up Metal by default — no special flag. + +Prerequisites: + +``` +xcode-select --install # one-time +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" +brew install go cmake ffmpeg +``` + +Build: + +``` +git clone --depth=1 https://github.com/ggerganov/whisper.cpp ~/Git\ Repos/whisper.cpp +cd ~/Git\ Repos/whisper.cpp +cmake -B build-metal -DCMAKE_BUILD_TYPE=Release +cmake --build build-metal -j$(sysctl -n hw.ncpu) +ln -sf "$PWD/build-metal/bin/whisper-cli" ~/.local/bin/whisper-cli-metal +``` + +The resolver special-cases Darwin: if `whisper-cli-metal` exists it's +used immediately (no probe — Metal is always available on macOS). On a +Mac without that symlink, the CPU fallback finds `whisper-cli` / +`whisper-cpp` from brew (which is itself Metal-enabled by default), so a +plain `brew install whisper-cpp` is a workable lazy path. It just shows +"CPU backend" in the publish log line even though whisper.cpp is in fact +running Metal kernels. + +Metal smoke test — these stderr lines should appear in any run: + +``` +ggml_metal_init: allocating +ggml_metal_init: found device: Apple M1 ... 
+whisper_backend_init_gpu: using Metal backend +``` + +## Tests + +``` +go test ./... +``` + +Covered: +- `internal/output/spotify_test.go` — markdown→Spotify-HTML conversion, escaping +- `internal/clip/clip_test.go` — JSON object extraction, including prose-wrapped + and fence-wrapped model output + +There are no integration tests for whisper or the LLM calls — those depend on +external binaries and remote APIs. + +## Future work + +- **`--post`**: post the markdown summary as a Spotify for Podcasters episode + description. Requires the Spotify show-notes API or Spotify for Podcasters + upload integration. Reuse `output.MarkdownToSpotifyHTML` since their show- + notes editor accepts that subset. +- **Multi-clip output**: pick the top-N hooks instead of one. The current + `Selection` would become `[]Selection` and the prompt would request an array. +- **Faster `--summarizer` for short transcripts**: default to Haiku for very + short inputs to save on API costs. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..752bb09 --- /dev/null +++ b/Makefile @@ -0,0 +1,52 @@ +# publish — Makefile +# +# Common targets: +# make - build the publish binary in the repo +# make install - interactive setup: detect OS/GPU, build whisper.cpp +# with the right backend, download a model, and link +# publish + whisper-cli- into ~/.local/bin +# make doctor - print detected platform/GPU/dependencies and exit +# make link - just link the existing publish binary into PREFIX/bin +# make uninstall - remove the publish symlink (leaves whisper.cpp alone) +# make clean - remove the local publish binary + +PREFIX ?= $(HOME)/.local +BINDIR := $(PREFIX)/bin + +.PHONY: all build link install doctor uninstall clean test help + +all: build + +build: + go build -o publish . 
+ +link: build + @mkdir -p "$(BINDIR)" + @ln -sf "$(CURDIR)/publish" "$(BINDIR)/publish" + @echo "linked $(BINDIR)/publish -> $(CURDIR)/publish" + +install: + @bash scripts/install.sh + +doctor: + @bash scripts/install.sh --doctor + +uninstall: + @rm -f "$(BINDIR)/publish" + @echo "removed $(BINDIR)/publish (whisper.cpp checkout and whisper-cli-* symlinks left intact)" + +clean: + rm -f publish + +test: + go test ./... + +help: + @echo "Targets:" + @echo " make build build ./publish" + @echo " make link symlink ./publish into \$$PREFIX/bin (default ~/.local)" + @echo " make install interactive end-to-end setup (deps + whisper + model + publish)" + @echo " make doctor show detected platform/GPU/dependencies" + @echo " make uninstall remove the publish symlink" + @echo " make clean remove the built publish binary" + @echo " make test go test ./..." diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..fe1ed1f --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module publish + +go 1.26.3 diff --git a/internal/audio/audio.go b/internal/audio/audio.go new file mode 100644 index 0000000..dc989c2 --- /dev/null +++ b/internal/audio/audio.go @@ -0,0 +1,42 @@ +// Package audio normalizes arbitrary audio/video inputs into a whisper.cpp-friendly +// 16 kHz mono PCM WAV file using ffmpeg. +package audio + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" +) + +// ExtractWAV runs ffmpeg to convert input (audio or video) into a 16kHz mono +// signed-16-bit PCM WAV file at outPath. ffmpeg must be on PATH. 
+func ExtractWAV(ctx context.Context, input, outPath string) error { + if _, err := exec.LookPath("ffmpeg"); err != nil { + return fmt.Errorf("ffmpeg not found on PATH: %w", err) + } + if _, err := os.Stat(input); err != nil { + return fmt.Errorf("input not readable: %w", err) + } + if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil { + return err + } + + cmd := exec.CommandContext(ctx, "ffmpeg", + "-y", + "-loglevel", "error", + "-i", input, + "-vn", + "-ac", "1", + "-ar", "16000", + "-c:a", "pcm_s16le", + outPath, + ) + cmd.Stdout = os.Stderr + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("ffmpeg: %w", err) + } + return nil +} diff --git a/internal/clip/clip.go b/internal/clip/clip.go new file mode 100644 index 0000000..850f6fc --- /dev/null +++ b/internal/clip/clip.go @@ -0,0 +1,204 @@ +// Package clip selects the best 60–90s window from a timestamped transcript +// (using a Summarizer to do the picking) and runs ffmpeg to cut that window +// out of the original media. +package clip + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "publish/internal/summarize" + "publish/internal/transcribe" +) + +// Selection is the LLM's chosen clip window plus metadata. +type Selection struct { + StartSeconds float64 `json:"start_seconds"` + EndSeconds float64 `json:"end_seconds"` + Title string `json:"title"` + Hook string `json:"hook"` + Quote string `json:"quote"` + Reasoning string `json:"reasoning"` +} + +// Duration returns the selected window length in seconds. +func (s Selection) Duration() float64 { return s.EndSeconds - s.StartSeconds } + +// Pick asks the summarizer to choose the best window in the given segments, +// using promptTemplate (which may contain {{MIN_SECONDS}} / {{MAX_SECONDS}} +// placeholders). It clamps and validates the returned window against minSec +// and maxSec. 
+func Pick(ctx context.Context, sum summarize.Summarizer, promptTemplate string, segs []transcribe.Segment, minSec, maxSec float64) (Selection, string, error) { + if len(segs) == 0 { + return Selection{}, "", fmt.Errorf("no transcript segments to choose from") + } + prompt := strings.NewReplacer( + "{{MIN_SECONDS}}", fmt.Sprintf("%g", minSec), + "{{MAX_SECONDS}}", fmt.Sprintf("%g", maxSec), + ).Replace(promptTemplate) + + body := transcribe.FormatForLLM(segs) + + raw, err := sum.Summarize(ctx, prompt, body) + if err != nil { + return Selection{}, "", err + } + + jsonText, err := extractJSONObject(raw) + if err != nil { + return Selection{}, raw, fmt.Errorf("could not find JSON object in model output: %w", err) + } + var sel Selection + if err := json.Unmarshal([]byte(jsonText), &sel); err != nil { + return Selection{}, raw, fmt.Errorf("parsing selection JSON: %w\n--- raw ---\n%s", err, jsonText) + } + + if err := validate(&sel, segs, minSec, maxSec); err != nil { + return sel, raw, err + } + return sel, raw, nil +} + +func validate(sel *Selection, segs []transcribe.Segment, minSec, maxSec float64) error { + if sel.EndSeconds <= sel.StartSeconds { + return fmt.Errorf("invalid window: end (%g) <= start (%g)", sel.EndSeconds, sel.StartSeconds) + } + maxEnd := segs[len(segs)-1].End + if sel.StartSeconds < 0 || sel.EndSeconds > maxEnd+1.0 { + return fmt.Errorf("window [%g, %g] is outside transcript bounds [0, %g]", + sel.StartSeconds, sel.EndSeconds, maxEnd) + } + dur := sel.Duration() + // Allow small slop on either side; otherwise reject. + if dur < minSec-2 || dur > maxSec+2 { + return fmt.Errorf("window duration %.1fs is outside requested bounds [%g, %g]", + dur, minSec, maxSec) + } + return nil +} + +// extractJSONObject pulls the first balanced {...} object out of s, ignoring +// braces that appear inside JSON strings. Useful when the model wraps its +// answer in prose despite being told not to. 
//
// Every '{' in s is tried in order as a possible start. The first balanced
// span that is also valid JSON wins, so braces in surrounding prose (for
// example "{placeholder}" or a stray unmatched '{') do not hide the real
// object. If no balanced span is valid JSON, the first balanced span is
// returned unchanged so the caller's JSON parser can report a precise error.
// An error is returned only when s has no '{' or no balanced span at all.
func extractJSONObject(s string) (string, error) {
	start := strings.IndexByte(s, '{')
	if start < 0 {
		return "", fmt.Errorf("no '{' in response")
	}

	// Model replies are short, so rescanning from each '{' is cheap.
	fallback := ""
	for start >= 0 {
		if end := balancedObjectEnd(s, start); end > 0 {
			candidate := s[start:end]
			if json.Valid([]byte(candidate)) {
				return candidate, nil
			}
			if fallback == "" {
				fallback = candidate
			}
		}
		next := strings.IndexByte(s[start+1:], '{')
		if next < 0 {
			break
		}
		start += 1 + next
	}

	if fallback != "" {
		return fallback, nil
	}
	return "", fmt.Errorf("unbalanced braces")
}

// balancedObjectEnd returns the index just past the '}' that closes the '{' at
// s[start], skipping braces inside double-quoted strings (with backslash
// escapes). It returns -1 when the braces never balance.
func balancedObjectEnd(s string, start int) int {
	depth := 0
	inStr, esc := false, false
	for i := start; i < len(s); i++ {
		c := s[i]
		if inStr {
			switch {
			case esc:
				esc = false
			case c == '\\':
				esc = true
			case c == '"':
				inStr = false
			}
			continue
		}
		switch c {
		case '"':
			inStr = true
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return i + 1
			}
		}
	}
	return -1
}

// portraitFilter center-crops any source aspect ratio to a 9:16 sub-rectangle
// (no distortion, just cropping) and scales to 1080x1920. The min() expressions
// pick the largest 9:16 box that fits inside the source: 16:9 sources lose the
// left/right edges, 9:16 sources are unchanged, and 4:3 / 1:1 sources crop the
// sides. setsar=1 forces square pixels.
const portraitFilter = `crop=min(iw\,ih*9/16):min(ih\,iw*16/9),scale=1080:1920,setsar=1`

// MaxClipBytes is the hard size ceiling enforced by ffmpeg's -fs flag.
// Realistic 60–90s 1080x1920 H.264 clips at CRF 23 land 30–100 MB, so this is
// a safety cap rather than a target.
const MaxClipBytes = 1 << 30 // 1 GiB

// Extract runs ffmpeg to cut [start, end) seconds out of input into outPath.
// For video inputs, the clip is re-encoded as a 1080x1920 portrait (9:16
// center-crop) under a 1 GiB size cap. If reencode is false, stream copy is
// used (fast, keyframe-aligned, but the source aspect ratio is preserved).
+func Extract(ctx context.Context, input string, sel Selection, outPath string, reencode bool) error { + if _, err := exec.LookPath("ffmpeg"); err != nil { + return fmt.Errorf("ffmpeg not on PATH: %w", err) + } + if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil { + return err + } + + dur := sel.EndSeconds - sel.StartSeconds + args := []string{ + "-y", + "-loglevel", "error", + "-ss", fmt.Sprintf("%.3f", sel.StartSeconds), + "-i", input, + "-t", fmt.Sprintf("%.3f", dur), + } + if reencode { + if hasVideoExt(input) { + args = append(args, + "-vf", portraitFilter, + "-c:v", "libx264", + "-preset", "fast", + "-crf", "23", + "-c:a", "aac", + "-b:a", "128k", + "-movflags", "+faststart", + ) + } else { + args = append(args, + "-vn", + "-c:a", "aac", + "-b:a", "128k", + ) + } + } else { + args = append(args, "-c", "copy") + } + args = append(args, "-fs", fmt.Sprintf("%d", MaxClipBytes), outPath) + + cmd := exec.CommandContext(ctx, "ffmpeg", args...) + cmd.Stdout = os.Stderr + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("ffmpeg cut: %w", err) + } + return nil +} + +func hasVideoExt(p string) bool { + switch strings.ToLower(filepath.Ext(p)) { + case ".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v", ".flv", ".ts": + return true + } + return false +} + +// DefaultOutputPath builds .clip for video inputs and +// .m4a for audio inputs. 
+func DefaultOutputPath(input string) string { + base := strings.TrimSuffix(input, filepath.Ext(input)) + if hasVideoExt(input) { + return base + ".clip" + filepath.Ext(input) + } + return base + ".clip.m4a" +} diff --git a/internal/clip/clip_test.go b/internal/clip/clip_test.go new file mode 100644 index 0000000..c6a245e --- /dev/null +++ b/internal/clip/clip_test.go @@ -0,0 +1,37 @@ +package clip + +import "testing" + +func TestExtractJSONObject(t *testing.T) { + cases := []struct { + name string + in string + want string + }{ + {"raw json", `{"a":1}`, `{"a":1}`}, + {"with prose", "Sure, here you go:\n{\"a\":1}\nThanks", `{"a":1}`}, + {"with fence", "```json\n{\"a\":1}\n```", `{"a":1}`}, + {"nested", `prelude {"a":{"b":2},"c":3} trailing`, `{"a":{"b":2},"c":3}`}, + {"brace in string", `{"text":"hello {world}"}`, `{"text":"hello {world}"}`}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got, err := extractJSONObject(c.in) + if err != nil { + t.Fatalf("err: %v", err) + } + if got != c.want { + t.Errorf("got %q want %q", got, c.want) + } + }) + } +} + +func TestExtractJSONObjectMissing(t *testing.T) { + if _, err := extractJSONObject("no json here"); err == nil { + t.Error("expected error for missing JSON") + } + if _, err := extractJSONObject(`{"unterminated":`); err == nil { + t.Error("expected error for unbalanced braces") + } +} diff --git a/internal/output/clipboard.go b/internal/output/clipboard.go new file mode 100644 index 0000000..44c466c --- /dev/null +++ b/internal/output/clipboard.go @@ -0,0 +1,30 @@ +package output + +import ( + "fmt" + "os/exec" + "strings" +) + +// CopyToClipboard tries platform-appropriate clipboard tools and writes data +// to the first one available: wl-copy (Wayland), xclip (X11), pbcopy (macOS). +// Returns the tool name used or an error if none are available. 
+func CopyToClipboard(data string) (string, error) { + candidates := [][]string{ + {"wl-copy"}, + {"xclip", "-selection", "clipboard"}, + {"pbcopy"}, + } + for _, c := range candidates { + if _, err := exec.LookPath(c[0]); err != nil { + continue + } + cmd := exec.Command(c[0], c[1:]...) + cmd.Stdin = strings.NewReader(data) + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("%s: %w", c[0], err) + } + return c[0], nil + } + return "", fmt.Errorf("no clipboard tool found (tried wl-copy, xclip, pbcopy)") +} diff --git a/internal/output/spotify.go b/internal/output/spotify.go new file mode 100644 index 0000000..08333fe --- /dev/null +++ b/internal/output/spotify.go @@ -0,0 +1,154 @@ +// Package output renders summaries to user-visible formats. Markdown is +// passed through; Spotify HTML uses the small tag subset that Spotify for +// Podcasters' show-notes editor accepts (b, i, a, ul/ol/li, paragraphs). +package output + +import ( + "regexp" + "strings" +) + +var ( + reBoldStar = regexp.MustCompile(`\*\*([^*\n]+)\*\*`) + reBoldUnder = regexp.MustCompile(`__([^_\n]+)__`) + reItalicStar = regexp.MustCompile(`\*([^*\n]+)\*`) + reItalicUnder = regexp.MustCompile(`(^|[\s(])_([^_\n]+)_($|[\s).,!?;:])`) + reLink = regexp.MustCompile(`\[([^\]]+)\]\(([^)\s]+)\)`) + reInlineCode = regexp.MustCompile("`([^`\n]+)`") +) + +// MarkdownToSpotifyHTML converts a markdown summary into the limited HTML +// subset Spotify for Podcasters renders. Unknown markdown structures degrade +// to plain text rather than producing rejected tags. 
+func MarkdownToSpotifyHTML(md string) string { + lines := strings.Split(strings.ReplaceAll(md, "\r\n", "\n"), "\n") + + var out strings.Builder + listKind := "" // "ul" or "ol" while we're inside a list + flushList := func() { + if listKind != "" { + out.WriteString("\n") + listKind = "" + } + } + openList := func(kind string) { + if listKind != kind { + flushList() + out.WriteString("<" + kind + ">\n") + listKind = kind + } + } + + paragraph := []string{} + flushPara := func() { + if len(paragraph) == 0 { + return + } + text := strings.Join(paragraph, " ") + out.WriteString("

" + inline(text) + "

\n") + paragraph = paragraph[:0] + } + + for _, raw := range lines { + line := strings.TrimRight(raw, " \t") + trim := strings.TrimSpace(line) + + // Blank line: end current paragraph/list block. + if trim == "" { + flushPara() + flushList() + continue + } + + // Horizontal rule. + if trim == "---" || trim == "***" || trim == "___" { + flushPara() + flushList() + continue + } + + // Heading -> bold paragraph. + if h := headingText(trim); h != "" { + flushPara() + flushList() + out.WriteString("

" + inline(h) + "

\n") + continue + } + + // Blockquote -> italic paragraph. + if strings.HasPrefix(trim, "> ") { + flushPara() + flushList() + out.WriteString("

" + inline(strings.TrimPrefix(trim, "> ")) + "

\n") + continue + } + + // Unordered list item. + if strings.HasPrefix(trim, "- ") || strings.HasPrefix(trim, "* ") || strings.HasPrefix(trim, "+ ") { + flushPara() + openList("ul") + out.WriteString("
  • " + inline(trim[2:]) + "
  • \n") + continue + } + + // Ordered list item like "1. text". + if item, ok := orderedItem(trim); ok { + flushPara() + openList("ol") + out.WriteString("
  • " + inline(item) + "
  • \n") + continue + } + + // Anything else: append to current paragraph. + flushList() + paragraph = append(paragraph, trim) + } + + flushPara() + flushList() + + return strings.TrimRight(out.String(), "\n") +} + +func headingText(s string) string { + // Up to 6 leading '#' followed by a space. + hashes := 0 + for hashes < len(s) && s[hashes] == '#' { + hashes++ + } + if hashes == 0 || hashes > 6 || hashes >= len(s) || s[hashes] != ' ' { + return "" + } + return strings.TrimSpace(s[hashes+1:]) +} + +func orderedItem(s string) (string, bool) { + i := 0 + for i < len(s) && s[i] >= '0' && s[i] <= '9' { + i++ + } + if i == 0 || i+1 >= len(s) || s[i] != '.' || s[i+1] != ' ' { + return "", false + } + return strings.TrimSpace(s[i+2:]), true +} + +func inline(s string) string { + s = escapeHTML(s) + s = reInlineCode.ReplaceAllString(s, "$1") + s = reBoldStar.ReplaceAllString(s, "$1") + s = reBoldUnder.ReplaceAllString(s, "$1") + s = reItalicStar.ReplaceAllString(s, "$1") + s = reItalicUnder.ReplaceAllString(s, "$1$2$3") + s = reLink.ReplaceAllString(s, `$1`) + return s +} + +func escapeHTML(s string) string { + r := strings.NewReplacer( + "&", "&", + "<", "<", + ">", ">", + ) + return r.Replace(s) +} diff --git a/internal/output/spotify_test.go b/internal/output/spotify_test.go new file mode 100644 index 0000000..67ec62b --- /dev/null +++ b/internal/output/spotify_test.go @@ -0,0 +1,67 @@ +package output + +import ( + "strings" + "testing" +) + +func TestMarkdownToSpotifyHTML(t *testing.T) { + in := `# Sermon Title + +**Speaker:** Pastor Bob +**Scripture:** John 3:16 + +## Overview +This was a *short* message about hope. See [the site](https://example.com). + +## Key Points +- First point +- Second point with **bold** text +- Third one + +1. Step one +2. Step two + +> A pithy quote. +` + + got := MarkdownToSpotifyHTML(in) + + mustContain := []string{ + "

    Sermon Title

    ", + "Speaker:", + "

    Overview

    ", + "short", + `the site`, + "
      ", + "
    • First point
    • ", + "
    • Second point with bold text
    • ", + "
    ", + "
      ", + "
    1. Step one
    2. ", + "
    ", + "

    A pithy quote.

    ", + } + for _, s := range mustContain { + if !strings.Contains(got, s) { + t.Errorf("expected output to contain %q\n--- got ---\n%s", s, got) + } + } + + mustNotContain := []string{"

    ", "

    ", "
    ", "**", "##"} + for _, s := range mustNotContain { + if strings.Contains(got, s) { + t.Errorf("did not expect output to contain %q\n--- got ---\n%s", s, got) + } + } +} + +func TestEscapesHTML(t *testing.T) { + got := MarkdownToSpotifyHTML("A & ampersand") + if strings.Contains(got, "