// publish — generate a markdown summary, a 60–90s social hook clip, or both // from a local audio/video file. Each mode is enabled by its own boolean flag. package main import ( "context" _ "embed" "encoding/json" "flag" "fmt" "os" "os/signal" "path/filepath" "strings" "syscall" "time" "publish/internal/audio" "publish/internal/clip" "publish/internal/output" "publish/internal/summarize" "publish/internal/transcribe" ) //go:embed prompts/church-service.md var defaultSummaryPrompt string //go:embed prompts/clip-selector.md var defaultClipPrompt string func main() { if err := run(os.Args[1:]); err != nil { fmt.Fprintln(os.Stderr, "publish: "+err.Error()) os.Exit(1) } } type config struct { input string // mode selection modeSummerize bool modeClip bool modePost bool // shared summarizer string model string promptSummary string promptClip string whisperBin string whisperModel string whisperLang string whisperThreads int segmentsCache string keepWAV bool keepTranscript bool verbose bool // --summerize inputs/outputs prompt string mdOut string spotifyOut string copyHTML bool // --clip outputs minSec float64 maxSec float64 clipOut string copyCodec bool dryRun bool } func run(args []string) error { var cfg config fs := flag.NewFlagSet("publish", flag.ContinueOnError) // Mode flags. fs.BoolVar(&cfg.modeSummerize, "summerize", false, "produce a markdown summary (default if no mode is set)") fs.BoolVar(&cfg.modeClip, "clip", false, "pick a 60-90s hook clip and cut it out of the source") fs.BoolVar(&cfg.modePost, "post", false, "post the summary to Spotify (not implemented yet)") // Shared flags. fs.StringVar(&cfg.summarizer, "summarizer", "claude-cli", "LLM backend: claude-cli | claude-api") fs.StringVar(&cfg.model, "model", "", "model name (claude-api default: claude-sonnet-4-6)") fs.StringVar(&cfg.promptSummary, "prompt-summary", "", "summary prompt path; empty uses bundled prompts/church-service.md") fs.StringVar(&cfg.promptClip, "prompt-clip", "", "clip-selection prompt path; empty uses bundled prompts/clip-selector.md") fs.StringVar(&cfg.whisperBin, "whisper-bin", "", "whisper.cpp binary (auto-detect if empty)") fs.StringVar(&cfg.whisperModel, "whisper-model", defaultWhisperModel(), "whisper.cpp ggml model path") fs.StringVar(&cfg.whisperLang, "whisper-lang", "", "force whisper language code (empty = auto)") fs.IntVar(&cfg.whisperThreads, "whisper-threads", 0, "whisper.cpp thread count (0 = library default)") fs.StringVar(&cfg.segmentsCache, "segments", "", `path to read/write whisper segments JSON; default: .segments.json`) fs.BoolVar(&cfg.keepWAV, "keep-wav", false, "keep the normalized 16kHz WAV next to the input") fs.BoolVar(&cfg.keepTranscript, "keep-transcript", false, "also write .transcript.txt") fs.BoolVar(&cfg.verbose, "v", false, "verbose progress output") // --summerize inputs/outputs. fs.StringVar(&cfg.prompt, "prompt", "", "[--summerize] producer's notes to anchor the summary (titles, framing, key points). For longer notes use shell expansion: --prompt \"$(cat notes.txt)\"") fs.StringVar(&cfg.mdOut, "md", "", `[--summerize] markdown output; "-" for stdout, "" disables; default: .summary.md`) fs.StringVar(&cfg.spotifyOut, "spotify", "", `[--summerize] Spotify HTML output; "-" for stdout (default: disabled)`) fs.BoolVar(&cfg.copyHTML, "copy", false, "[--summerize] copy Spotify HTML to clipboard") // --clip outputs. fs.Float64Var(&cfg.minSec, "min", 60, "[--clip] minimum clip length in seconds") fs.Float64Var(&cfg.maxSec, "max", 90, "[--clip] maximum clip length in seconds") fs.StringVar(&cfg.clipOut, "out", "", `[--clip] clip output path; default: .clip (or .clip.m4a for audio)`) fs.BoolVar(&cfg.copyCodec, "copy-codec", false, "[--clip] use ffmpeg stream copy instead of re-encoding (faster, keyframe-aligned)") fs.BoolVar(&cfg.dryRun, "dry-run", false, "[--clip] pick the clip and print metadata, but skip the ffmpeg cut") fs.Usage = func() { fmt.Fprintf(os.Stderr, `usage: publish [mode...] [flags] modes (combine freely; defaults to --summerize): --summerize write a markdown summary --clip cut a 60-90s social hook clip --post post to Spotify (not implemented yet) flags: `) fs.PrintDefaults() } if err := fs.Parse(args); err != nil { return err } if fs.NArg() != 1 { fs.Usage() return fmt.Errorf("exactly one input file is required") } cfg.input = fs.Arg(0) // Default to --summerize if no mode flag was passed. if !cfg.modeSummerize && !cfg.modeClip && !cfg.modePost { cfg.modeSummerize = true } if cfg.modePost { return fmt.Errorf("--post is not implemented yet") } // Output path defaults that depend on input. if cfg.mdOut == "" { cfg.mdOut = cfg.input + ".summary.md" } if cfg.mdOut == "-" && cfg.spotifyOut == "-" { return fmt.Errorf("--md and --spotify cannot both be \"-\"") } if cfg.segmentsCache == "" { cfg.segmentsCache = cfg.input + ".segments.json" } if cfg.clipOut == "" { cfg.clipOut = clip.DefaultOutputPath(cfg.input) } if cfg.minSec <= 0 || cfg.maxSec <= 0 || cfg.maxSec < cfg.minSec { return fmt.Errorf("invalid --min/--max bounds") } ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) defer cancel() segs, err := loadOrTranscribeSegments(ctx, cfg) if err != nil { return err } if cfg.keepTranscript { if err := os.WriteFile(cfg.input+".transcript.txt", []byte(transcribe.PlainText(segs)), 0o644); err != nil { return fmt.Errorf("writing transcript: %w", err) } } sum, err := buildSummarizer(cfg.summarizer, cfg.model) if err != nil { return err } if cfg.modeSummerize { if err := doSummerize(ctx, cfg, sum, segs); err != nil { return err } } if cfg.modeClip { if err := doClip(ctx, cfg, sum, segs); err != nil { return err } } return nil } func doSummerize(ctx context.Context, cfg config, sum summarize.Summarizer, segs []transcribe.Segment) error { systemPrompt, err := loadPrompt(cfg.promptSummary, defaultSummaryPrompt) if err != nil { return err } body := "Transcript:\n\n" + transcribe.PlainText(segs) if notes := strings.TrimSpace(cfg.prompt); notes != "" { body = "Producer's notes (treat these as authoritative for titles, framing, and key points; expand and enrich them using the transcript that follows):\n\n" + notes + "\n\n---\n\n" + body } logIf(cfg.verbose, "summarizing with %s", sum.Name()) t0 := time.Now() md, err := sum.Summarize(ctx, systemPrompt, body) if err != nil { return fmt.Errorf("summarize: %w", err) } md = strings.TrimSpace(md) logIf(cfg.verbose, "summary ready (%d chars, %s)", len(md), time.Since(t0).Round(time.Second)) if err := writeOutput(cfg.mdOut, md); err != nil { return fmt.Errorf("writing markdown: %w", err) } var html string if cfg.spotifyOut != "" || cfg.copyHTML { html = output.MarkdownToSpotifyHTML(md) } if cfg.spotifyOut != "" { if err := writeOutput(cfg.spotifyOut, html); err != nil { return fmt.Errorf("writing spotify HTML: %w", err) } } if cfg.copyHTML { tool, err := output.CopyToClipboard(html) if err != nil { return fmt.Errorf("clipboard: %w", err) } logIf(cfg.verbose, "Spotify HTML copied via %s", tool) } return nil } func doClip(ctx context.Context, cfg config, sum summarize.Summarizer, segs []transcribe.Segment) error { prompt, err := loadPrompt(cfg.promptClip, defaultClipPrompt) if err != nil { return err } logIf(cfg.verbose, "selecting clip with %s (looking for %g-%gs window)", sum.Name(), cfg.minSec, cfg.maxSec) t0 := time.Now() sel, raw, err := clip.Pick(ctx, sum, prompt, segs, cfg.minSec, cfg.maxSec) if err != nil { if raw != "" { fmt.Fprintf(os.Stderr, "model output:\n%s\n", raw) } return fmt.Errorf("selecting clip: %w", err) } logIf(cfg.verbose, "selection ready (%s)", time.Since(t0).Round(time.Second)) fmt.Printf("Title: %s\n", sel.Title) fmt.Printf("Hook: %s\n", sel.Hook) fmt.Printf("Quote: %s\n", sel.Quote) fmt.Printf("Window: %s -> %s (%.1fs)\n", mmss(sel.StartSeconds), mmss(sel.EndSeconds), sel.Duration()) fmt.Printf("Reason: %s\n", sel.Reasoning) if cfg.dryRun { return nil } logIf(cfg.verbose, "cutting clip with ffmpeg -> %s", cfg.clipOut) if err := clip.Extract(ctx, cfg.input, sel, cfg.clipOut, !cfg.copyCodec); err != nil { return err } fmt.Printf("Wrote: %s\n", cfg.clipOut) return nil } // loadOrTranscribeSegments reads cached whisper JSON if available; otherwise // extracts audio, runs whisper, writes the cache, and returns segments. func loadOrTranscribeSegments(ctx context.Context, cfg config) ([]transcribe.Segment, error) { if data, err := os.ReadFile(cfg.segmentsCache); err == nil { var segs []transcribe.Segment if jerr := json.Unmarshal(data, &segs); jerr == nil && len(segs) > 0 { logIf(cfg.verbose, "reusing cached segments from %s (%d segments)", cfg.segmentsCache, len(segs)) return segs, nil } } wavPath, cleanup, err := prepareWAV(ctx, cfg.input, cfg.keepWAV, cfg.verbose) if err != nil { return nil, err } defer cleanup() tr := buildTranscriber(cfg) logIf(cfg.verbose, "transcribing with %s", tr.Name()) t0 := time.Now() segs, err := tr.TranscribeSegments(ctx, wavPath) if err != nil { return nil, fmt.Errorf("transcribe: %w", err) } logIf(cfg.verbose, "transcript ready (%d segments, %s)", len(segs), time.Since(t0).Round(time.Second)) if data, err := json.Marshal(segs); err == nil { _ = os.WriteFile(cfg.segmentsCache, data, 0o644) logIf(cfg.verbose, "cached segments to %s", cfg.segmentsCache) } return segs, nil } // prepareWAV normalizes input to 16 kHz mono WAV. Returns the wav path and a // cleanup function (no-op if keep is true). func prepareWAV(ctx context.Context, input string, keep, verbose bool) (string, func(), error) { wavPath := input + ".16k.wav" cleanup := func() {} if !keep { tmpDir, err := os.MkdirTemp("", "publish-") if err != nil { return "", cleanup, err } wavPath = filepath.Join(tmpDir, "audio.wav") cleanup = func() { _ = os.RemoveAll(tmpDir) } } logIf(verbose, "extracting audio -> %s", wavPath) if err := audio.ExtractWAV(ctx, input, wavPath); err != nil { cleanup() return "", func() {}, fmt.Errorf("audio extraction: %w", err) } return wavPath, cleanup, nil } func loadPrompt(path, fallback string) (string, error) { if path == "" { return fallback, nil } b, err := os.ReadFile(expand(path)) if err != nil { return "", fmt.Errorf("reading prompt %s: %w", path, err) } return string(b), nil } func buildTranscriber(cfg config) *transcribe.WhisperCPP { return &transcribe.WhisperCPP{ Bin: cfg.whisperBin, Model: expand(cfg.whisperModel), Language: cfg.whisperLang, Threads: cfg.whisperThreads, Verbose: cfg.verbose, } } func buildSummarizer(kind, model string) (summarize.Summarizer, error) { switch kind { case "claude-cli", "cli": return &summarize.ClaudeCLI{Model: model}, nil case "claude-api", "anthropic", "api": return &summarize.Anthropic{Model: model}, nil default: return nil, fmt.Errorf("unknown summarizer %q", kind) } } func writeOutput(path, data string) error { if path == "" { return nil } if path == "-" { _, err := os.Stdout.WriteString(data + "\n") return err } return os.WriteFile(expand(path), []byte(data+"\n"), 0o644) } func expand(p string) string { if strings.HasPrefix(p, "~/") { if home, err := os.UserHomeDir(); err == nil { return filepath.Join(home, p[2:]) } } return p } func defaultWhisperModel() string { home, err := os.UserHomeDir() if err != nil { return "" } return filepath.Join(home, ".cache", "whisper.cpp", "ggml-base.en.bin") } func logIf(on bool, format string, args ...any) { if !on { return } fmt.Fprintf(os.Stderr, "[publish] "+format+"\n", args...) } func mmss(seconds float64) string { if seconds < 0 { seconds = 0 } total := int(seconds) h := total / 3600 m := (total % 3600) / 60 s := total % 60 if h > 0 { return fmt.Sprintf("%02d:%02d:%02d", h, m, s) } return fmt.Sprintf("%02d:%02d", m, s) }