// publish — generate a markdown summary, a 60–90s social hook clip, or both
// from a local audio/video file. Each mode is enabled by its own boolean flag.
package main
import (
"context"
_ "embed"
"encoding/json"
"flag"
"fmt"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
"time"
"publish/internal/audio"
"publish/internal/clip"
"publish/internal/output"
"publish/internal/summarize"
"publish/internal/transcribe"
)
// defaultSummaryPrompt is the bundled summary system prompt, embedded from
// prompts/church-service.md at build time. doSummerize passes it to loadPrompt
// as the fallback used when --prompt-summary is empty.
//go:embed prompts/church-service.md
var defaultSummaryPrompt string
// defaultClipPrompt is the bundled clip-selection prompt, embedded from
// prompts/clip-selector.md at build time. doClip passes it to loadPrompt as
// the fallback used when --prompt-clip is empty.
//go:embed prompts/clip-selector.md
var defaultClipPrompt string
// main hands the command-line arguments (without the program name) to run.
// When run reports an error, it is printed to stderr with a "publish: " prefix
// and the process exits with status 1.
func main() {
	err := run(os.Args[1:])
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, "publish: "+err.Error())
	os.Exit(1)
}
// config collects everything run parses from the command line: the positional
// input file plus one field per flag. It is passed by value to the helpers
// that perform each mode.
type config struct {
// input is the single positional argument: the audio/video file to process.
input string
// mode selection
// modeSummerize is --summerize; run also turns it on when no mode flag is passed.
modeSummerize bool
// modeClip is --clip.
modeClip bool
// modePost is --post; run currently rejects it as not implemented.
modePost bool
// shared
// summarizer is --summarizer, the backend name handed to buildSummarizer.
summarizer string
// model is --model, forwarded to the chosen summarizer backend.
model string
// promptSummary is --prompt-summary, a prompt file path; empty selects the embedded default.
promptSummary string
// promptClip is --prompt-clip, a prompt file path; empty selects the embedded default.
promptClip string
// whisperBin is --whisper-bin, the whisper.cpp binary.
whisperBin string
// whisperModel is --whisper-model, the ggml model path (defaults to defaultWhisperModel()).
whisperModel string
// whisperLang is --whisper-lang, a forced language code; empty leaves it to whisper.
whisperLang string
// whisperThreads is --whisper-threads; 0 keeps the library default.
whisperThreads int
// segmentsCache is --segments, the segments JSON cache path; run replaces an
// empty value with input + ".segments.json".
segmentsCache string
// keepWAV is --keep-wav: write the normalized WAV next to the input instead
// of into a temporary directory that is removed afterwards.
keepWAV bool
// keepTranscript is --keep-transcript: also write input + ".transcript.txt".
keepTranscript bool
// verbose is -v: enables the progress lines printed through logIf.
verbose bool
// --summerize inputs/outputs
// prompt is --prompt: producer's notes placed ahead of the transcript in the
// summarization request.
prompt string
// mdOut is --md, the markdown destination; "-" means stdout. run replaces the
// flag's empty default with input + ".summary.md".
mdOut string
// spotifyOut is --spotify, the Spotify HTML destination; "-" means stdout and
// empty skips the HTML file.
spotifyOut string
// copyHTML is --copy: copy the Spotify HTML to the clipboard.
copyHTML bool
// --clip outputs
// minSec is --min, the shortest acceptable clip in seconds.
minSec float64
// maxSec is --max, the longest acceptable clip in seconds.
maxSec float64
// clipOut is --out, the clip destination; run replaces an empty value with
// clip.DefaultOutputPath(input).
clipOut string
// copyCodec is --copy-codec; doClip passes its negation to clip.Extract.
copyCodec bool
// dryRun is --dry-run: print the selection but skip the ffmpeg cut.
dryRun bool
}
// run is the real entry point: it parses args (the command line without the
// program name), fills in the defaults that depend on the input path,
// validates the configuration, and then executes the selected modes.
//
// Exactly one positional input file is required. When no mode flag is given,
// --summerize is assumed. Transcription (or the segments cache) is shared by
// all modes, so it happens once up front.
//
// It returns nil on success and also when the user asked for help (-h/-help);
// any other problem is returned as an error for main to report.
func run(args []string) error {
	var cfg config
	fs := flag.NewFlagSet("publish", flag.ContinueOnError)

	// Mode flags.
	fs.BoolVar(&cfg.modeSummerize, "summerize", false, "produce a markdown summary (default if no mode is set)")
	fs.BoolVar(&cfg.modeClip, "clip", false, "pick a 60-90s hook clip and cut it out of the source")
	fs.BoolVar(&cfg.modePost, "post", false, "post the summary to Spotify (not implemented yet)")

	// Shared flags.
	fs.StringVar(&cfg.summarizer, "summarizer", "claude-cli", "LLM backend: claude-cli | claude-api")
	fs.StringVar(&cfg.model, "model", "", "model name (claude-api default: claude-sonnet-4-6)")
	fs.StringVar(&cfg.promptSummary, "prompt-summary", "", "summary prompt path; empty uses bundled prompts/church-service.md")
	fs.StringVar(&cfg.promptClip, "prompt-clip", "", "clip-selection prompt path; empty uses bundled prompts/clip-selector.md")
	fs.StringVar(&cfg.whisperBin, "whisper-bin", "", "whisper.cpp binary (auto-detect if empty)")
	fs.StringVar(&cfg.whisperModel, "whisper-model", defaultWhisperModel(), "whisper.cpp ggml model path")
	fs.StringVar(&cfg.whisperLang, "whisper-lang", "", "force whisper language code (empty = auto)")
	fs.IntVar(&cfg.whisperThreads, "whisper-threads", 0, "whisper.cpp thread count (0 = library default)")
	fs.StringVar(&cfg.segmentsCache, "segments", "", `path to read/write whisper segments JSON; default: <input>.segments.json`)
	fs.BoolVar(&cfg.keepWAV, "keep-wav", false, "keep the normalized 16kHz WAV next to the input")
	fs.BoolVar(&cfg.keepTranscript, "keep-transcript", false, "also write <input>.transcript.txt")
	fs.BoolVar(&cfg.verbose, "v", false, "verbose progress output")

	// --summerize inputs/outputs.
	fs.StringVar(&cfg.prompt, "prompt", "", "[--summerize] producer's notes to anchor the summary (titles, framing, key points). For longer notes use shell expansion: --prompt \"$(cat notes.txt)\"")
	fs.StringVar(&cfg.mdOut, "md", "", `[--summerize] markdown output; "-" for stdout, "" disables; default: <input>.summary.md`)
	fs.StringVar(&cfg.spotifyOut, "spotify", "", `[--summerize] Spotify HTML output; "-" for stdout (default: disabled)`)
	fs.BoolVar(&cfg.copyHTML, "copy", false, "[--summerize] copy Spotify HTML to clipboard")

	// --clip outputs.
	fs.Float64Var(&cfg.minSec, "min", 60, "[--clip] minimum clip length in seconds")
	fs.Float64Var(&cfg.maxSec, "max", 90, "[--clip] maximum clip length in seconds")
	fs.StringVar(&cfg.clipOut, "out", "", `[--clip] clip output path; default: .clip (or .clip.m4a for audio)`)
	fs.BoolVar(&cfg.copyCodec, "copy-codec", false, "[--clip] use ffmpeg stream copy instead of re-encoding (faster, keyframe-aligned)")
	fs.BoolVar(&cfg.dryRun, "dry-run", false, "[--clip] pick the clip and print metadata, but skip the ffmpeg cut")

	fs.Usage = func() {
		fmt.Fprintf(os.Stderr, `usage: publish [mode...] [flags] <input>
modes (combine freely; defaults to --summerize):
--summerize write a markdown summary
--clip cut a 60-90s social hook clip
--post post to Spotify (not implemented yet)
flags:
`)
		fs.PrintDefaults()
	}

	if err := fs.Parse(args); err != nil {
		// For -h/-help the flag package has already printed the usage text;
		// asking for help is not a failure, so do not report it as one.
		if err == flag.ErrHelp {
			return nil
		}
		return err
	}
	if fs.NArg() != 1 {
		fs.Usage()
		return fmt.Errorf("exactly one input file is required")
	}
	cfg.input = fs.Arg(0)

	// Default to --summerize if no mode flag was passed.
	if !cfg.modeSummerize && !cfg.modeClip && !cfg.modePost {
		cfg.modeSummerize = true
	}
	if cfg.modePost {
		return fmt.Errorf("--post is not implemented yet")
	}

	// Output path defaults that depend on input.
	//
	// --md documents "" as "disabled", so the default path may only be applied
	// when the flag was not given at all. The flag's zero value cannot tell the
	// two cases apart, hence the explicit Visit over the flags that were set.
	mdGiven := false
	fs.Visit(func(f *flag.Flag) {
		if f.Name == "md" {
			mdGiven = true
		}
	})
	if !mdGiven {
		cfg.mdOut = cfg.input + ".summary.md"
	}
	if cfg.mdOut == "-" && cfg.spotifyOut == "-" {
		return fmt.Errorf("--md and --spotify cannot both be \"-\"")
	}
	if cfg.segmentsCache == "" {
		cfg.segmentsCache = cfg.input + ".segments.json"
	}
	if cfg.clipOut == "" {
		cfg.clipOut = clip.DefaultOutputPath(cfg.input)
	}
	if cfg.minSec <= 0 || cfg.maxSec <= 0 || cfg.maxSec < cfg.minSec {
		return fmt.Errorf("invalid --min/--max bounds")
	}

	// Resolve the backend before transcribing so that a mistyped --summarizer
	// is rejected immediately rather than after the slow transcription step.
	sum, err := buildSummarizer(cfg.summarizer, cfg.model)
	if err != nil {
		return err
	}

	// Cancel in-flight work on Ctrl-C or SIGTERM.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
	defer cancel()

	segs, err := loadOrTranscribeSegments(ctx, cfg)
	if err != nil {
		return err
	}
	if cfg.keepTranscript {
		if err := os.WriteFile(cfg.input+".transcript.txt", []byte(transcribe.PlainText(segs)), 0o644); err != nil {
			return fmt.Errorf("writing transcript: %w", err)
		}
	}

	if cfg.modeSummerize {
		if err := doSummerize(ctx, cfg, sum, segs); err != nil {
			return err
		}
	}
	if cfg.modeClip {
		if err := doClip(ctx, cfg, sum, segs); err != nil {
			return err
		}
	}
	return nil
}
// doSummerize asks sum for a markdown summary of segs and delivers it to the
// configured destinations.
//
// The system prompt is loaded from cfg.promptSummary (falling back to the
// embedded default). The request body is the plain-text transcript, preceded
// by the producer's notes from cfg.prompt when they are non-blank. The trimmed
// markdown is written to cfg.mdOut; if a Spotify target or clipboard copy was
// requested, the markdown is converted to Spotify HTML and sent there as well.
func doSummerize(ctx context.Context, cfg config, sum summarize.Summarizer, segs []transcribe.Segment) error {
	system, err := loadPrompt(cfg.promptSummary, defaultSummaryPrompt)
	if err != nil {
		return err
	}

	// Assemble the request: optional notes first, then the transcript.
	var request strings.Builder
	if notes := strings.TrimSpace(cfg.prompt); notes != "" {
		request.WriteString("Producer's notes (treat these as authoritative for titles, framing, and key points; expand and enrich them using the transcript that follows):\n\n")
		request.WriteString(notes)
		request.WriteString("\n\n---\n\n")
	}
	request.WriteString("Transcript:\n\n")
	request.WriteString(transcribe.PlainText(segs))

	logIf(cfg.verbose, "summarizing with %s", sum.Name())
	started := time.Now()
	reply, err := sum.Summarize(ctx, system, request.String())
	if err != nil {
		return fmt.Errorf("summarize: %w", err)
	}
	markdown := strings.TrimSpace(reply)
	logIf(cfg.verbose, "summary ready (%d chars, %s)", len(markdown), time.Since(started).Round(time.Second))

	if err := writeOutput(cfg.mdOut, markdown); err != nil {
		return fmt.Errorf("writing markdown: %w", err)
	}

	// The HTML rendering is only needed for the Spotify file or the clipboard.
	toFile := cfg.spotifyOut != ""
	if !toFile && !cfg.copyHTML {
		return nil
	}
	spotifyHTML := output.MarkdownToSpotifyHTML(markdown)

	if toFile {
		if err := writeOutput(cfg.spotifyOut, spotifyHTML); err != nil {
			return fmt.Errorf("writing spotify HTML: %w", err)
		}
	}
	if !cfg.copyHTML {
		return nil
	}
	tool, err := output.CopyToClipboard(spotifyHTML)
	if err != nil {
		return fmt.Errorf("clipboard: %w", err)
	}
	logIf(cfg.verbose, "Spotify HTML copied via %s", tool)
	return nil
}
// doClip lets sum choose a clip window of cfg.minSec to cfg.maxSec seconds out
// of segs, prints the selection to stdout, and, unless cfg.dryRun is set,
// cuts that window out of cfg.input into cfg.clipOut.
//
// When the selection fails and the model produced any output, that raw output
// is echoed to stderr before the error is returned.
func doClip(ctx context.Context, cfg config, sum summarize.Summarizer, segs []transcribe.Segment) error {
	selectorPrompt, err := loadPrompt(cfg.promptClip, defaultClipPrompt)
	if err != nil {
		return err
	}

	logIf(cfg.verbose, "selecting clip with %s (looking for %g-%gs window)", sum.Name(), cfg.minSec, cfg.maxSec)
	began := time.Now()
	choice, modelOut, err := clip.Pick(ctx, sum, selectorPrompt, segs, cfg.minSec, cfg.maxSec)
	if err != nil {
		if modelOut != "" {
			fmt.Fprintf(os.Stderr, "model output:\n%s\n", modelOut)
		}
		return fmt.Errorf("selecting clip: %w", err)
	}
	logIf(cfg.verbose, "selection ready (%s)", time.Since(began).Round(time.Second))

	// Report the selection; this is printed for dry runs too.
	fmt.Printf("Title: %s\n", choice.Title)
	fmt.Printf("Hook: %s\n", choice.Hook)
	fmt.Printf("Quote: %s\n", choice.Quote)
	fmt.Printf("Window: %s -> %s (%.1fs)\n", mmss(choice.StartSeconds), mmss(choice.EndSeconds), choice.Duration())
	fmt.Printf("Reason: %s\n", choice.Reasoning)

	if cfg.dryRun {
		return nil
	}

	logIf(cfg.verbose, "cutting clip with ffmpeg -> %s", cfg.clipOut)
	// The final argument is the inverse of --copy-codec.
	reencode := !cfg.copyCodec
	if err := clip.Extract(ctx, cfg.input, choice, cfg.clipOut, reencode); err != nil {
		return err
	}
	fmt.Printf("Wrote: %s\n", cfg.clipOut)
	return nil
}
// loadOrTranscribeSegments returns the transcript segments for cfg.input.
//
// It first tries the JSON cache at cfg.segmentsCache (a leading "~/" is
// expanded, like the other user-supplied paths). A cache that is missing,
// unreadable, malformed, or empty is skipped, with a verbose note explaining
// why unless the file simply does not exist. In that case the audio is
// extracted, transcribed, and the result is written back to the cache.
//
// Caching is best effort: a failure to encode or write the cache is reported
// as a warning on stderr but does not fail the run, because the segments
// themselves are still valid.
func loadOrTranscribeSegments(ctx context.Context, cfg config) ([]transcribe.Segment, error) {
	cachePath := expand(cfg.segmentsCache)

	data, readErr := os.ReadFile(cachePath)
	switch {
	case readErr == nil:
		var cached []transcribe.Segment
		jerr := json.Unmarshal(data, &cached)
		if jerr == nil && len(cached) > 0 {
			logIf(cfg.verbose, "reusing cached segments from %s (%d segments)", cachePath, len(cached))
			return cached, nil
		}
		if jerr != nil {
			logIf(cfg.verbose, "ignoring malformed segments cache %s: %v", cachePath, jerr)
		} else {
			logIf(cfg.verbose, "ignoring empty segments cache %s", cachePath)
		}
	case !os.IsNotExist(readErr):
		// A missing cache is the normal first-run case; anything else is worth a note.
		logIf(cfg.verbose, "ignoring unreadable segments cache %s: %v", cachePath, readErr)
	}

	wavPath, cleanup, err := prepareWAV(ctx, cfg.input, cfg.keepWAV, cfg.verbose)
	if err != nil {
		return nil, err
	}
	defer cleanup()

	tr := buildTranscriber(cfg)
	logIf(cfg.verbose, "transcribing with %s", tr.Name())
	t0 := time.Now()
	segs, err := tr.TranscribeSegments(ctx, wavPath)
	if err != nil {
		return nil, fmt.Errorf("transcribe: %w", err)
	}
	logIf(cfg.verbose, "transcript ready (%d segments, %s)", len(segs), time.Since(t0).Round(time.Second))

	// Best-effort cache write: warn on failure instead of claiming success.
	encoded, err := json.Marshal(segs)
	if err != nil {
		logIf(true, "warning: could not encode segments cache: %v", err)
		return segs, nil
	}
	if err := os.WriteFile(cachePath, encoded, 0o644); err != nil {
		logIf(true, "warning: could not write segments cache %s: %v", cachePath, err)
		return segs, nil
	}
	logIf(cfg.verbose, "cached segments to %s", cachePath)
	return segs, nil
}
// prepareWAV has audio.ExtractWAV convert input into a WAV file for
// transcription and returns that file's path together with a cleanup function.
//
// With keep set, the WAV is written to input + ".16k.wav" and the cleanup is a
// no-op. Otherwise the WAV goes into a fresh temporary directory, which the
// cleanup removes. On failure the temporary directory (if any) is removed
// right away and the returned cleanup is a no-op.
func prepareWAV(ctx context.Context, input string, keep, verbose bool) (string, func(), error) {
	noop := func() {}

	var (
		target  string
		release = noop
	)
	if keep {
		target = input + ".16k.wav"
	} else {
		scratch, err := os.MkdirTemp("", "publish-")
		if err != nil {
			return "", noop, err
		}
		target = filepath.Join(scratch, "audio.wav")
		release = func() { _ = os.RemoveAll(scratch) }
	}

	logIf(verbose, "extracting audio -> %s", target)
	if err := audio.ExtractWAV(ctx, input, target); err != nil {
		release()
		return "", noop, fmt.Errorf("audio extraction: %w", err)
	}
	return target, release, nil
}
// loadPrompt returns the contents of the prompt file at path (after expanding
// a leading "~/"), or fallback when path is empty. A read failure is returned
// as an error that names the path as the caller supplied it.
func loadPrompt(path, fallback string) (string, error) {
	if path != "" {
		contents, err := os.ReadFile(expand(path))
		if err != nil {
			return "", fmt.Errorf("reading prompt %s: %w", path, err)
		}
		return string(contents), nil
	}
	return fallback, nil
}
// buildTranscriber creates the whisper.cpp transcriber from the whisper-related
// settings in cfg. The model path gets "~/" expansion; the other values are
// passed through unchanged.
func buildTranscriber(cfg config) *transcribe.WhisperCPP {
	w := new(transcribe.WhisperCPP)
	w.Bin = cfg.whisperBin
	w.Model = expand(cfg.whisperModel)
	w.Language = cfg.whisperLang
	w.Threads = cfg.whisperThreads
	w.Verbose = cfg.verbose
	return w
}
// buildSummarizer maps a backend name to its summarizer implementation,
// configured with model. "claude-cli" and "cli" select the CLI backend;
// "claude-api", "anthropic" and "api" select the API backend. Any other name
// is an error.
func buildSummarizer(kind, model string) (summarize.Summarizer, error) {
	if kind == "claude-cli" || kind == "cli" {
		return &summarize.ClaudeCLI{Model: model}, nil
	}
	if kind == "claude-api" || kind == "anthropic" || kind == "api" {
		return &summarize.Anthropic{Model: model}, nil
	}
	return nil, fmt.Errorf("unknown summarizer %q", kind)
}
// writeOutput emits data followed by a newline. An empty path writes nothing,
// "-" writes to stdout, and any other path is written as a file (mode 0644)
// after "~/" expansion.
func writeOutput(path, data string) error {
	switch path {
	case "":
		return nil
	case "-":
		_, err := os.Stdout.WriteString(data + "\n")
		return err
	default:
		return os.WriteFile(expand(path), []byte(data+"\n"), 0o644)
	}
}
// expand replaces a leading "~" path element with the current user's home
// directory. Both a bare "~" and the "~/..." prefix are handled. Every other
// path, including the "~user/..." form, is returned unchanged, as is p when
// the home directory cannot be determined.
func expand(p string) string {
	if p != "~" && !strings.HasPrefix(p, "~/") {
		return p
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return p
	}
	// p[1:] is either empty or begins with "/"; Join cleans both forms.
	return filepath.Join(home, p[1:])
}
// defaultWhisperModel returns the default value for --whisper-model:
// .cache/whisper.cpp/ggml-base.en.bin under the user's home directory, or an
// empty string when the home directory cannot be determined.
func defaultWhisperModel() string {
	if home, err := os.UserHomeDir(); err == nil {
		return filepath.Join(home, ".cache", "whisper.cpp", "ggml-base.en.bin")
	}
	return ""
}
// logIf writes one printf-style progress line to stderr, prefixed with
// "[publish] " and terminated by a newline, but only when on is true.
func logIf(on bool, format string, args ...any) {
	if on {
		fmt.Fprintf(os.Stderr, "[publish] "+format+"\n", args...)
	}
}
// mmss formats a time offset given in seconds as "MM:SS", or as "HH:MM:SS"
// once it reaches a full hour. Fractions of a second are truncated and
// negative offsets are shown as "00:00".
func mmss(seconds float64) string {
	if seconds < 0 {
		return "00:00"
	}
	whole := int(seconds)
	hours, rest := whole/3600, whole%3600
	minutes, secs := rest/60, whole%60
	if hours <= 0 {
		return fmt.Sprintf("%02d:%02d", minutes, secs)
	}
	return fmt.Sprintf("%02d:%02d:%02d", hours, minutes, secs)
}