Files
Summerize/main.go
2026-05-10 13:37:17 -06:00

401 lines
12 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// publish — generate a markdown summary, a 60-90s social hook clip, or both
// from a local audio/video file. Each mode is enabled by its own boolean flag.
package main
import (
"context"
_ "embed"
"encoding/json"
"flag"
"fmt"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
"time"
"publish/internal/audio"
"publish/internal/clip"
"publish/internal/output"
"publish/internal/summarize"
"publish/internal/transcribe"
)
// defaultSummaryPrompt is the bundled summary prompt, embedded at build time
// from prompts/church-service.md. doSummerize hands it to loadPrompt as the
// fallback used when --prompt-summary is empty.
//
//go:embed prompts/church-service.md
var defaultSummaryPrompt string

// defaultClipPrompt is the bundled clip-selection prompt, embedded at build
// time from prompts/clip-selector.md. doClip hands it to loadPrompt as the
// fallback used when --prompt-clip is empty.
//
//go:embed prompts/clip-selector.md
var defaultClipPrompt string
// main hands the command-line arguments (minus the program name) to run and
// turns a returned error into a "publish: ..." line on stderr plus exit
// status 1.
func main() {
	err := run(os.Args[1:])
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, "publish:", err)
	os.Exit(1)
}
// config carries every command-line setting for one invocation. run fills it
// from the flag set and passes it by value to the mode handlers.
type config struct {
	// input is the single positional argument: the media file to process.
	input string

	// Mode selection (--summerize, --clip, --post).
	modeSummerize, modeClip, modePost bool

	// Settings shared by all modes.
	summarizer, model                    string // --summarizer, --model
	promptSummary, promptClip            string // --prompt-summary, --prompt-clip
	whisperBin, whisperModel, whisperLang string // --whisper-bin, --whisper-model, --whisper-lang
	whisperThreads                       int    // --whisper-threads
	segmentsCache                        string // --segments
	keepWAV, keepTranscript, verbose     bool   // --keep-wav, --keep-transcript, -v

	// --summerize inputs/outputs.
	prompt, mdOut, spotifyOut string // --prompt, --md, --spotify
	copyHTML                  bool   // --copy

	// --clip outputs.
	minSec, maxSec    float64 // --min, --max
	clipOut           string  // --out
	copyCodec, dryRun bool    // --copy-codec, --dry-run
}
// run parses args, fills in input-dependent defaults, validates the settings,
// obtains transcript segments (cached or freshly transcribed) and then runs
// each enabled mode in turn: summary first, clip second.
//
// It returns nil on success and when the user asks for help (-h/-help); any
// other problem is returned as an error for main to report.
func run(args []string) error {
	var cfg config
	fs := flag.NewFlagSet("publish", flag.ContinueOnError)

	// Mode flags.
	fs.BoolVar(&cfg.modeSummerize, "summerize", false, "produce a markdown summary (default if no mode is set)")
	fs.BoolVar(&cfg.modeClip, "clip", false, "pick a 60-90s hook clip and cut it out of the source")
	fs.BoolVar(&cfg.modePost, "post", false, "post the summary to Spotify (not implemented yet)")

	// Shared flags.
	fs.StringVar(&cfg.summarizer, "summarizer", "claude-cli", "LLM backend: claude-cli | claude-api")
	fs.StringVar(&cfg.model, "model", "", "model name (claude-api default: claude-sonnet-4-6)")
	fs.StringVar(&cfg.promptSummary, "prompt-summary", "", "summary prompt path; empty uses bundled prompts/church-service.md")
	fs.StringVar(&cfg.promptClip, "prompt-clip", "", "clip-selection prompt path; empty uses bundled prompts/clip-selector.md")
	fs.StringVar(&cfg.whisperBin, "whisper-bin", "", "whisper.cpp binary (auto-detect if empty)")
	fs.StringVar(&cfg.whisperModel, "whisper-model", defaultWhisperModel(), "whisper.cpp ggml model path")
	fs.StringVar(&cfg.whisperLang, "whisper-lang", "", "force whisper language code (empty = auto)")
	fs.IntVar(&cfg.whisperThreads, "whisper-threads", 0, "whisper.cpp thread count (0 = library default)")
	fs.StringVar(&cfg.segmentsCache, "segments", "", `path to read/write whisper segments JSON; default: <input>.segments.json`)
	fs.BoolVar(&cfg.keepWAV, "keep-wav", false, "keep the normalized 16kHz WAV next to the input")
	fs.BoolVar(&cfg.keepTranscript, "keep-transcript", false, "also write <input>.transcript.txt")
	fs.BoolVar(&cfg.verbose, "v", false, "verbose progress output")

	// --summerize inputs/outputs.
	fs.StringVar(&cfg.prompt, "prompt", "", "[--summerize] producer's notes to anchor the summary (titles, framing, key points). For longer notes use shell expansion: --prompt \"$(cat notes.txt)\"")
	fs.StringVar(&cfg.mdOut, "md", "", `[--summerize] markdown output; "-" for stdout, "" disables; default: <input>.summary.md`)
	fs.StringVar(&cfg.spotifyOut, "spotify", "", `[--summerize] Spotify HTML output; "-" for stdout (default: disabled)`)
	fs.BoolVar(&cfg.copyHTML, "copy", false, "[--summerize] copy Spotify HTML to clipboard")

	// --clip outputs.
	fs.Float64Var(&cfg.minSec, "min", 60, "[--clip] minimum clip length in seconds")
	fs.Float64Var(&cfg.maxSec, "max", 90, "[--clip] maximum clip length in seconds")
	fs.StringVar(&cfg.clipOut, "out", "", `[--clip] clip output path; default: <input>.clip<ext> (or .clip.m4a for audio)`)
	fs.BoolVar(&cfg.copyCodec, "copy-codec", false, "[--clip] use ffmpeg stream copy instead of re-encoding (faster, keyframe-aligned)")
	fs.BoolVar(&cfg.dryRun, "dry-run", false, "[--clip] pick the clip and print metadata, but skip the ffmpeg cut")

	fs.Usage = func() {
		fmt.Fprintf(os.Stderr, `usage: publish [mode...] [flags] <input>
modes (combine freely; defaults to --summerize):
--summerize write a markdown summary
--clip cut a 60-90s social hook clip
--post post to Spotify (not implemented yet)
flags:
`)
		fs.PrintDefaults()
	}

	if err := fs.Parse(args); err != nil {
		// With ContinueOnError, -h/-help comes back as the bare sentinel
		// flag.ErrHelp after usage has already been printed. Asking for help
		// is not a failure, so do not report it as one.
		if err == flag.ErrHelp {
			return nil
		}
		return err
	}
	if fs.NArg() != 1 {
		fs.Usage()
		return fmt.Errorf("exactly one input file is required")
	}
	cfg.input = fs.Arg(0)

	// Default to --summerize if no mode flag was passed.
	if !cfg.modeSummerize && !cfg.modeClip && !cfg.modePost {
		cfg.modeSummerize = true
	}
	if cfg.modePost {
		return fmt.Errorf("--post is not implemented yet")
	}

	// Output path defaults that depend on input.
	//
	// The help text promises that an explicit --md "" disables markdown
	// output. An empty value alone cannot be told apart from "flag not given",
	// so ask the flag set whether --md was actually set before defaulting.
	mdGiven := false
	fs.Visit(func(f *flag.Flag) {
		if f.Name == "md" {
			mdGiven = true
		}
	})
	if !mdGiven {
		cfg.mdOut = cfg.input + ".summary.md"
	}
	if cfg.mdOut == "-" && cfg.spotifyOut == "-" {
		return fmt.Errorf("--md and --spotify cannot both be \"-\"")
	}
	if cfg.segmentsCache == "" {
		cfg.segmentsCache = cfg.input + ".segments.json"
	}
	if cfg.clipOut == "" {
		cfg.clipOut = clip.DefaultOutputPath(cfg.input)
	}
	if cfg.minSec <= 0 || cfg.maxSec <= 0 || cfg.maxSec < cfg.minSec {
		return fmt.Errorf("invalid --min/--max bounds")
	}

	// Resolve the summarizer before the (potentially long) transcription so a
	// mistyped --summarizer is rejected immediately.
	sum, err := buildSummarizer(cfg.summarizer, cfg.model)
	if err != nil {
		return err
	}

	// Cancel in-flight work on Ctrl-C or SIGTERM.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
	defer cancel()

	segs, err := loadOrTranscribeSegments(ctx, cfg)
	if err != nil {
		return err
	}
	if cfg.keepTranscript {
		if err := os.WriteFile(cfg.input+".transcript.txt", []byte(transcribe.PlainText(segs)), 0o644); err != nil {
			return fmt.Errorf("writing transcript: %w", err)
		}
	}

	if cfg.modeSummerize {
		if err := doSummerize(ctx, cfg, sum, segs); err != nil {
			return err
		}
	}
	if cfg.modeClip {
		if err := doClip(ctx, cfg, sum, segs); err != nil {
			return err
		}
	}
	return nil
}
// doSummerize asks the summarizer for a markdown summary of the transcript and
// writes it to cfg.mdOut. When --spotify or --copy is set it also converts the
// markdown to Spotify HTML and writes and/or copies that.
//
// Producer's notes from --prompt, when non-blank, are placed ahead of the
// transcript in the message sent to the model.
func doSummerize(ctx context.Context, cfg config, sum summarize.Summarizer, segs []transcribe.Segment) error {
	systemPrompt, err := loadPrompt(cfg.promptSummary, defaultSummaryPrompt)
	if err != nil {
		return err
	}

	// Assemble the user message: optional notes section, then the transcript.
	var request strings.Builder
	if notes := strings.TrimSpace(cfg.prompt); notes != "" {
		request.WriteString("Producer's notes (treat these as authoritative for titles, framing, and key points; expand and enrich them using the transcript that follows):\n\n")
		request.WriteString(notes)
		request.WriteString("\n\n---\n\n")
	}
	request.WriteString("Transcript:\n\n")
	request.WriteString(transcribe.PlainText(segs))

	logIf(cfg.verbose, "summarizing with %s", sum.Name())
	started := time.Now()
	summary, err := sum.Summarize(ctx, systemPrompt, request.String())
	if err != nil {
		return fmt.Errorf("summarize: %w", err)
	}
	summary = strings.TrimSpace(summary)
	logIf(cfg.verbose, "summary ready (%d chars, %s)", len(summary), time.Since(started).Round(time.Second))

	if err := writeOutput(cfg.mdOut, summary); err != nil {
		return fmt.Errorf("writing markdown: %w", err)
	}

	// The HTML rendering is only needed for the Spotify file and the clipboard.
	wantFile := cfg.spotifyOut != ""
	if !wantFile && !cfg.copyHTML {
		return nil
	}
	html := output.MarkdownToSpotifyHTML(summary)

	if wantFile {
		if err := writeOutput(cfg.spotifyOut, html); err != nil {
			return fmt.Errorf("writing spotify HTML: %w", err)
		}
	}
	if !cfg.copyHTML {
		return nil
	}
	tool, err := output.CopyToClipboard(html)
	if err != nil {
		return fmt.Errorf("clipboard: %w", err)
	}
	logIf(cfg.verbose, "Spotify HTML copied via %s", tool)
	return nil
}
// doClip has the model choose a clip window between cfg.minSec and cfg.maxSec,
// prints the selection to stdout and, unless --dry-run is set, cuts the clip
// out of the input into cfg.clipOut.
//
// If selection fails and the model produced any output, that raw output is
// echoed to stderr before the error is returned.
func doClip(ctx context.Context, cfg config, sum summarize.Summarizer, segs []transcribe.Segment) error {
	selectorPrompt, err := loadPrompt(cfg.promptClip, defaultClipPrompt)
	if err != nil {
		return err
	}

	logIf(cfg.verbose, "selecting clip with %s (looking for %g-%gs window)", sum.Name(), cfg.minSec, cfg.maxSec)
	started := time.Now()
	choice, modelOutput, pickErr := clip.Pick(ctx, sum, selectorPrompt, segs, cfg.minSec, cfg.maxSec)
	if pickErr != nil {
		if modelOutput != "" {
			fmt.Fprintf(os.Stderr, "model output:\n%s\n", modelOutput)
		}
		return fmt.Errorf("selecting clip: %w", pickErr)
	}
	logIf(cfg.verbose, "selection ready (%s)", time.Since(started).Round(time.Second))

	// Report the selection on stdout.
	from, to := mmss(choice.StartSeconds), mmss(choice.EndSeconds)
	fmt.Printf("Title: %s\n", choice.Title)
	fmt.Printf("Hook: %s\n", choice.Hook)
	fmt.Printf("Quote: %s\n", choice.Quote)
	fmt.Printf("Window: %s -> %s (%.1fs)\n", from, to, choice.Duration())
	fmt.Printf("Reason: %s\n", choice.Reasoning)

	if cfg.dryRun {
		return nil
	}

	logIf(cfg.verbose, "cutting clip with ffmpeg -> %s", cfg.clipOut)
	// --copy-codec requests stream copy, so the final argument is its negation.
	reencode := !cfg.copyCodec
	if err := clip.Extract(ctx, cfg.input, choice, cfg.clipOut, reencode); err != nil {
		return err
	}
	fmt.Printf("Wrote: %s\n", cfg.clipOut)
	return nil
}
// loadOrTranscribeSegments returns the transcript segments for cfg.input.
//
// It first tries the JSON cache at cfg.segmentsCache and reuses it when the
// file parses and holds at least one segment. Otherwise it extracts audio,
// runs the transcriber and writes the result back to the cache.
//
// Caching is best-effort in both directions: an unreadable or unparsable cache
// is skipped (noted in verbose mode), and a failed cache write is reported as
// a warning on stderr without failing the run.
func loadOrTranscribeSegments(ctx context.Context, cfg config) ([]transcribe.Segment, error) {
	cached, readErr := os.ReadFile(cfg.segmentsCache)
	switch {
	case readErr == nil:
		var segs []transcribe.Segment
		jerr := json.Unmarshal(cached, &segs)
		if jerr == nil && len(segs) > 0 {
			logIf(cfg.verbose, "reusing cached segments from %s (%d segments)", cfg.segmentsCache, len(segs))
			return segs, nil
		}
		if jerr != nil {
			logIf(cfg.verbose, "ignoring unparsable segments cache %s: %v", cfg.segmentsCache, jerr)
		}
	case !os.IsNotExist(readErr):
		// A missing cache is the normal first-run case; anything else is
		// worth mentioning but still not fatal.
		logIf(cfg.verbose, "ignoring unreadable segments cache %s: %v", cfg.segmentsCache, readErr)
	}

	wavPath, cleanup, err := prepareWAV(ctx, cfg.input, cfg.keepWAV, cfg.verbose)
	if err != nil {
		return nil, err
	}
	defer cleanup()

	tr := buildTranscriber(cfg)
	logIf(cfg.verbose, "transcribing with %s", tr.Name())
	t0 := time.Now()
	segs, err := tr.TranscribeSegments(ctx, wavPath)
	if err != nil {
		return nil, fmt.Errorf("transcribe: %w", err)
	}
	logIf(cfg.verbose, "transcript ready (%d segments, %s)", len(segs), time.Since(t0).Round(time.Second))

	// Only claim the cache was written when it really was; a failure here
	// means the next run will silently re-transcribe, so tell the user.
	data, cacheErr := json.Marshal(segs)
	if cacheErr == nil {
		cacheErr = os.WriteFile(cfg.segmentsCache, data, 0o644)
	}
	if cacheErr != nil {
		fmt.Fprintf(os.Stderr, "[publish] warning: could not cache segments to %s: %v\n", cfg.segmentsCache, cacheErr)
	} else {
		logIf(cfg.verbose, "cached segments to %s", cfg.segmentsCache)
	}
	return segs, nil
}
// prepareWAV has audio.ExtractWAV convert input to a WAV file and returns that
// file's path together with a cleanup function.
//
// With keep set, the WAV is written next to the input as <input>.16k.wav and
// cleanup does nothing. Otherwise it goes into a fresh temporary directory
// that cleanup removes. On failure the returned cleanup is always a no-op; any
// temporary directory has already been removed.
func prepareWAV(ctx context.Context, input string, keep, verbose bool) (string, func(), error) {
	noop := func() {}

	var (
		wavPath string
		cleanup = noop
	)
	if keep {
		wavPath = input + ".16k.wav"
	} else {
		scratch, err := os.MkdirTemp("", "publish-")
		if err != nil {
			return "", noop, err
		}
		wavPath = filepath.Join(scratch, "audio.wav")
		cleanup = func() { _ = os.RemoveAll(scratch) }
	}

	logIf(verbose, "extracting audio -> %s", wavPath)
	err := audio.ExtractWAV(ctx, input, wavPath)
	if err == nil {
		return wavPath, cleanup, nil
	}
	cleanup()
	return "", noop, fmt.Errorf("audio extraction: %w", err)
}
// loadPrompt returns the prompt text to use: the contents of the file at path
// (after "~/" expansion) when path is non-empty, otherwise fallback.
func loadPrompt(path, fallback string) (string, error) {
	if path != "" {
		raw, err := os.ReadFile(expand(path))
		if err != nil {
			return "", fmt.Errorf("reading prompt %s: %w", path, err)
		}
		return string(raw), nil
	}
	return fallback, nil
}
// buildTranscriber maps the whisper-related settings in cfg onto a
// transcribe.WhisperCPP value. The model path gets "~/" expansion; the other
// settings are copied through unchanged.
func buildTranscriber(cfg config) *transcribe.WhisperCPP {
	var w transcribe.WhisperCPP
	w.Bin = cfg.whisperBin
	w.Model = expand(cfg.whisperModel)
	w.Language = cfg.whisperLang
	w.Threads = cfg.whisperThreads
	w.Verbose = cfg.verbose
	return &w
}
// buildSummarizer picks the LLM backend named by kind and configures it with
// model. "claude-cli" and "cli" select the CLI backend; "claude-api",
// "anthropic" and "api" select the API backend. Any other name is an error.
func buildSummarizer(kind, model string) (summarize.Summarizer, error) {
	var backend summarize.Summarizer
	switch kind {
	case "claude-cli", "cli":
		backend = &summarize.ClaudeCLI{Model: model}
	case "claude-api", "anthropic", "api":
		backend = &summarize.Anthropic{Model: model}
	default:
		return nil, fmt.Errorf("unknown summarizer %q", kind)
	}
	return backend, nil
}
// writeOutput emits data followed by a newline. An empty path means "disabled"
// and writes nothing, "-" writes to stdout, and anything else is treated as a
// file path (after "~/" expansion) written with mode 0644.
func writeOutput(path, data string) error {
	switch path {
	case "":
		return nil
	case "-":
		_, err := os.Stdout.WriteString(data + "\n")
		return err
	default:
		payload := []byte(data + "\n")
		return os.WriteFile(expand(path), payload, 0o644)
	}
}
// expand replaces a leading "~/" in p with the current user's home directory.
// Paths without that prefix are returned untouched, as is p itself when the
// home directory cannot be determined.
func expand(p string) string {
	const homePrefix = "~/"
	if !strings.HasPrefix(p, homePrefix) {
		return p
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return p
	}
	return filepath.Join(home, strings.TrimPrefix(p, homePrefix))
}
// defaultWhisperModel returns the default value for --whisper-model:
// ~/.cache/whisper.cpp/ggml-base.en.bin under the user's home directory, or
// the empty string when the home directory cannot be determined.
func defaultWhisperModel() string {
	if home, err := os.UserHomeDir(); err == nil {
		return filepath.Join(home, ".cache", "whisper.cpp", "ggml-base.en.bin")
	}
	return ""
}
// logIf writes one printf-style progress line to stderr, prefixed with
// "[publish] " and terminated by a newline, but only when on is true.
func logIf(on bool, format string, args ...any) {
	if on {
		line := "[publish] " + format + "\n"
		fmt.Fprintf(os.Stderr, line, args...)
	}
}
// mmss formats a time offset given in seconds as "MM:SS", or as "HH:MM:SS"
// once it reaches a full hour. Fractional seconds are truncated and negative
// inputs are treated as zero.
func mmss(seconds float64) string {
	whole := int(seconds)
	if seconds < 0 {
		whole = 0
	}
	hours, rest := whole/3600, whole%3600
	minutes, secs := rest/60, rest%60
	if hours <= 0 {
		return fmt.Sprintf("%02d:%02d", minutes, secs)
	}
	return fmt.Sprintf("%02d:%02d:%02d", hours, minutes, secs)
}