401 lines
12 KiB
Go
401 lines
12 KiB
Go
// publish — generate a markdown summary, a 60–90s social hook clip, or both
|
||
// from a local audio/video file. Each mode is enabled by its own boolean flag.
|
||
package main
|
||
|
||
import (
	"context"
	_ "embed"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"os"
	"os/signal"
	"path/filepath"
	"strings"
	"syscall"
	"time"

	"publish/internal/audio"
	"publish/internal/clip"
	"publish/internal/output"
	"publish/internal/summarize"
	"publish/internal/transcribe"
)
|
||
|
||
//go:embed prompts/church-service.md
|
||
var defaultSummaryPrompt string
|
||
|
||
//go:embed prompts/clip-selector.md
|
||
var defaultClipPrompt string
|
||
|
||
func main() {
|
||
if err := run(os.Args[1:]); err != nil {
|
||
fmt.Fprintln(os.Stderr, "publish: "+err.Error())
|
||
os.Exit(1)
|
||
}
|
||
}
|
||
|
||
// config carries every command-line setting for one invocation. Fields are
// populated by the flag definitions in run; the path fields that depend on the
// input file receive their defaults there as well.
type config struct {
	// input is the single positional argument: the audio/video file to process.
	input string

	// Mode selection (--summerize, --clip, --post).
	modeSummerize, modeClip, modePost bool

	// Settings shared by all modes.
	summarizer     string // --summarizer: LLM backend name
	model          string // --model: model name handed to the backend
	promptSummary  string // --prompt-summary: summary prompt path, empty = bundled
	promptClip     string // --prompt-clip: clip-selection prompt path, empty = bundled
	whisperBin     string // --whisper-bin: whisper.cpp binary
	whisperModel   string // --whisper-model: ggml model path
	whisperLang    string // --whisper-lang: forced language code
	whisperThreads int    // --whisper-threads: thread count
	segmentsCache  string // --segments: segments JSON read/write path
	keepWAV        bool   // --keep-wav: keep the normalized WAV next to the input
	keepTranscript bool   // --keep-transcript: also write <input>.transcript.txt
	verbose        bool   // -v: progress output on stderr

	// --summerize inputs and outputs.
	prompt     string // --prompt: producer's notes placed ahead of the transcript
	mdOut      string // --md: markdown output path ("-" = stdout)
	spotifyOut string // --spotify: Spotify HTML output path ("-" = stdout)
	copyHTML   bool   // --copy: copy the Spotify HTML to the clipboard

	// --clip settings and outputs.
	minSec, maxSec float64 // --min / --max: clip length bounds in seconds
	clipOut        string  // --out: clip output path
	copyCodec      bool    // --copy-codec: stream copy instead of re-encoding
	dryRun         bool    // --dry-run: pick and print the clip, skip the cut
}
|
||
|
||
func run(args []string) error {
|
||
var cfg config
|
||
fs := flag.NewFlagSet("publish", flag.ContinueOnError)
|
||
|
||
// Mode flags.
|
||
fs.BoolVar(&cfg.modeSummerize, "summerize", false, "produce a markdown summary (default if no mode is set)")
|
||
fs.BoolVar(&cfg.modeClip, "clip", false, "pick a 60-90s hook clip and cut it out of the source")
|
||
fs.BoolVar(&cfg.modePost, "post", false, "post the summary to Spotify (not implemented yet)")
|
||
|
||
// Shared flags.
|
||
fs.StringVar(&cfg.summarizer, "summarizer", "claude-cli", "LLM backend: claude-cli | claude-api")
|
||
fs.StringVar(&cfg.model, "model", "", "model name (claude-api default: claude-sonnet-4-6)")
|
||
fs.StringVar(&cfg.promptSummary, "prompt-summary", "", "summary prompt path; empty uses bundled prompts/church-service.md")
|
||
fs.StringVar(&cfg.promptClip, "prompt-clip", "", "clip-selection prompt path; empty uses bundled prompts/clip-selector.md")
|
||
fs.StringVar(&cfg.whisperBin, "whisper-bin", "", "whisper.cpp binary (auto-detect if empty)")
|
||
fs.StringVar(&cfg.whisperModel, "whisper-model", defaultWhisperModel(), "whisper.cpp ggml model path")
|
||
fs.StringVar(&cfg.whisperLang, "whisper-lang", "", "force whisper language code (empty = auto)")
|
||
fs.IntVar(&cfg.whisperThreads, "whisper-threads", 0, "whisper.cpp thread count (0 = library default)")
|
||
fs.StringVar(&cfg.segmentsCache, "segments", "", `path to read/write whisper segments JSON; default: <input>.segments.json`)
|
||
fs.BoolVar(&cfg.keepWAV, "keep-wav", false, "keep the normalized 16kHz WAV next to the input")
|
||
fs.BoolVar(&cfg.keepTranscript, "keep-transcript", false, "also write <input>.transcript.txt")
|
||
fs.BoolVar(&cfg.verbose, "v", false, "verbose progress output")
|
||
|
||
// --summerize inputs/outputs.
|
||
fs.StringVar(&cfg.prompt, "prompt", "", "[--summerize] producer's notes to anchor the summary (titles, framing, key points). For longer notes use shell expansion: --prompt \"$(cat notes.txt)\"")
|
||
fs.StringVar(&cfg.mdOut, "md", "", `[--summerize] markdown output; "-" for stdout, "" disables; default: <input>.summary.md`)
|
||
fs.StringVar(&cfg.spotifyOut, "spotify", "", `[--summerize] Spotify HTML output; "-" for stdout (default: disabled)`)
|
||
fs.BoolVar(&cfg.copyHTML, "copy", false, "[--summerize] copy Spotify HTML to clipboard")
|
||
|
||
// --clip outputs.
|
||
fs.Float64Var(&cfg.minSec, "min", 60, "[--clip] minimum clip length in seconds")
|
||
fs.Float64Var(&cfg.maxSec, "max", 90, "[--clip] maximum clip length in seconds")
|
||
fs.StringVar(&cfg.clipOut, "out", "", `[--clip] clip output path; default: <input>.clip<ext> (or .clip.m4a for audio)`)
|
||
fs.BoolVar(&cfg.copyCodec, "copy-codec", false, "[--clip] use ffmpeg stream copy instead of re-encoding (faster, keyframe-aligned)")
|
||
fs.BoolVar(&cfg.dryRun, "dry-run", false, "[--clip] pick the clip and print metadata, but skip the ffmpeg cut")
|
||
|
||
fs.Usage = func() {
|
||
fmt.Fprintf(os.Stderr, `usage: publish [mode...] [flags] <input>
|
||
|
||
modes (combine freely; defaults to --summerize):
|
||
--summerize write a markdown summary
|
||
--clip cut a 60-90s social hook clip
|
||
--post post to Spotify (not implemented yet)
|
||
|
||
flags:
|
||
`)
|
||
fs.PrintDefaults()
|
||
}
|
||
if err := fs.Parse(args); err != nil {
|
||
return err
|
||
}
|
||
if fs.NArg() != 1 {
|
||
fs.Usage()
|
||
return fmt.Errorf("exactly one input file is required")
|
||
}
|
||
cfg.input = fs.Arg(0)
|
||
|
||
// Default to --summerize if no mode flag was passed.
|
||
if !cfg.modeSummerize && !cfg.modeClip && !cfg.modePost {
|
||
cfg.modeSummerize = true
|
||
}
|
||
if cfg.modePost {
|
||
return fmt.Errorf("--post is not implemented yet")
|
||
}
|
||
|
||
// Output path defaults that depend on input.
|
||
if cfg.mdOut == "" {
|
||
cfg.mdOut = cfg.input + ".summary.md"
|
||
}
|
||
if cfg.mdOut == "-" && cfg.spotifyOut == "-" {
|
||
return fmt.Errorf("--md and --spotify cannot both be \"-\"")
|
||
}
|
||
if cfg.segmentsCache == "" {
|
||
cfg.segmentsCache = cfg.input + ".segments.json"
|
||
}
|
||
if cfg.clipOut == "" {
|
||
cfg.clipOut = clip.DefaultOutputPath(cfg.input)
|
||
}
|
||
if cfg.minSec <= 0 || cfg.maxSec <= 0 || cfg.maxSec < cfg.minSec {
|
||
return fmt.Errorf("invalid --min/--max bounds")
|
||
}
|
||
|
||
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||
defer cancel()
|
||
|
||
segs, err := loadOrTranscribeSegments(ctx, cfg)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
if cfg.keepTranscript {
|
||
if err := os.WriteFile(cfg.input+".transcript.txt", []byte(transcribe.PlainText(segs)), 0o644); err != nil {
|
||
return fmt.Errorf("writing transcript: %w", err)
|
||
}
|
||
}
|
||
|
||
sum, err := buildSummarizer(cfg.summarizer, cfg.model)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
if cfg.modeSummerize {
|
||
if err := doSummerize(ctx, cfg, sum, segs); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
if cfg.modeClip {
|
||
if err := doClip(ctx, cfg, sum, segs); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func doSummerize(ctx context.Context, cfg config, sum summarize.Summarizer, segs []transcribe.Segment) error {
|
||
systemPrompt, err := loadPrompt(cfg.promptSummary, defaultSummaryPrompt)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
body := "Transcript:\n\n" + transcribe.PlainText(segs)
|
||
if notes := strings.TrimSpace(cfg.prompt); notes != "" {
|
||
body = "Producer's notes (treat these as authoritative for titles, framing, and key points; expand and enrich them using the transcript that follows):\n\n" +
|
||
notes + "\n\n---\n\n" + body
|
||
}
|
||
|
||
logIf(cfg.verbose, "summarizing with %s", sum.Name())
|
||
t0 := time.Now()
|
||
md, err := sum.Summarize(ctx, systemPrompt, body)
|
||
if err != nil {
|
||
return fmt.Errorf("summarize: %w", err)
|
||
}
|
||
md = strings.TrimSpace(md)
|
||
logIf(cfg.verbose, "summary ready (%d chars, %s)", len(md), time.Since(t0).Round(time.Second))
|
||
|
||
if err := writeOutput(cfg.mdOut, md); err != nil {
|
||
return fmt.Errorf("writing markdown: %w", err)
|
||
}
|
||
|
||
var html string
|
||
if cfg.spotifyOut != "" || cfg.copyHTML {
|
||
html = output.MarkdownToSpotifyHTML(md)
|
||
}
|
||
if cfg.spotifyOut != "" {
|
||
if err := writeOutput(cfg.spotifyOut, html); err != nil {
|
||
return fmt.Errorf("writing spotify HTML: %w", err)
|
||
}
|
||
}
|
||
if cfg.copyHTML {
|
||
tool, err := output.CopyToClipboard(html)
|
||
if err != nil {
|
||
return fmt.Errorf("clipboard: %w", err)
|
||
}
|
||
logIf(cfg.verbose, "Spotify HTML copied via %s", tool)
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func doClip(ctx context.Context, cfg config, sum summarize.Summarizer, segs []transcribe.Segment) error {
|
||
prompt, err := loadPrompt(cfg.promptClip, defaultClipPrompt)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
logIf(cfg.verbose, "selecting clip with %s (looking for %g-%gs window)", sum.Name(), cfg.minSec, cfg.maxSec)
|
||
t0 := time.Now()
|
||
sel, raw, err := clip.Pick(ctx, sum, prompt, segs, cfg.minSec, cfg.maxSec)
|
||
if err != nil {
|
||
if raw != "" {
|
||
fmt.Fprintf(os.Stderr, "model output:\n%s\n", raw)
|
||
}
|
||
return fmt.Errorf("selecting clip: %w", err)
|
||
}
|
||
logIf(cfg.verbose, "selection ready (%s)", time.Since(t0).Round(time.Second))
|
||
|
||
fmt.Printf("Title: %s\n", sel.Title)
|
||
fmt.Printf("Hook: %s\n", sel.Hook)
|
||
fmt.Printf("Quote: %s\n", sel.Quote)
|
||
fmt.Printf("Window: %s -> %s (%.1fs)\n", mmss(sel.StartSeconds), mmss(sel.EndSeconds), sel.Duration())
|
||
fmt.Printf("Reason: %s\n", sel.Reasoning)
|
||
|
||
if cfg.dryRun {
|
||
return nil
|
||
}
|
||
|
||
logIf(cfg.verbose, "cutting clip with ffmpeg -> %s", cfg.clipOut)
|
||
if err := clip.Extract(ctx, cfg.input, sel, cfg.clipOut, !cfg.copyCodec); err != nil {
|
||
return err
|
||
}
|
||
fmt.Printf("Wrote: %s\n", cfg.clipOut)
|
||
return nil
|
||
}
|
||
|
||
// loadOrTranscribeSegments reads cached whisper JSON if available; otherwise
|
||
// extracts audio, runs whisper, writes the cache, and returns segments.
|
||
func loadOrTranscribeSegments(ctx context.Context, cfg config) ([]transcribe.Segment, error) {
|
||
if data, err := os.ReadFile(cfg.segmentsCache); err == nil {
|
||
var segs []transcribe.Segment
|
||
if jerr := json.Unmarshal(data, &segs); jerr == nil && len(segs) > 0 {
|
||
logIf(cfg.verbose, "reusing cached segments from %s (%d segments)", cfg.segmentsCache, len(segs))
|
||
return segs, nil
|
||
}
|
||
}
|
||
|
||
wavPath, cleanup, err := prepareWAV(ctx, cfg.input, cfg.keepWAV, cfg.verbose)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
defer cleanup()
|
||
|
||
tr := buildTranscriber(cfg)
|
||
logIf(cfg.verbose, "transcribing with %s", tr.Name())
|
||
t0 := time.Now()
|
||
segs, err := tr.TranscribeSegments(ctx, wavPath)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("transcribe: %w", err)
|
||
}
|
||
logIf(cfg.verbose, "transcript ready (%d segments, %s)", len(segs), time.Since(t0).Round(time.Second))
|
||
|
||
if data, err := json.Marshal(segs); err == nil {
|
||
_ = os.WriteFile(cfg.segmentsCache, data, 0o644)
|
||
logIf(cfg.verbose, "cached segments to %s", cfg.segmentsCache)
|
||
}
|
||
return segs, nil
|
||
}
|
||
|
||
// prepareWAV normalizes input to 16 kHz mono WAV. Returns the wav path and a
|
||
// cleanup function (no-op if keep is true).
|
||
func prepareWAV(ctx context.Context, input string, keep, verbose bool) (string, func(), error) {
|
||
wavPath := input + ".16k.wav"
|
||
cleanup := func() {}
|
||
if !keep {
|
||
tmpDir, err := os.MkdirTemp("", "publish-")
|
||
if err != nil {
|
||
return "", cleanup, err
|
||
}
|
||
wavPath = filepath.Join(tmpDir, "audio.wav")
|
||
cleanup = func() { _ = os.RemoveAll(tmpDir) }
|
||
}
|
||
logIf(verbose, "extracting audio -> %s", wavPath)
|
||
if err := audio.ExtractWAV(ctx, input, wavPath); err != nil {
|
||
cleanup()
|
||
return "", func() {}, fmt.Errorf("audio extraction: %w", err)
|
||
}
|
||
return wavPath, cleanup, nil
|
||
}
|
||
|
||
func loadPrompt(path, fallback string) (string, error) {
|
||
if path == "" {
|
||
return fallback, nil
|
||
}
|
||
b, err := os.ReadFile(expand(path))
|
||
if err != nil {
|
||
return "", fmt.Errorf("reading prompt %s: %w", path, err)
|
||
}
|
||
return string(b), nil
|
||
}
|
||
|
||
func buildTranscriber(cfg config) *transcribe.WhisperCPP {
|
||
return &transcribe.WhisperCPP{
|
||
Bin: cfg.whisperBin,
|
||
Model: expand(cfg.whisperModel),
|
||
Language: cfg.whisperLang,
|
||
Threads: cfg.whisperThreads,
|
||
Verbose: cfg.verbose,
|
||
}
|
||
}
|
||
|
||
func buildSummarizer(kind, model string) (summarize.Summarizer, error) {
|
||
switch kind {
|
||
case "claude-cli", "cli":
|
||
return &summarize.ClaudeCLI{Model: model}, nil
|
||
case "claude-api", "anthropic", "api":
|
||
return &summarize.Anthropic{Model: model}, nil
|
||
default:
|
||
return nil, fmt.Errorf("unknown summarizer %q", kind)
|
||
}
|
||
}
|
||
|
||
func writeOutput(path, data string) error {
|
||
if path == "" {
|
||
return nil
|
||
}
|
||
if path == "-" {
|
||
_, err := os.Stdout.WriteString(data + "\n")
|
||
return err
|
||
}
|
||
return os.WriteFile(expand(path), []byte(data+"\n"), 0o644)
|
||
}
|
||
|
||
// expand replaces a leading "~/" in p with the current user's home directory.
// Paths without that prefix are returned unchanged, and so is p itself when
// the home directory cannot be determined.
func expand(p string) string {
	if !strings.HasPrefix(p, "~/") {
		return p
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return p
	}
	return filepath.Join(home, strings.TrimPrefix(p, "~/"))
}
|
||
|
||
// defaultWhisperModel returns the default value for --whisper-model:
// <home>/.cache/whisper.cpp/ggml-base.en.bin, or "" when the home directory
// cannot be determined.
func defaultWhisperModel() string {
	if home, err := os.UserHomeDir(); err == nil {
		return filepath.Join(home, ".cache", "whisper.cpp", "ggml-base.en.bin")
	}
	return ""
}
|
||
|
||
// logIf writes one "[publish] "-prefixed, newline-terminated line to stderr,
// formatted from format and args, but only when on is true.
func logIf(on bool, format string, args ...any) {
	if on {
		line := fmt.Sprintf(format+"\n", args...)
		fmt.Fprint(os.Stderr, "[publish] "+line)
	}
}
|
||
|
||
// mmss renders a time offset in seconds as "MM:SS", or as "HH:MM:SS" once it
// reaches an hour. Negative inputs are treated as zero and fractional seconds
// are truncated, not rounded.
func mmss(seconds float64) string {
	if seconds < 0 {
		seconds = 0
	}
	whole := int(seconds)
	minutes, s := whole/60, whole%60
	h, m := minutes/60, minutes%60
	if h > 0 {
		return fmt.Sprintf("%02d:%02d:%02d", h, m, s)
	}
	return fmt.Sprintf("%02d:%02d", m, s)
}
|