Initial push to gitea

This commit is contained in:
2026-05-10 13:37:17 -06:00
commit 54629aecad
20 changed files with 2381 additions and 0 deletions

204
internal/clip/clip.go Normal file
View File

@@ -0,0 +1,204 @@
// Package clip selects the best 60-90s window from a timestamped transcript
// (using a Summarizer to do the picking) and runs ffmpeg to cut that window
// out of the original media.
package clip
import (
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"publish/internal/summarize"
"publish/internal/transcribe"
)
// Selection describes the clip window chosen by the LLM together with the
// descriptive metadata it returned alongside it. The struct tags name the
// JSON keys each field is decoded from.
type Selection struct {
	// StartSeconds and EndSeconds bound the chosen window, in seconds.
	StartSeconds float64 `json:"start_seconds"`
	EndSeconds   float64 `json:"end_seconds"`

	// Title, Hook, Quote and Reasoning carry the text the model returned
	// under the corresponding JSON keys.
	Title     string `json:"title"`
	Hook      string `json:"hook"`
	Quote     string `json:"quote"`
	Reasoning string `json:"reasoning"`
}

// Duration reports the length of the selected window in seconds, computed as
// EndSeconds minus StartSeconds. No range check is applied, so the result is
// zero or negative when EndSeconds does not exceed StartSeconds.
func (s Selection) Duration() float64 {
	length := s.EndSeconds - s.StartSeconds
	return length
}
// Pick asks sum to choose the best clip window from segs and decodes its
// answer into a Selection.
//
// promptTemplate may contain the placeholders {{MIN_SECONDS}} and
// {{MAX_SECONDS}}; both are replaced with minSec and maxSec (formatted with
// %g) before the prompt is sent together with the transcript rendered by
// transcribe.FormatForLLM. The JSON object that extractJSONObject pulls out
// of the reply is unmarshalled and then checked by validate against the
// transcript bounds and the requested duration range. The window is only
// validated; it is never adjusted.
//
// The second return value is the summarizer's raw reply. It is empty when
// segs is empty or when the summarizer call itself fails. When validation
// fails, the decoded Selection is still returned alongside the error.
func Pick(ctx context.Context, sum summarize.Summarizer, promptTemplate string, segs []transcribe.Segment, minSec, maxSec float64) (Selection, string, error) {
	var none Selection
	if len(segs) == 0 {
		return none, "", fmt.Errorf("no transcript segments to choose from")
	}

	placeholders := strings.NewReplacer(
		"{{MIN_SECONDS}}", fmt.Sprintf("%g", minSec),
		"{{MAX_SECONDS}}", fmt.Sprintf("%g", maxSec),
	)
	raw, err := sum.Summarize(ctx, placeholders.Replace(promptTemplate), transcribe.FormatForLLM(segs))
	if err != nil {
		return none, "", err
	}

	payload, err := extractJSONObject(raw)
	if err != nil {
		return none, raw, fmt.Errorf("could not find JSON object in model output: %w", err)
	}

	var picked Selection
	if err = json.Unmarshal([]byte(payload), &picked); err != nil {
		return none, raw, fmt.Errorf("parsing selection JSON: %w\n--- raw ---\n%s", err, payload)
	}

	// The decoded selection is handed back even when it fails validation,
	// so the caller can see what the model actually proposed.
	err = validate(&picked, segs, minSec, maxSec)
	return picked, raw, err
}
// validate checks sel against the transcript and the requested duration
// range. It returns an error when
//   - the window is empty or inverted (end <= start),
//   - the window starts before 0 or ends more than 1s after the End of the
//     last element of segs, or
//   - the duration is more than 2s below minSec or more than 2s above maxSec.
//
// segs must be non-empty: its last element is read unconditionally. sel is
// only read, never modified.
func validate(sel *Selection, segs []transcribe.Segment, minSec, maxSec float64) error {
	start, end := sel.StartSeconds, sel.EndSeconds
	if end <= start {
		return fmt.Errorf("invalid window: end (%g) <= start (%g)", end, start)
	}

	// The end of the last segment stands in for the end of the transcript;
	// one second of overshoot is tolerated.
	lastEnd := segs[len(segs)-1].End
	beforeStart := start < 0
	pastEnd := end > lastEnd+1.0
	if beforeStart || pastEnd {
		return fmt.Errorf("window [%g, %g] is outside transcript bounds [0, %g]",
			start, end, lastEnd)
	}

	// Two seconds of slack are allowed on either side of the requested range.
	length := sel.Duration()
	tooShort := length < minSec-2
	tooLong := length > maxSec+2
	if tooShort || tooLong {
		return fmt.Errorf("window duration %.1fs is outside requested bounds [%g, %g]",
			length, minSec, maxSec)
	}
	return nil
}
// extractJSONObject pulls a JSON object out of s, which is useful when the
// model wraps its answer in prose or code fences despite being told not to.
//
// Starting at the first '{', it finds balanced {...} spans (ignoring braces
// that appear inside double-quoted strings) and returns the first span that
// is valid JSON. Spans that balance but are not JSON, such as a "{start, end}"
// placeholder in the surrounding prose, are skipped and the search resumes
// after them.
//
// If no span is valid JSON, the first balanced span is returned unchanged so
// the caller's JSON decoder can report what is wrong with it. An error is
// returned when s contains no '{' at all, or when the braces opened by the
// first '{' never balance.
func extractJSONObject(s string) (string, error) {
	start := strings.IndexByte(s, '{')
	if start < 0 {
		return "", fmt.Errorf("no '{' in response")
	}

	firstBalanced := ""
	for {
		end := matchingBrace(s, start)
		if end < 0 {
			// Everything from start onwards sits inside an unclosed
			// object; there is nothing further to try.
			break
		}
		candidate := s[start : end+1]
		if json.Valid([]byte(candidate)) {
			return candidate, nil
		}
		if firstBalanced == "" {
			firstBalanced = candidate
		}

		// Resume after the rejected span rather than inside it, so a
		// nested fragment of a malformed object is never returned.
		next := strings.IndexByte(s[end+1:], '{')
		if next < 0 {
			break
		}
		start = end + 1 + next
	}

	if firstBalanced != "" {
		return firstBalanced, nil
	}
	return "", fmt.Errorf("unbalanced braces")
}

// matchingBrace returns the index of the '}' that closes the '{' at s[open],
// or -1 when the braces never balance before the end of s. Braces inside
// double-quoted strings are ignored, and a backslash inside a string escapes
// the byte that follows it.
func matchingBrace(s string, open int) int {
	depth := 0
	inStr, esc := false, false
	for i := open; i < len(s); i++ {
		c := s[i]
		if inStr {
			switch {
			case esc:
				esc = false
			case c == '\\':
				esc = true
			case c == '"':
				inStr = false
			}
			continue
		}
		switch c {
		case '"':
			inStr = true
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return i
			}
		}
	}
	return -1
}
// portraitFilter is the ffmpeg video filter chain used for portrait output.
// It center-crops a source of any aspect ratio to a 9:16 sub-rectangle (no
// distortion, only cropping) and scales the result to 1080x1920. The min()
// expressions pick the largest 9:16 box that fits inside the source: 16:9
// sources lose their left/right edges, 9:16 sources are left unchanged, and
// 4:3 / 1:1 sources also have their sides cropped. setsar=1 forces square
// pixels. The "\," sequences keep the commas inside min() from being read as
// filter separators.
const portraitFilter = `crop=min(iw\,ih*9/16):min(ih\,iw*16/9)` + // largest 9:16 box inside the source
	`,scale=1080:1920` + // resize to the output resolution
	`,setsar=1` // square pixels
// MaxClipBytes is the hard size ceiling, in bytes, that Extract passes to
// ffmpeg's -fs flag. Realistic 60-90s 1080x1920 H.264 clips at CRF 23 land
// around 30-100 MB, so this is a safety cap rather than a target.
const MaxClipBytes = 1 << 30 // 1 GiB
// Extract runs ffmpeg to cut the window [sel.StartSeconds, sel.EndSeconds)
// out of input and writes it to outPath. The parent directory of outPath is
// created if needed and an existing output file is overwritten (-y).
//
// When reencode is true, inputs with a known video extension (see
// hasVideoExt) are re-encoded as a 1080x1920 portrait (9:16 center-crop)
// H.264/AAC clip; every other input is treated as audio and re-encoded to
// AAC with video disabled. When reencode is false, streams are copied as-is
// (fast, keyframe-aligned, source aspect ratio preserved). In every mode the
// output size is capped at MaxClipBytes via -fs.
//
// Extract returns an error without starting ffmpeg when the window starts
// before zero or its duration is not positive (including NaN). It also
// returns an error when ffmpeg is not on PATH, when the output directory
// cannot be created, or when ffmpeg exits unsuccessfully. ffmpeg's stdout and
// stderr are both forwarded to os.Stderr.
func Extract(ctx context.Context, input string, sel Selection, outPath string, reencode bool) error {
	dur := sel.Duration()
	// !(dur > 0) rather than dur <= 0 so that a NaN duration is rejected too.
	if sel.StartSeconds < 0 || !(dur > 0) {
		return fmt.Errorf("invalid clip window [%g, %g]", sel.StartSeconds, sel.EndSeconds)
	}

	ffmpeg, err := exec.LookPath("ffmpeg")
	if err != nil {
		return fmt.Errorf("ffmpeg not on PATH: %w", err)
	}
	if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil {
		return fmt.Errorf("creating output directory for %q: %w", outPath, err)
	}

	// -ss is placed before -i so it applies to the input; -t then bounds
	// the length of what is written.
	args := []string{
		"-y",
		"-loglevel", "error",
		"-ss", fmt.Sprintf("%.3f", sel.StartSeconds),
		"-i", input,
		"-t", fmt.Sprintf("%.3f", dur),
	}

	switch {
	case !reencode:
		args = append(args, "-c", "copy")
	case hasVideoExt(input):
		args = append(args,
			"-vf", portraitFilter,
			"-c:v", "libx264",
			"-preset", "fast",
			"-crf", "23",
			"-c:a", "aac",
			"-b:a", "128k",
			"-movflags", "+faststart",
		)
	default:
		// Audio-only source: disable video and encode the audio stream.
		args = append(args,
			"-vn",
			"-c:a", "aac",
			"-b:a", "128k",
		)
	}
	args = append(args, "-fs", fmt.Sprintf("%d", MaxClipBytes), outPath)

	// Run the binary that LookPath resolved instead of searching PATH again.
	cmd := exec.CommandContext(ctx, ffmpeg, args...)
	cmd.Stdout = os.Stderr
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("ffmpeg cut: %w", err)
	}
	return nil
}
// hasVideoExt reports whether the lower-cased file extension of p is one of
// the extensions this package treats as video: .mp4, .mov, .mkv, .webm, .avi,
// .m4v, .flv or .ts. Only the name is inspected; the file is never opened.
func hasVideoExt(p string) bool {
	ext := strings.ToLower(filepath.Ext(p))
	for _, videoExt := range [...]string{".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v", ".flv", ".ts"} {
		if ext == videoExt {
			return true
		}
	}
	return false
}
// DefaultOutputPath derives an output path from input by replacing its
// extension: inputs that hasVideoExt recognises become
// <input-without-ext>.clip<ext> (the original extension is kept as written),
// and all other inputs become <input-without-ext>.clip.m4a.
func DefaultOutputPath(input string) string {
	ext := filepath.Ext(input)
	stem := strings.TrimSuffix(input, ext)
	if !hasVideoExt(input) {
		return stem + ".clip.m4a"
	}
	return stem + ".clip" + ext
}

View File

@@ -0,0 +1,37 @@
package clip
import "testing"
// TestExtractJSONObject checks that extractJSONObject returns exactly the
// embedded JSON object for several shapes of model output: bare JSON, JSON
// surrounded by prose, fenced JSON, nested objects, and braces inside a
// string value.
func TestExtractJSONObject(t *testing.T) {
	type testCase struct {
		name, in, want string
	}
	table := []testCase{
		{name: "raw json", in: `{"a":1}`, want: `{"a":1}`},
		{name: "with prose", in: "Sure, here you go:\n{\"a\":1}\nThanks", want: `{"a":1}`},
		{name: "with fence", in: "```json\n{\"a\":1}\n```", want: `{"a":1}`},
		{name: "nested", in: `prelude {"a":{"b":2},"c":3} trailing`, want: `{"a":{"b":2},"c":3}`},
		{name: "brace in string", in: `{"text":"hello {world}"}`, want: `{"text":"hello {world}"}`},
	}
	for _, tc := range table {
		t.Run(tc.name, func(t *testing.T) {
			got, err := extractJSONObject(tc.in)
			if err != nil {
				t.Fatalf("err: %v", err)
			}
			if got != tc.want {
				t.Errorf("got %q want %q", got, tc.want)
			}
		})
	}
}
// TestExtractJSONObjectMissing checks that extractJSONObject reports an error
// both when the input contains no object at all and when an opened object is
// never closed.
func TestExtractJSONObjectMissing(t *testing.T) {
	_, err := extractJSONObject("no json here")
	if err == nil {
		t.Error("expected error for missing JSON")
	}

	_, err = extractJSONObject(`{"unterminated":`)
	if err == nil {
		t.Error("expected error for unbalanced braces")
	}
}