Initial push to gitea
This commit is contained in:
204
internal/clip/clip.go
Normal file
204
internal/clip/clip.go
Normal file
@@ -0,0 +1,204 @@
|
||||
// Package clip selects the best 60–90s window from a timestamped transcript
|
||||
// (using a Summarizer to do the picking) and runs ffmpeg to cut that window
|
||||
// out of the original media.
|
||||
package clip
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"publish/internal/summarize"
|
||||
"publish/internal/transcribe"
|
||||
)
|
||||
|
||||
// Selection describes one clip window together with the descriptive text that
// accompanies it. Pick fills it by decoding the model's JSON answer; the json
// tags name the keys that answer is expected to carry.
type Selection struct {
	// StartSeconds and EndSeconds delimit the window, in seconds.
	StartSeconds float64 `json:"start_seconds"`
	EndSeconds   float64 `json:"end_seconds"`

	// Free-text fields decoded from the model's answer. Nothing in this
	// package interprets them.
	Title     string `json:"title"`
	Hook      string `json:"hook"`
	Quote     string `json:"quote"`
	Reasoning string `json:"reasoning"`
}

// Duration reports the length of the window in seconds (end minus start).
// It performs no validation, so an inverted window yields a negative value.
func (s Selection) Duration() float64 {
	return s.EndSeconds - s.StartSeconds
}
|
||||
|
||||
// Pick asks the summarizer to choose the best window in the given segments,
|
||||
// using promptTemplate (which may contain {{MIN_SECONDS}} / {{MAX_SECONDS}}
|
||||
// placeholders). It clamps and validates the returned window against minSec
|
||||
// and maxSec.
|
||||
func Pick(ctx context.Context, sum summarize.Summarizer, promptTemplate string, segs []transcribe.Segment, minSec, maxSec float64) (Selection, string, error) {
|
||||
if len(segs) == 0 {
|
||||
return Selection{}, "", fmt.Errorf("no transcript segments to choose from")
|
||||
}
|
||||
prompt := strings.NewReplacer(
|
||||
"{{MIN_SECONDS}}", fmt.Sprintf("%g", minSec),
|
||||
"{{MAX_SECONDS}}", fmt.Sprintf("%g", maxSec),
|
||||
).Replace(promptTemplate)
|
||||
|
||||
body := transcribe.FormatForLLM(segs)
|
||||
|
||||
raw, err := sum.Summarize(ctx, prompt, body)
|
||||
if err != nil {
|
||||
return Selection{}, "", err
|
||||
}
|
||||
|
||||
jsonText, err := extractJSONObject(raw)
|
||||
if err != nil {
|
||||
return Selection{}, raw, fmt.Errorf("could not find JSON object in model output: %w", err)
|
||||
}
|
||||
var sel Selection
|
||||
if err := json.Unmarshal([]byte(jsonText), &sel); err != nil {
|
||||
return Selection{}, raw, fmt.Errorf("parsing selection JSON: %w\n--- raw ---\n%s", err, jsonText)
|
||||
}
|
||||
|
||||
if err := validate(&sel, segs, minSec, maxSec); err != nil {
|
||||
return sel, raw, err
|
||||
}
|
||||
return sel, raw, nil
|
||||
}
|
||||
|
||||
func validate(sel *Selection, segs []transcribe.Segment, minSec, maxSec float64) error {
|
||||
if sel.EndSeconds <= sel.StartSeconds {
|
||||
return fmt.Errorf("invalid window: end (%g) <= start (%g)", sel.EndSeconds, sel.StartSeconds)
|
||||
}
|
||||
maxEnd := segs[len(segs)-1].End
|
||||
if sel.StartSeconds < 0 || sel.EndSeconds > maxEnd+1.0 {
|
||||
return fmt.Errorf("window [%g, %g] is outside transcript bounds [0, %g]",
|
||||
sel.StartSeconds, sel.EndSeconds, maxEnd)
|
||||
}
|
||||
dur := sel.Duration()
|
||||
// Allow small slop on either side; otherwise reject.
|
||||
if dur < minSec-2 || dur > maxSec+2 {
|
||||
return fmt.Errorf("window duration %.1fs is outside requested bounds [%g, %g]",
|
||||
dur, minSec, maxSec)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// extractJSONObject pulls a JSON object out of s, tolerating prose or code
// fences around it. Useful when the model wraps its answer in prose despite
// being told not to.
//
// Every '{' in s is tried in order as a possible start. The first
// brace-balanced span (braces inside JSON strings are ignored) that is also
// valid JSON is returned, so stray braces in the surrounding prose, such as
// "use {curly} braces: {...}", no longer hide the real object.
//
// If no candidate is valid JSON, the first balanced span is returned anyway so
// the caller's json.Unmarshal can report a precise syntax error. An error is
// returned only when s contains no '{' or no balanced span at all.
//
// Each candidate is scanned to its end, so the cost is quadratic in the worst
// case; inputs are short model replies, which keeps this negligible.
func extractJSONObject(s string) (string, error) {
	start := strings.IndexByte(s, '{')
	if start < 0 {
		return "", fmt.Errorf("no '{' in response")
	}

	var fallback string
	haveFallback := false
	for start >= 0 {
		if end := balancedObjectEnd(s, start); end >= 0 {
			candidate := s[start : end+1]
			if json.Valid([]byte(candidate)) {
				return candidate, nil
			}
			if !haveFallback {
				fallback, haveFallback = candidate, true
			}
		}
		next := strings.IndexByte(s[start+1:], '{')
		if next < 0 {
			break
		}
		start += next + 1
	}

	if haveFallback {
		return fallback, nil
	}
	return "", fmt.Errorf("unbalanced braces")
}

// balancedObjectEnd returns the index of the '}' that closes the '{' at
// s[start], skipping braces inside double-quoted strings (with backslash
// escapes), or -1 if the braces never balance.
func balancedObjectEnd(s string, start int) int {
	depth := 0
	inStr, esc := false, false
	for i := start; i < len(s); i++ {
		c := s[i]
		if inStr {
			switch {
			case esc:
				// The previous byte was a backslash; this byte is escaped.
				esc = false
			case c == '\\':
				esc = true
			case c == '"':
				inStr = false
			}
			continue
		}
		switch c {
		case '"':
			inStr = true
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return i
			}
		}
	}
	return -1
}
|
||||
|
||||
// ffmpeg settings used by Extract.
const (
	// portraitFilter is the -vf filter chain for re-encoded video. It
	// center-crops the source to the largest 9:16 rectangle that fits inside
	// it (cropping only, no distortion), scales the result to 1080x1920, and
	// forces square pixels with setsar=1. The crop width is min(iw, ih*9/16)
	// and the crop height is min(ih, iw*16/9): 16:9, 4:3 and 1:1 sources lose
	// their left/right edges, while 9:16 sources pass through uncropped. The
	// commas inside min() are backslash-escaped so ffmpeg does not read them
	// as filter separators.
	portraitFilter = `crop=min(iw\,ih*9/16):min(ih\,iw*16/9),scale=1080:1920,setsar=1`

	// MaxClipBytes is the hard output-size ceiling passed to ffmpeg's -fs
	// flag. Realistic 60–90s 1080x1920 H.264 clips at CRF 23 land around
	// 30–100 MB, so this acts as a safety cap rather than a target.
	MaxClipBytes = 1 << 30 // 1 GiB
)
|
||||
|
||||
// Extract runs ffmpeg to cut [start, end) seconds out of input into outPath.
|
||||
// For video inputs, the clip is re-encoded as a 1080x1920 portrait (9:16
|
||||
// center-crop) under a 1 GiB size cap. If reencode is false, stream copy is
|
||||
// used (fast, keyframe-aligned, but the source aspect ratio is preserved).
|
||||
func Extract(ctx context.Context, input string, sel Selection, outPath string, reencode bool) error {
|
||||
if _, err := exec.LookPath("ffmpeg"); err != nil {
|
||||
return fmt.Errorf("ffmpeg not on PATH: %w", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dur := sel.EndSeconds - sel.StartSeconds
|
||||
args := []string{
|
||||
"-y",
|
||||
"-loglevel", "error",
|
||||
"-ss", fmt.Sprintf("%.3f", sel.StartSeconds),
|
||||
"-i", input,
|
||||
"-t", fmt.Sprintf("%.3f", dur),
|
||||
}
|
||||
if reencode {
|
||||
if hasVideoExt(input) {
|
||||
args = append(args,
|
||||
"-vf", portraitFilter,
|
||||
"-c:v", "libx264",
|
||||
"-preset", "fast",
|
||||
"-crf", "23",
|
||||
"-c:a", "aac",
|
||||
"-b:a", "128k",
|
||||
"-movflags", "+faststart",
|
||||
)
|
||||
} else {
|
||||
args = append(args,
|
||||
"-vn",
|
||||
"-c:a", "aac",
|
||||
"-b:a", "128k",
|
||||
)
|
||||
}
|
||||
} else {
|
||||
args = append(args, "-c", "copy")
|
||||
}
|
||||
args = append(args, "-fs", fmt.Sprintf("%d", MaxClipBytes), outPath)
|
||||
|
||||
cmd := exec.CommandContext(ctx, "ffmpeg", args...)
|
||||
cmd.Stdout = os.Stderr
|
||||
cmd.Stderr = os.Stderr
|
||||
if err := cmd.Run(); err != nil {
|
||||
return fmt.Errorf("ffmpeg cut: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// hasVideoExt reports whether p's file extension, compared case-insensitively,
// is one of the video container extensions this package recognizes. Only the
// name is inspected; the file is never opened.
func hasVideoExt(p string) bool {
	ext := strings.ToLower(filepath.Ext(p))
	for _, known := range [...]string{".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v", ".flv", ".ts"} {
		if ext == known {
			return true
		}
	}
	return false
}
|
||||
|
||||
// DefaultOutputPath builds <input-without-ext>.clip<ext> for video inputs and
|
||||
// .m4a for audio inputs.
|
||||
func DefaultOutputPath(input string) string {
|
||||
base := strings.TrimSuffix(input, filepath.Ext(input))
|
||||
if hasVideoExt(input) {
|
||||
return base + ".clip" + filepath.Ext(input)
|
||||
}
|
||||
return base + ".clip.m4a"
|
||||
}
|
||||
37
internal/clip/clip_test.go
Normal file
37
internal/clip/clip_test.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package clip
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestExtractJSONObject(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{"raw json", `{"a":1}`, `{"a":1}`},
|
||||
{"with prose", "Sure, here you go:\n{\"a\":1}\nThanks", `{"a":1}`},
|
||||
{"with fence", "```json\n{\"a\":1}\n```", `{"a":1}`},
|
||||
{"nested", `prelude {"a":{"b":2},"c":3} trailing`, `{"a":{"b":2},"c":3}`},
|
||||
{"brace in string", `{"text":"hello {world}"}`, `{"text":"hello {world}"}`},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
got, err := extractJSONObject(c.in)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if got != c.want {
|
||||
t.Errorf("got %q want %q", got, c.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractJSONObjectMissing(t *testing.T) {
|
||||
if _, err := extractJSONObject("no json here"); err == nil {
|
||||
t.Error("expected error for missing JSON")
|
||||
}
|
||||
if _, err := extractJSONObject(`{"unterminated":`); err == nil {
|
||||
t.Error("expected error for unbalanced braces")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user