Files
Summerize/internal/audio/audio.go
2026-05-10 13:37:17 -06:00

43 lines
1018 B
Go

// Package audio normalizes arbitrary audio/video inputs into a whisper.cpp-friendly
// 16 kHz mono PCM WAV file using ffmpeg.
package audio
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
)
// ExtractWAV runs ffmpeg to convert input (audio or video) into a 16kHz mono
// signed-16-bit PCM WAV file at outPath. ffmpeg must be on PATH.
//
// The parent directory of outPath is created if it does not exist. input must
// name an existing non-directory file. The conversion is bound to ctx: the
// ffmpeg process is started with exec.CommandContext, so cancelling ctx stops it.
//
// Every returned error is wrapped with the step that failed, and the
// underlying cause remains reachable through errors.Is / errors.As.
func ExtractWAV(ctx context.Context, input, outPath string) error {
	if _, err := exec.LookPath("ffmpeg"); err != nil {
		return fmt.Errorf("ffmpeg not found on PATH: %w", err)
	}

	info, err := os.Stat(input)
	if err != nil {
		return fmt.Errorf("input not readable: %w", err)
	}
	// A directory passes os.Stat but can never be decoded as media; reject it
	// here with a clear message instead of relaying ffmpeg's failure.
	if info.IsDir() {
		return fmt.Errorf("input not readable: %q is a directory", input)
	}

	if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil {
		return fmt.Errorf("create output directory: %w", err)
	}

	cmd := exec.CommandContext(ctx, "ffmpeg",
		"-y", // overwrite outPath without prompting
		"-loglevel", "error",
		"-i", input,
		"-vn",      // drop any video stream
		"-ac", "1", // mono
		"-ar", "16000", // 16 kHz sample rate
		"-c:a", "pcm_s16le", // signed 16-bit little-endian PCM
		outPath,
	)
	// Route everything ffmpeg prints to our stderr so this process's stdout
	// is never written to by the child.
	cmd.Stdout = os.Stderr
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("ffmpeg: %w", err)
	}
	return nil
}