// Package audio normalizes arbitrary audio/video inputs into a whisper.cpp-friendly // 16 kHz mono PCM WAV file using ffmpeg. package audio import ( "context" "fmt" "os" "os/exec" "path/filepath" ) // ExtractWAV runs ffmpeg to convert input (audio or video) into a 16kHz mono // signed-16-bit PCM WAV file at outPath. ffmpeg must be on PATH. func ExtractWAV(ctx context.Context, input, outPath string) error { if _, err := exec.LookPath("ffmpeg"); err != nil { return fmt.Errorf("ffmpeg not found on PATH: %w", err) } if _, err := os.Stat(input); err != nil { return fmt.Errorf("input not readable: %w", err) } if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil { return err } cmd := exec.CommandContext(ctx, "ffmpeg", "-y", "-loglevel", "error", "-i", input, "-vn", "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le", outPath, ) cmd.Stdout = os.Stderr cmd.Stderr = os.Stderr if err := cmd.Run(); err != nil { return fmt.Errorf("ffmpeg: %w", err) } return nil }