43 lines
1018 B
Go
43 lines
1018 B
Go
// Package audio normalizes arbitrary audio/video inputs into a whisper.cpp-friendly
|
|
// 16 kHz mono PCM WAV file using ffmpeg.
|
|
package audio
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
)
|
|
|
|
// ExtractWAV runs ffmpeg to convert input (audio or video) into a 16kHz mono
// signed-16-bit PCM WAV file at outPath. ffmpeg must be on PATH.
//
// The parent directory of outPath is created if it does not exist, and an
// existing file at outPath is overwritten. The WAV container is requested
// explicitly, so the result does not depend on outPath's file extension.
// Anything ffmpeg prints on stdout or stderr is forwarded to this process's
// stderr.
//
// The command is started with exec.CommandContext, so ctx can be used to
// interrupt a running conversion.
//
// An error is returned when ffmpeg cannot be located, when input cannot be
// stat'ed or is a directory, when the output directory cannot be created, or
// when ffmpeg itself fails.
func ExtractWAV(ctx context.Context, input, outPath string) error {
	if _, err := exec.LookPath("ffmpeg"); err != nil {
		return fmt.Errorf("ffmpeg not found on PATH: %w", err)
	}

	info, err := os.Stat(input)
	if err != nil {
		return fmt.Errorf("input not readable: %w", err)
	}
	// Stat succeeds for directories; reject them here instead of letting
	// ffmpeg fail later with a less obvious message.
	if info.IsDir() {
		return fmt.Errorf("input not readable: %q is a directory", input)
	}

	if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil {
		return fmt.Errorf("creating output directory: %w", err)
	}

	cmd := exec.CommandContext(ctx, "ffmpeg",
		"-y", // overwrite outPath without prompting
		"-loglevel", "error",
		"-i", input,
		"-vn",          // drop any video stream
		"-ac", "1",     // mono
		"-ar", "16000", // 16 kHz sample rate
		"-c:a", "pcm_s16le",
		// Force the WAV muxer: without it ffmpeg guesses the container from
		// outPath's extension and fails (or picks another format) for
		// names that do not end in ".wav".
		"-f", "wav",
		outPath,
	)
	cmd.Stdout = os.Stderr
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("ffmpeg: %w", err)
	}
	return nil
}
|