Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Load AGENTS.md
75 changes: 24 additions & 51 deletions cmd/jivefire/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,38 +378,27 @@ func runPass2(p *tea.Program, inputFile string, outputFile string, channels int,
fftBuffer := make([]float64, config.FFTSize)

// Pre-allocate reusable buffers for audio processing (avoid per-frame allocations)
newSamples := make([]float64, 0, samplesPerFrame)
newSamples := make([]float64, samplesPerFrame)
audioSamples := make([]float32, samplesPerFrame)

// Pre-fill buffer with first chunk
// Keep reading until we get the requested number of samples or EOF
var initialSamples []float64
for len(initialSamples) < config.FFTSize {
chunk, err := reader.ReadChunk(config.FFTSize - len(initialSamples))
if err != nil {
if errors.Is(err, io.EOF) {
break // Use what we have
}
cli.PrintError(fmt.Sprintf("error reading initial audio chunk: %v", err))
p.Quit()
return
}
initialSamples = append(initialSamples, chunk...)
n, err := audio.FillFFTBuffer(reader, fftBuffer)
if err != nil {
cli.PrintError(fmt.Sprintf("error reading initial audio chunk: %v", err))
p.Quit()
return
}

if len(initialSamples) == 0 {
if n == 0 {
cli.PrintError("no audio data available")
p.Quit()
return
}

copy(fftBuffer, initialSamples)

// Write initial audio samples to encoder (first samplesPerFrame worth)
// This corresponds to the audio for frame 0
initialAudioSamples := make([]float32, samplesPerFrame)
for i := 0; i < samplesPerFrame && i < len(initialSamples); i++ {
initialAudioSamples[i] = float32(initialSamples[i])
for i := 0; i < samplesPerFrame && i < n; i++ {
initialAudioSamples[i] = float32(fftBuffer[i])
}
if err := enc.WriteAudioSamples(initialAudioSamples); err != nil {
cli.PrintError(fmt.Sprintf("error writing initial audio: %v", err))
Expand Down Expand Up @@ -556,55 +545,39 @@ func runPass2(p *tea.Program, inputFile string, outputFile string, channels int,
// === AUDIO TIMING START ===
// Read audio, encode, and manage buffer for next frame
t0 = time.Now()
newSamples = newSamples[:0] // Reset slice, reuse backing array
for len(newSamples) < samplesPerFrame {
chunk, err := reader.ReadChunk(samplesPerFrame - len(newSamples))
if err != nil {
if errors.Is(err, io.EOF) {
// If we got no new samples at all, we're done
if len(newSamples) == 0 {
// Break out of the frame loop - no more audio
frameNum = numFrames
break
}
// Got partial frame at end of file, use what we have
break
}
cli.PrintError(fmt.Sprintf("error reading audio: %v", err))
p.Quit()
return
nRead, readErr := audio.ReadNextFrame(reader, newSamples)
if readErr != nil {
if errors.Is(readErr, io.EOF) {
totalAudio += time.Since(t0)
break
}
newSamples = append(newSamples, chunk...)
}

// If we got no new samples, we're done
if len(newSamples) == 0 {
totalAudio += time.Since(t0)
break
cli.PrintError(fmt.Sprintf("error reading audio: %v", readErr))
p.Quit()
return
}

// Write audio samples for this frame to encoder
// Convert float64 samples to float32 for AAC encoder
// Uses pre-allocated audioSamples buffer, slice to actual length
for i, s := range newSamples {
audioSamples[i] = float32(s)
for i := range nRead {
audioSamples[i] = float32(newSamples[i])
}
if err := enc.WriteAudioSamples(audioSamples[:len(newSamples)]); err != nil {
if err := enc.WriteAudioSamples(audioSamples[:nRead]); err != nil {
cli.PrintError(fmt.Sprintf("error writing audio at frame %d: %v", frameNum, err))
p.Quit()
return
}
// Shift buffer left by samplesPerFrame, append new samples
copy(fftBuffer, fftBuffer[samplesPerFrame:])
// Pad with zeros if we got fewer samples than expected
if len(newSamples) < samplesPerFrame {
copy(fftBuffer[config.FFTSize-samplesPerFrame:], newSamples)
if nRead < samplesPerFrame {
copy(fftBuffer[config.FFTSize-samplesPerFrame:], newSamples[:nRead])
// Zero-fill the remaining space
for i := config.FFTSize - samplesPerFrame + len(newSamples); i < config.FFTSize; i++ {
for i := config.FFTSize - samplesPerFrame + nRead; i < config.FFTSize; i++ {
fftBuffer[i] = 0
}
} else {
copy(fftBuffer[config.FFTSize-samplesPerFrame:], newSamples)
copy(fftBuffer[config.FFTSize-samplesPerFrame:], newSamples[:nRead])
}
totalAudio += time.Since(t0)
// === AUDIO TIMING END ===
Expand Down
2 changes: 1 addition & 1 deletion docs/ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ MP4 Muxer (libavformat)
## Key Technical Choices

### Audio Frame Size Mismatch
FFT analysis requires 2048 samples for frequency resolution, but AAC encoder expects 1024 samples per frame. **Solution:** `SharedAudioBuffer` in `audio/shared_buffer.go` provides thread-safe multi-consumer access with independent read positions—FFT and encoder each consume at their own rate without blocking each other.
FFT analysis requires 2048 samples for frequency resolution, but AAC encoder expects 1024 samples per frame. **Solution:** `AudioFIFO` in `encoder/encoder.go` buffers incoming audio samples and drains them in encoder-sized frames, decoupling the FFT chunk size from the AAC frame size.

### Hardware-Accelerated Encoding
Automatic GPU encoder detection in `encoder/hwaccel.go`:
Expand Down
70 changes: 24 additions & 46 deletions internal/audio/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io"
"math"
"math/cmplx"
"time"

"github.com/linuxmatters/jivefire/internal/config"
Expand Down Expand Up @@ -74,32 +75,22 @@ func AnalyzeAudio(filename string, progressCb ProgressCallback) (*Profile, error

// Sliding buffer for FFT: we advance by samplesPerFrame but need FFTSize for FFT
fftBuffer := make([]float64, config.FFTSize)
frameBuf := make([]float64, samplesPerFrame)

// Pre-fill buffer with first chunk
// Keep reading until we get the requested number of samples or EOF
var initialSamples []float64
for len(initialSamples) < config.FFTSize {
chunk, err := reader.ReadChunk(config.FFTSize - len(initialSamples))
if err != nil {
if errors.Is(err, io.EOF) {
break // Use what we have
}
return nil, fmt.Errorf("error reading initial chunk: %w", err)
}
initialSamples = append(initialSamples, chunk...)
n, err := FillFFTBuffer(reader, fftBuffer)
if err != nil {
return nil, fmt.Errorf("error reading initial chunk: %w", err)
}

if len(initialSamples) == 0 {
if n == 0 {
return nil, fmt.Errorf("no audio data in file")
}

copy(fftBuffer, initialSamples)

startTime := time.Now()
frameNum := 0

for {
// Pass fftBuffer directly to ProcessChunk - it creates its own copy via ApplyHanning
// Pass fftBuffer directly to ProcessChunk - it applies the pre-computed Hanning window
// No need for intermediate allocation since analyzeFrame only reads the buffer
coeffs := processor.ProcessChunk(fftBuffer)

Expand Down Expand Up @@ -129,42 +120,29 @@ func AnalyzeAudio(filename string, progressCb ProgressCallback) (*Profile, error
}

// Advance sliding buffer for next frame
// Read samplesPerFrame new samples and shift buffer
// Keep reading until we get the requested number of samples or EOF
newSamples := make([]float64, 0, samplesPerFrame)
for len(newSamples) < samplesPerFrame {
chunk, err := reader.ReadChunk(samplesPerFrame - len(newSamples))
if err != nil {
if errors.Is(err, io.EOF) {
// If we got some samples, use them; otherwise we're done
if len(newSamples) == 0 {
// Send final progress update
if progressCb != nil {
barHeights := make([]float64, config.NumBars)
for i := range config.NumBars {
barHeights[i] = analysis.BarMagnitudes[i]
}
elapsed := time.Since(startTime)
progressCb(frameNum, frameNum, analysis.RMSLevel, analysis.PeakMagnitude, barHeights, elapsed)
}
break // Finished reading all audio
nRead, err := ReadNextFrame(reader, frameBuf)
if err != nil {
if errors.Is(err, io.EOF) {
// Send final progress update
if progressCb != nil {
barHeights := make([]float64, config.NumBars)
for i := range config.NumBars {
barHeights[i] = analysis.BarMagnitudes[i]
}
// Got partial frame at end of file, use what we have
break
elapsed := time.Since(startTime)
progressCb(frameNum, frameNum, analysis.RMSLevel, analysis.PeakMagnitude, barHeights, elapsed)
}
return nil, fmt.Errorf("error reading audio at frame %d: %w", frameNum, err)
break
}
newSamples = append(newSamples, chunk...)
}

// If we got no new samples, we're done
if len(newSamples) == 0 {
break
return nil, fmt.Errorf("error reading audio at frame %d: %w", frameNum, err)
}

// Shift buffer left by samplesPerFrame, append new samples
copy(fftBuffer, fftBuffer[samplesPerFrame:])
copy(fftBuffer[config.FFTSize-samplesPerFrame:], newSamples)
copy(fftBuffer[config.FFTSize-samplesPerFrame:], frameBuf[:nRead])
if nRead < samplesPerFrame {
clear(fftBuffer[config.FFTSize-samplesPerFrame+nRead:])
}
}

// Set actual frame count and duration
Expand Down Expand Up @@ -222,7 +200,7 @@ func analyzeFrame(coeffs []complex128, audioChunk []float64) FrameAnalysis {

var sum float64
for i := start; i < end; i++ {
magnitude := math.Sqrt(real(coeffs[i])*real(coeffs[i]) + imag(coeffs[i])*imag(coeffs[i]))
magnitude := cmplx.Abs(coeffs[i])
sum += magnitude
}

Expand Down
35 changes: 13 additions & 22 deletions internal/audio/analyzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,17 @@ import (
"github.com/linuxmatters/jivefire/internal/config"
)

func TestAnalyzeAudio(t *testing.T) {
func mustAnalyze(t *testing.T) *Profile {
t.Helper()
profile, err := AnalyzeAudio("../../testdata/LMP0.mp3", nil)
if err != nil {
t.Fatalf("Failed to analyze audio: %v", err)
t.Fatalf("Failed to analyse audio: %v", err)
}
return profile
}

func TestAnalyzeAudio(t *testing.T) {
profile := mustAnalyze(t)

// Validate basic properties
if profile.NumFrames <= 0 {
Expand Down Expand Up @@ -66,10 +72,7 @@ func TestAnalyzeAudioInvalidFile(t *testing.T) {
}

func TestAnalyzeFrameStatistics(t *testing.T) {
profile, err := AnalyzeAudio("../../testdata/LMP0.mp3", nil)
if err != nil {
t.Fatalf("Failed to analyze audio: %v", err)
}
profile := mustAnalyze(t)

// Check first few frames have valid statistics
for i := 0; i < 10 && i < len(profile.Frames); i++ {
Expand All @@ -95,10 +98,7 @@ func TestAnalyzeFrameStatistics(t *testing.T) {
}

func TestOptimalBaseScaleCalculation(t *testing.T) {
profile, err := AnalyzeAudio("../../testdata/LMP0.mp3", nil)
if err != nil {
t.Fatalf("Failed to analyze audio: %v", err)
}
profile := mustAnalyze(t)

// Optimal baseScale should be calculated as: 0.85 / GlobalPeak
expectedBaseScale := 0.85 / profile.GlobalPeak
Expand All @@ -120,10 +120,7 @@ func TestOptimalBaseScaleCalculation(t *testing.T) {
}

func TestGlobalPeakIsMaximum(t *testing.T) {
profile, err := AnalyzeAudio("../../testdata/LMP0.mp3", nil)
if err != nil {
t.Fatalf("Failed to analyze audio: %v", err)
}
profile := mustAnalyze(t)

// GlobalPeak should be >= all frame peaks
for i, frame := range profile.Frames {
Expand Down Expand Up @@ -152,10 +149,7 @@ func TestGlobalPeakIsMaximum(t *testing.T) {
}

func TestGlobalRMSIsAverage(t *testing.T) {
profile, err := AnalyzeAudio("../../testdata/LMP0.mp3", nil)
if err != nil {
t.Fatalf("Failed to analyze audio: %v", err)
}
profile := mustAnalyze(t)

// Calculate average RMS manually
var sumRMS float64
Expand All @@ -175,10 +169,7 @@ func TestGlobalRMSIsAverage(t *testing.T) {
}

func TestDynamicRangeCalculation(t *testing.T) {
profile, err := AnalyzeAudio("../../testdata/LMP0.mp3", nil)
if err != nil {
t.Fatalf("Failed to analyze audio: %v", err)
}
profile := mustAnalyze(t)

expectedDynamicRange := profile.GlobalPeak / profile.GlobalRMS

Expand Down
48 changes: 48 additions & 0 deletions internal/audio/buffer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package audio

import (
"errors"
"fmt"
"io"
)

// FillFFTBuffer reads up to len(buf) samples from reader into buf via repeated
// ReadChunk calls, stopping once buf is full or the reader reports io.EOF.
//
// It returns the number of samples actually stored in buf, which never exceeds
// len(buf). An immediate EOF yields (0, nil), allowing callers to decide
// whether an empty stream is an error. Any other ReadChunk error is wrapped
// and returned together with the count of samples stored before the failure.
func FillFFTBuffer(reader *StreamingReader, buf []float64) (int, error) {
	var total int
	for total < len(buf) {
		chunk, err := reader.ReadChunk(len(buf) - total)
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			return total, fmt.Errorf("reading audio chunk: %w", err)
		}
		// Advance by what copy stored rather than len(chunk): copy truncates
		// to the space left in buf, so this keeps the returned count within
		// len(buf) even if ReadChunk hands back more than was requested.
		total += copy(buf[total:], chunk)
	}
	return total, nil
}

// ReadNextFrame reads up to len(buf) samples from reader into buf via repeated
// ReadChunk calls.
//
// It returns the number of samples actually stored in buf, which never exceeds
// len(buf), so the result is always safe to use as a slice bound on buf.
// Results:
//   - (0, io.EOF) when the reader is exhausted before any sample is read;
//   - (n, nil) with n < len(buf) for a partial frame at end of file;
//   - (len(buf), nil) for a full frame;
//   - (0, err) with a wrapped error when ReadChunk fails for any other reason.
func ReadNextFrame(reader *StreamingReader, buf []float64) (int, error) {
	var total int
	for total < len(buf) {
		chunk, err := reader.ReadChunk(len(buf) - total)
		if err != nil {
			if errors.Is(err, io.EOF) {
				if total == 0 {
					return 0, io.EOF
				}
				// Partial frame: report what was gathered; the next call
				// will observe EOF with nothing read.
				break
			}
			return 0, fmt.Errorf("reading audio frame: %w", err)
		}
		// Advance by what copy stored rather than len(chunk): copy truncates
		// to the space left in buf, so this keeps the returned count within
		// len(buf) even if ReadChunk hands back more than was requested.
		total += copy(buf[total:], chunk)
	}
	return total, nil
}
Loading
Loading