ascii-chat/client__audio__pipeline_8cpp_source.html

// C++ headers must come FIRST before any C headers that include stdatomic.h

#include <memory>

#include <cstring>

#include <math.h>

#include <atomic>


// WebRTC headers for AEC3 MUST come before ascii-chat headers to avoid macro conflicts

// Define required WebRTC macros before including headers

#define WEBRTC_APM_DEBUG_DUMP 0

#define WEBRTC_MODULE_AUDIO_PROCESSING 1

// WEBRTC_POSIX should only be defined on POSIX systems (Unix/macOS), not Windows

#if defined(__unix__) || defined(__APPLE__)

#define WEBRTC_POSIX 1

#endif


// Suppress WebRTC/Abseil warnings about deprecated builtins and unused parameters

// These are third-party code issues, not our code

#pragma clang diagnostic push

#pragma clang diagnostic ignored "-Wdeprecated-builtins"

#pragma clang diagnostic ignored "-Wunused-parameter"


// WebRTC AEC3 extracted repository has different include paths

// than full WebRTC (no api/audio/ subdirectory)

#include "api/echo_canceller3_factory.h"

#include "api/echo_control.h"

// Note: extracted AEC3 doesn't have environment API - using direct classes

#include "audio_processing/audio_buffer.h"


#pragma clang diagnostic pop


// WebRTC defines FATAL() with no parameters, but ascii-chat defines

// FATAL(code, ...) with parameters. Undefine the WebRTC version before

// including ascii-chat headers so ascii-chat's version takes precedence.

#ifdef FATAL

#undef FATAL

#endif


// Now include ascii-chat headers after WebRTC to avoid macro conflicts

#include "audio/client_audio_pipeline.h"

#include "audio/wav_writer.h"

#include "common.h"

#include "log/logging.h"

#include "platform/abstraction.h"


// Include mixer.h for compressor, noise gate, and filter functions

#include "audio/mixer.h"


// For AEC3 metrics reporting

#include "audio/analysis.h"


#include <opus/opus.h>

#include <string.h>


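// Prevent stdatomic.h from defining conflicting macros in a C++ context.
// NOTE(review): this define appears after every #include in this file, so it can only
// influence headers included later in the translation unit - confirm that is intended.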

#define __STDC_NO_ATOMICS__ 1


// ============================================================================

// WebRTC AEC3 C++ Wrapper (hidden from C code)

// ============================================================================


/**
 * C++ holder for the AEC3 echo canceller.
 *
 * The pipeline struct stores a pointer to this wrapper in its `echo_canceller`
 * field and casts it back with static_cast wherever AEC3 is used in this file.
 */
struct WebRTCAec3Wrapper {
  WebRTCAec3Wrapper() = default;
  ~WebRTCAec3Wrapper() = default;

  /// Owning handle to the echo canceller produced by EchoCanceller3Factory.
  std::unique_ptr<webrtc::EchoControl> aec3;

  /// Copy of the configuration the canceller was created with.
  webrtc::EchoCanceller3Config config;
};


// Global tracking of max render RMS for AEC3 diagnostics (atomic so it can be touched from
// more than one thread). Within this file it is only ever reset to 0.0f by
// client_audio_pipeline_reset(); no code visible here updates it.

static std::atomic<float> g_max_render_rms{0.0f};


// Global counter of render frames fed to AEC3 (for warmup tracking). Incremented once per
// full 10ms render frame passed to AnalyzeRender() in client_audio_pipeline_process_duplex()
// and cleared by client_audio_pipeline_reset().

static std::atomic<int> g_render_frames_fed{0};


#ifdef __cplusplus

extern "C" {

#endif


// ============================================================================

// Default Configuration

// ============================================================================


/**
 * Build the default pipeline configuration.
 *
 * @return A configuration value populated with the project's stock settings:
 *         compile-time sample rate / frame length, 24 kbps Opus, and every
 *         processing stage enabled (CLIENT_AUDIO_PIPELINE_FLAGS_ALL).
 *         Fields not listed below are zero-initialized.
 */
client_audio_pipeline_config_t client_audio_pipeline_default_config(void) {
  client_audio_pipeline_config_t defaults = {
      // --- Codec / framing ---
      .sample_rate = CLIENT_AUDIO_PIPELINE_SAMPLE_RATE,
      .frame_size_ms = CLIENT_AUDIO_PIPELINE_FRAME_MS,
      .opus_bitrate = 24000,

      // --- Echo / noise / AGC tuning values ---
      .echo_filter_ms = 250,
      .noise_suppress_db = -25,
      .agc_level = 8000,
      .agc_max_gain = 30,

      // --- Jitter margin: how long to buffer before playback starts ---
      // Smaller means lower latency but a higher chance of underruns.
      // CRITICAL: Must match AUDIO_JITTER_BUFFER_THRESHOLD in ringbuffer.h!
      .jitter_margin_ms = 20, // 20ms = 1 Opus packet (optimized for LAN)

      // --- Band-limiting filters ---
      // Cutoff raised from 80Hz to break a low-frequency rumble feedback loop.
      .highpass_hz = 150.0f,
      .lowpass_hz = 8000.0f,

      // --- Compressor: tame loud peaks only, with little makeup gain ---
      // (+6dB makeup was reported to clip, hence the reduced value.)
      .comp_threshold_db = -6.0f, // Compress only above -6dB
      .comp_ratio = 3.0f,         // 3:1
      .comp_attack_ms = 5.0f,     // Fast attack for peaks
      .comp_release_ms = 150.0f,  // Slower release
      .comp_makeup_db = 2.0f,     // Down from 6dB to avoid clipping

      // --- Noise gate: aggressive, so quiet background audio is cut entirely ---
      .gate_threshold = 0.08f,  // about -22dB (previously 0.02 / -34dB)
      .gate_attack_ms = 0.5f,   // Very fast attack
      .gate_release_ms = 30.0f, // Fast release (previously 50ms)
      .gate_hysteresis = 0.3f,  // Tighter hysteresis = stays closed longer

      // --- Enabled stages ---
      .flags = CLIENT_AUDIO_PIPELINE_FLAGS_ALL,
  };

  return defaults;
}


// ============================================================================

// Lifecycle Functions

// ============================================================================


client_audio_pipeline_t *client_audio_pipeline_create(const client_audio_pipeline_config_t *config) {

  client_audio_pipeline_t *p = SAFE_CALLOC(1, sizeof(client_audio_pipeline_t), client_audio_pipeline_t *);

  if (!p) {

    log_error("Failed to allocate client audio pipeline");

    return NULL;

  }


  // Use default config if none provided

  if (config) {

    p->config = *config;

  } else {

    p->config = client_audio_pipeline_default_config();

  }


  p->flags = p->config.flags;

  p->frame_size = p->config.sample_rate * p->config.frame_size_ms / 1000;


  // No mutex needed - full-duplex means single callback thread handles all AEC3


  // Initialize Opus encoder/decoder first (no exceptions)

  int opus_error = 0;

  p->encoder = opus_encoder_create(p->config.sample_rate, 1, OPUS_APPLICATION_VOIP, &opus_error);

  if (!p->encoder || opus_error != OPUS_OK) {

    log_error("Failed to create Opus encoder: %d", opus_error);

    goto error;

  }

  opus_encoder_ctl(p->encoder, OPUS_SET_BITRATE(p->config.opus_bitrate));


  // CRITICAL: Disable DTX (Discontinuous Transmission) to prevent "beeps"

  // DTX stops sending frames during silence, causing audible clicks/beeps when audio resumes

  opus_encoder_ctl(p->encoder, OPUS_SET_DTX(0));


  // Create Opus decoder

  p->decoder = opus_decoder_create(p->config.sample_rate, 1, &opus_error);

  if (!p->decoder || opus_error != OPUS_OK) {

    log_error("Failed to create Opus decoder: %d", opus_error);

    goto error;

  }


  // Create WebRTC AEC3 Echo Cancellation

  // AEC3 provides production-grade acoustic echo cancellation with:

  // - Automatic network delay estimation (0-500ms)

  // - Adaptive filtering to actual echo path

  // - Residual echo suppression via spectral subtraction

  // - Jitter buffer handling via side information

  if (p->flags.echo_cancel) {

    // Configure AEC3 for better low-frequency (bass) echo cancellation

    webrtc::EchoCanceller3Config aec3_config;


    // Increase filter length for bass frequencies (default 13 blocks = ~17ms)

    // Bass at 80Hz has 12.5ms period, so we need at least 50+ blocks (~67ms)

    // to properly model the echo path for low frequencies

    aec3_config.filter.main.length_blocks = 50;           // ~67ms (was 13)

    aec3_config.filter.shadow.length_blocks = 50;         // ~67ms (was 13)

    aec3_config.filter.main_initial.length_blocks = 25;   // ~33ms (was 12)

    aec3_config.filter.shadow_initial.length_blocks = 25; // ~33ms (was 12)


    // More aggressive low-frequency suppression thresholds

    // Lower values = more aggressive echo suppression

    aec3_config.echo_audibility.audibility_threshold_lf = 5; // (was 10)


    // Create AEC3 using the factory

    auto factory = webrtc::EchoCanceller3Factory(aec3_config);


    std::unique_ptr<webrtc::EchoControl> echo_control = factory.Create(static_cast<int>(p->config.sample_rate), // 48kHz

                                                                       1, // num_render_channels (speaker output)

                                                                       1  // num_capture_channels (microphone input)

    );


    if (!echo_control) {

      log_warn("Failed to create WebRTC AEC3 instance - echo cancellation unavailable");

      p->echo_canceller = NULL;

    } else {

      // Successfully created AEC3 - wrap in our C++ wrapper for C compatibility

      auto wrapper = new WebRTCAec3Wrapper();

      wrapper->aec3 = std::move(echo_control);

      wrapper->config = aec3_config;

      p->echo_canceller = wrapper;


      log_info("✓ WebRTC AEC3 initialized (67ms filter for bass, adaptive delay)");


      // Create persistent AudioBuffer instances for AEC3

      p->aec3_render_buffer = new webrtc::AudioBuffer(48000, 1, 48000, 1, 48000, 1);

      p->aec3_capture_buffer = new webrtc::AudioBuffer(48000, 1, 48000, 1, 48000, 1);


      auto *render_buf = static_cast<webrtc::AudioBuffer *>(p->aec3_render_buffer);

      auto *capture_buf = static_cast<webrtc::AudioBuffer *>(p->aec3_capture_buffer);


      // Zero-initialize channel data

      float *const *render_ch = render_buf->channels();

      float *const *capture_ch = capture_buf->channels();

      if (render_ch && render_ch[0]) {

        memset(render_ch[0], 0, 480 * sizeof(float)); // 10ms at 48kHz

      }

      if (capture_ch && capture_ch[0]) {

        memset(capture_ch[0], 0, 480 * sizeof(float));

      }


      // Prime filterbank state with dummy processing cycle

      render_buf->SplitIntoFrequencyBands();

      render_buf->MergeFrequencyBands();

      capture_buf->SplitIntoFrequencyBands();

      capture_buf->MergeFrequencyBands();


      log_info("  - AudioBuffer filterbank state initialized");


      // Warm up AEC3 with 10 silent frames to initialize internal state

      for (int warmup = 0; warmup < 10; warmup++) {

        memset(render_ch[0], 0, 480 * sizeof(float));

        memset(capture_ch[0], 0, 480 * sizeof(float));


        render_buf->SplitIntoFrequencyBands();

        wrapper->aec3->AnalyzeRender(render_buf);

        render_buf->MergeFrequencyBands();


        wrapper->aec3->AnalyzeCapture(capture_buf);

        capture_buf->SplitIntoFrequencyBands();

        wrapper->aec3->SetAudioBufferDelay(0);

        wrapper->aec3->ProcessCapture(capture_buf, false);

        capture_buf->MergeFrequencyBands();

      }

      log_info("  - AEC3 warmed up with 10 silent frames");

      log_info("  - Persistent AudioBuffer instances created");

    }

  }


  // Initialize debug WAV writers for AEC3 analysis (if echo_cancel enabled)

  p->debug_wav_aec3_in = NULL;

  p->debug_wav_aec3_out = NULL;

  if (p->flags.echo_cancel) {

    // Open WAV files to capture AEC3 input and output

    p->debug_wav_aec3_in = wav_writer_open("/tmp/aec3_input.wav", 48000, 1);

    p->debug_wav_aec3_out = wav_writer_open("/tmp/aec3_output.wav", 48000, 1);

    if (p->debug_wav_aec3_in) {

      log_info("Debug: Recording AEC3 input to /tmp/aec3_input.wav");

    }

    if (p->debug_wav_aec3_out) {

      log_info("Debug: Recording AEC3 output to /tmp/aec3_output.wav");

    }


    log_info("✓ AEC3 echo cancellation enabled (full-duplex mode, no ring buffer delay)");

  }


  // Initialize audio processing components (compressor, noise gate, filters)

  // These are applied in the capture path after AEC3 and before Opus encoding

  {

    float sample_rate = (float)p->config.sample_rate;


    // Initialize compressor with config values

    compressor_init(&p->compressor, sample_rate);

    compressor_set_params(&p->compressor, p->config.comp_threshold_db, p->config.comp_ratio, p->config.comp_attack_ms,

                          p->config.comp_release_ms, p->config.comp_makeup_db);

    log_info("✓ Capture compressor: threshold=%.1fdB, ratio=%.1f:1, makeup=+%.1fdB", p->config.comp_threshold_db,

             p->config.comp_ratio, p->config.comp_makeup_db);


    // Initialize noise gate with config values

    noise_gate_init(&p->noise_gate, sample_rate);

    noise_gate_set_params(&p->noise_gate, p->config.gate_threshold, p->config.gate_attack_ms, p->config.gate_release_ms,

                          p->config.gate_hysteresis);

    log_info("✓ Capture noise gate: threshold=%.4f (%.1fdB)", p->config.gate_threshold,

             20.0f * log10f(p->config.gate_threshold + 1e-10f));


    // Initialize PLAYBACK noise gate - cuts quiet received audio before speakers

    // Very low threshold - only cut actual silence, not quiet voice audio

    // The server sends audio with RMS=0.01-0.02, so threshold must be below that

    noise_gate_init(&p->playback_noise_gate, sample_rate);

    noise_gate_set_params(&p->playback_noise_gate,

                          0.002f, // -54dB threshold - only cut near-silence

                          1.0f,   // 1ms attack - fast open

                          50.0f,  // 50ms release - smooth close

                          0.4f);  // Hysteresis

    log_info("✓ Playback noise gate: threshold=0.002 (-54dB)");


    // Initialize highpass filter (removes low-frequency rumble)

    highpass_filter_init(&p->highpass, p->config.highpass_hz, sample_rate);

    log_info("✓ Capture highpass filter: %.1f Hz", p->config.highpass_hz);


    // Initialize lowpass filter (removes high-frequency hiss)

    lowpass_filter_init(&p->lowpass, p->config.lowpass_hz, sample_rate);

    log_info("✓ Capture lowpass filter: %.1f Hz", p->config.lowpass_hz);

  }


  p->initialized = true;


  // Initialize startup fade-in to prevent initial microphone click

  // 200ms at 48kHz = 9600 samples - gradual ramp from silence to full volume

  // Longer fade-in (200ms vs 50ms) gives much smoother transition without audible pop

  p->capture_fadein_remaining = (p->config.sample_rate * 200) / 1000; // 200ms worth of samples

  log_info("✓ Capture fade-in: %d samples (200ms)", p->capture_fadein_remaining);


  log_info("Audio pipeline created: %dHz, %dms frames, %dkbps Opus", p->config.sample_rate, p->config.frame_size_ms,

           p->config.opus_bitrate / 1000);


  return p;


error:

  if (p->encoder)

    opus_encoder_destroy(p->encoder);

  if (p->decoder)

    opus_decoder_destroy(p->decoder);

  if (p->echo_canceller) {

    delete static_cast<WebRTCAec3Wrapper *>(p->echo_canceller);

  }

  SAFE_FREE(p);

  return NULL;

}


/**
 * Destroy a client audio pipeline and everything it owns.
 *
 * Releases, in order: the two AEC3 AudioBuffers, the AEC3 wrapper, the Opus
 * encoder and decoder, the debug WAV writers, and finally the pipeline itself.
 * Each owned pointer is cleared after release.
 *
 * @param pipeline Pipeline to destroy; NULL is a no-op.
 */
void client_audio_pipeline_destroy(client_audio_pipeline_t *pipeline) {
  if (pipeline == nullptr) {
    return;
  }

  // WebRTC AEC3 AudioBuffer instances
  if (pipeline->aec3_render_buffer) {
    auto *render_buf = static_cast<webrtc::AudioBuffer *>(pipeline->aec3_render_buffer);
    delete render_buf;
    pipeline->aec3_render_buffer = nullptr;
  }
  if (pipeline->aec3_capture_buffer) {
    auto *capture_buf = static_cast<webrtc::AudioBuffer *>(pipeline->aec3_capture_buffer);
    delete capture_buf;
    pipeline->aec3_capture_buffer = nullptr;
  }

  // WebRTC AEC3 wrapper (its unique_ptr member releases the canceller)
  if (pipeline->echo_canceller) {
    auto *aec_wrapper = static_cast<WebRTCAec3Wrapper *>(pipeline->echo_canceller);
    delete aec_wrapper;
    pipeline->echo_canceller = nullptr;
  }

  // Opus codec objects
  if (pipeline->encoder) {
    opus_encoder_destroy(pipeline->encoder);
    pipeline->encoder = nullptr;
  }
  if (pipeline->decoder) {
    opus_decoder_destroy(pipeline->decoder);
    pipeline->decoder = nullptr;
  }

  // Debug WAV writers
  if (pipeline->debug_wav_aec3_in) {
    wav_writer_close(static_cast<wav_writer_t *>(pipeline->debug_wav_aec3_in));
    pipeline->debug_wav_aec3_in = nullptr;
  }
  if (pipeline->debug_wav_aec3_out) {
    wav_writer_close(static_cast<wav_writer_t *>(pipeline->debug_wav_aec3_out));
    pipeline->debug_wav_aec3_out = nullptr;
  }

  SAFE_FREE(pipeline);
}


// ============================================================================

// Configuration Functions

// ============================================================================


/**
 * Replace the pipeline's component-enable flags.
 *
 * The store is a plain struct assignment with no locking.
 *
 * @param pipeline Target pipeline; NULL is ignored.
 * @param flags    New flag set, copied into the pipeline.
 */
void client_audio_pipeline_set_flags(client_audio_pipeline_t *pipeline, client_audio_pipeline_flags_t flags) {
  if (pipeline) {
    pipeline->flags = flags;
  }
}


/**
 * Read the pipeline's current component-enable flags.
 *
 * The read is a plain struct copy with no locking.
 *
 * @param pipeline Pipeline to query.
 * @return A copy of the pipeline's flags, or CLIENT_AUDIO_PIPELINE_FLAGS_MINIMAL
 *         when @p pipeline is NULL.
 */
client_audio_pipeline_flags_t client_audio_pipeline_get_flags(client_audio_pipeline_t *pipeline) {
  if (pipeline) {
    return pipeline->flags;
  }
  return CLIENT_AUDIO_PIPELINE_FLAGS_MINIMAL;
}


// ============================================================================

// Audio Processing Functions

// ============================================================================


/**
 * Encode one frame of captured audio to Opus.
 *
 * No DSP is applied here; the samples are passed straight to the encoder
 * (client_audio_pipeline_process_duplex() is where capture audio is processed).
 *
 * @param pipeline     Pipeline owning the encoder.
 * @param input        Float samples to encode.
 * @param num_samples  Sample count; must equal the pipeline's frame_size.
 * @param opus_out     Destination for the encoded packet.
 * @param max_opus_len Capacity of @p opus_out in bytes.
 * @return Encoded packet length in bytes, or -1 on invalid arguments or encoder failure.
 */
int client_audio_pipeline_capture(client_audio_pipeline_t *pipeline, const float *input, int num_samples,
                                  uint8_t *opus_out, int max_opus_len) {
  const bool args_ok = pipeline && input && opus_out && num_samples == pipeline->frame_size;
  if (!args_ok) {
    return -1;
  }

  const int encoded_bytes = opus_encode_float(pipeline->encoder, input, num_samples, opus_out, max_opus_len);
  if (encoded_bytes >= 0) {
    return encoded_bytes;
  }

  log_error("Opus encoding failed: %d", encoded_bytes);
  return -1;
}


/**
 * Decode an Opus packet and run the playback noise gate over the result.
 *
 * This function does not touch AEC3; in this file the render reference is only
 * handed to AEC3 inside client_audio_pipeline_process_duplex().
 *
 * @param pipeline    Pipeline owning the decoder and playback gate.
 * @param opus_in     Encoded packet.
 * @param opus_len    Packet length in bytes.
 * @param output      Destination for decoded float samples.
 * @param num_samples Capacity of @p output, passed to the decoder as the frame size.
 * @return Number of decoded samples, or -1 on invalid arguments or decoder failure.
 */
int client_audio_pipeline_playback(client_audio_pipeline_t *pipeline, const uint8_t *opus_in, int opus_len,
                                   float *output, int num_samples) {
  const bool args_ok = pipeline && opus_in && output;
  if (!args_ok) {
    return -1;
  }

  // Final argument 0 = no in-band FEC decoding requested.
  const int sample_total = opus_decode_float(pipeline->decoder, opus_in, opus_len, output, num_samples, 0);
  if (sample_total < 0) {
    log_error("Opus decoding failed: %d", sample_total);
    return -1;
  }

  // Gate quiet background audio before it reaches the speakers.
  if (sample_total != 0) {
    noise_gate_process_buffer(&pipeline->playback_noise_gate, output, sample_total);
  }

  return sample_total;
}


/**
 * Placeholder playback-frame getter: fills the caller's buffer with silence.
 *
 * @param pipeline    Pipeline handle (only checked for NULL).
 * @param output      Buffer receiving @p num_samples zeroed float samples.
 * @param num_samples Number of samples to write; must be non-negative.
 * @return @p num_samples on success, or -1 if an argument is invalid.
 */
int client_audio_pipeline_get_playback_frame(client_audio_pipeline_t *pipeline, float *output, int num_samples) {
  // A negative count would be converted to an enormous size_t inside memset
  // and overrun the caller's buffer, so reject it with the other bad arguments.
  if (!pipeline || !output || num_samples < 0) {
    return -1;
  }

  memset(output, 0, static_cast<size_t>(num_samples) * sizeof(float));
  return num_samples;
}


void client_audio_pipeline_process_duplex(client_audio_pipeline_t *pipeline, const float *render_samples,

                                          int render_count, const float *capture_samples, int capture_count,

                                          float *processed_output) {

  if (!pipeline || !processed_output)

    return;


  // Copy capture samples to output buffer for processing

  if (capture_samples && capture_count > 0) {

    memcpy(processed_output, capture_samples, capture_count * sizeof(float));

  } else {

    memset(processed_output, 0, capture_count * sizeof(float));

    return;

  }


  // Check for AEC3 bypass

  static int bypass_aec3 = -1;

  if (bypass_aec3 == -1) {

    const char *env = platform_getenv("BYPASS_AEC3");

    bypass_aec3 = (env && (strcmp(env, "1") == 0 || strcmp(env, "true") == 0)) ? 1 : 0;

    if (bypass_aec3) {

      log_warn("AEC3 BYPASSED (full-duplex mode) via BYPASS_AEC3=1");

    }

  }


  // Debug WAV recording

  if (pipeline->debug_wav_aec3_in) {

    wav_writer_write((wav_writer_t *)pipeline->debug_wav_aec3_in, capture_samples, capture_count);

  }


  // Apply startup fade-in using smoothstep curve

  if (pipeline->capture_fadein_remaining > 0) {

    const int total_fadein_samples = (pipeline->config.sample_rate * 200) / 1000;

    for (int i = 0; i < capture_count && pipeline->capture_fadein_remaining > 0; i++) {

      float progress = 1.0f - ((float)pipeline->capture_fadein_remaining / (float)total_fadein_samples);

      float gain = smoothstep(progress);

      processed_output[i] *= gain;

      pipeline->capture_fadein_remaining--;

    }

  }


  // WebRTC AEC3 processing - INLINE, no ring buffer, no mutex

  if (!bypass_aec3 && pipeline->flags.echo_cancel && pipeline->echo_canceller) {

    auto wrapper = static_cast<WebRTCAec3Wrapper *>(pipeline->echo_canceller);

    if (wrapper && wrapper->aec3) {

      const int webrtc_frame_size = 480; // 10ms at 48kHz


      auto *render_buf = static_cast<webrtc::AudioBuffer *>(pipeline->aec3_render_buffer);

      auto *capture_buf = static_cast<webrtc::AudioBuffer *>(pipeline->aec3_capture_buffer);


      if (render_buf && capture_buf) {

        float *const *render_channels = render_buf->channels();

        float *const *capture_channels = capture_buf->channels();


        if (render_channels && render_channels[0] && capture_channels && capture_channels[0]) {

          // Verify render_samples is valid before accessing

          if (!render_samples && render_count > 0) {

            log_warn_every(1000000, "AEC3: render_samples is NULL but render_count=%d", render_count);

            return;

          }


          // Process in 10ms chunks (AEC3 requirement)

          int render_offset = 0;

          int capture_offset = 0;


          while (capture_offset < capture_count || render_offset < render_count) {

            // STEP 1: Feed render signal (what's playing to speakers)

            // In full-duplex, this is THE EXACT audio being played RIGHT NOW

            if (render_samples && render_offset < render_count) {

              int render_chunk = (render_offset + webrtc_frame_size <= render_count) ? webrtc_frame_size

                                                                                     : (render_count - render_offset);

              if (render_chunk == webrtc_frame_size) {

                // Scale float [-1,1] to WebRTC int16-range [-32768, 32767]

                copy_buffer_with_gain(&render_samples[render_offset], render_channels[0], webrtc_frame_size, 32768.0f);

                render_buf->SplitIntoFrequencyBands();

                wrapper->aec3->AnalyzeRender(render_buf);

                render_buf->MergeFrequencyBands();

                g_render_frames_fed.fetch_add(1, std::memory_order_relaxed);

              }

              render_offset += render_chunk;

            }


            // STEP 2: Process capture (microphone input)

            if (capture_offset < capture_count) {

              int capture_chunk = (capture_offset + webrtc_frame_size <= capture_count)

                                      ? webrtc_frame_size

                                      : (capture_count - capture_offset);

              if (capture_chunk == webrtc_frame_size) {

                // Scale float [-1,1] to WebRTC int16-range [-32768, 32767]

                copy_buffer_with_gain(&processed_output[capture_offset], capture_channels[0], webrtc_frame_size,

                                      32768.0f);


                // AEC3 sequence: AnalyzeCapture, split, ProcessCapture, merge

                wrapper->aec3->AnalyzeCapture(capture_buf);

                capture_buf->SplitIntoFrequencyBands();


                // NOTE: SetAudioBufferDelay() is just an initial hint when use_external_delay_estimator=false

                // (default). AEC3's internal delay estimator will find the actual delay (~144ms in practice). We don't

                // call it here - let AEC3 estimate delay automatically.


                wrapper->aec3->ProcessCapture(capture_buf, false);

                capture_buf->MergeFrequencyBands();


                // Scale back to float range and apply soft clip to prevent distortion

                // Use gentle soft_clip (threshold=0.6, steepness=2.5) to leave headroom for compressor

                for (int j = 0; j < webrtc_frame_size; j++) {

                  float sample = capture_channels[0][j] / 32768.0f;

                  processed_output[capture_offset + j] = soft_clip(sample, 0.6f, 2.5f);

                }


                // Log AEC3 metrics periodically

                static int duplex_log_count = 0;

                if (++duplex_log_count % 100 == 1) {

                  webrtc::EchoControl::Metrics metrics = wrapper->aec3->GetMetrics();

                  log_info("AEC3 DUPLEX: ERL=%.1f ERLE=%.1f delay=%dms", metrics.echo_return_loss,

                           metrics.echo_return_loss_enhancement, metrics.delay_ms);

                  audio_analysis_set_aec3_metrics(metrics.echo_return_loss, metrics.echo_return_loss_enhancement,

                                                  metrics.delay_ms);

                }

              }

              capture_offset += capture_chunk;

            }

          }

        }

      }

    }

  }


  // Debug WAV recording (after AEC3)

  if (pipeline->debug_wav_aec3_out) {

    wav_writer_write((wav_writer_t *)pipeline->debug_wav_aec3_out, processed_output, capture_count);

  }


  // Apply capture processing chain: filters, noise gate, compressor

  if (pipeline->flags.highpass) {

    highpass_filter_process_buffer(&pipeline->highpass, processed_output, capture_count);

  }

  if (pipeline->flags.lowpass) {

    lowpass_filter_process_buffer(&pipeline->lowpass, processed_output, capture_count);

  }

  if (pipeline->flags.noise_gate) {

    noise_gate_process_buffer(&pipeline->noise_gate, processed_output, capture_count);

  }

  if (pipeline->flags.compressor) {

    for (int i = 0; i < capture_count; i++) {

      float gain = compressor_process_sample(&pipeline->compressor, processed_output[i]);

      processed_output[i] *= gain;

    }

    // Apply soft clipping after compressor - threshold=0.7 gives 3dB headroom

    soft_clip_buffer(processed_output, capture_count, 0.7f, 3.0f);

  }

}


/**
 * Report the configured jitter margin.
 *
 * @param pipeline Pipeline to query.
 * @return The pipeline's `config.jitter_margin_ms`, or 0 when @p pipeline is NULL.
 */
int client_audio_pipeline_jitter_margin(client_audio_pipeline_t *pipeline) {
  return pipeline ? pipeline->config.jitter_margin_ms : 0;
}


/**
 * Reset pipeline diagnostics state.
 *
 * Clears the file-level render-frame counter and max-render-RMS tracker and logs
 * the reset. No field of @p pipeline itself is modified.
 *
 * @param pipeline Pipeline handle; NULL makes the call a no-op.
 */
void client_audio_pipeline_reset(client_audio_pipeline_t *pipeline) {
  if (pipeline) {
    // These counters are file-level globals, not per-pipeline fields.
    g_render_frames_fed.store(0, std::memory_order_relaxed);
    g_max_render_rms.store(0.0f, std::memory_order_relaxed);

    log_info("Pipeline state reset");
  }
}


#ifdef __cplusplus

}

#endif

abstraction.h
🔌 Cross-platform abstraction layer umbrella header for ascii-chat

audio_analysis_set_aec3_metrics
void audio_analysis_set_aec3_metrics(double echo_return_loss, double echo_return_loss_enhancement, int delay_ms)
Set AEC3 echo cancellation metrics.
Definition analysis.c:508

analysis.h
Audio Analysis and Debugging Interface.

client_audio_pipeline_playback
int client_audio_pipeline_playback(client_audio_pipeline_t *pipeline, const uint8_t *opus_in, int opus_len, float *output, int num_samples)
Decode Opus packet and process for playback.
Definition client_audio_pipeline.cpp:452

client_audio_pipeline_create
client_audio_pipeline_t * client_audio_pipeline_create(const client_audio_pipeline_config_t *config)
Create and initialize a client audio pipeline.
Definition client_audio_pipeline.cpp:153

client_audio_pipeline_process_duplex
void client_audio_pipeline_process_duplex(client_audio_pipeline_t *pipeline, const float *render_samples, int render_count, const float *capture_samples, int capture_count, float *processed_output)
Process AEC3 inline in full-duplex callback.
Definition client_audio_pipeline.cpp:509

client_audio_pipeline_get_flags
client_audio_pipeline_flags_t client_audio_pipeline_get_flags(client_audio_pipeline_t *pipeline)
Get current component enable flags.
Definition client_audio_pipeline.cpp:414

client_audio_pipeline_jitter_margin
int client_audio_pipeline_jitter_margin(client_audio_pipeline_t *pipeline)
Get jitter buffer margin (buffered time in ms)
Definition client_audio_pipeline.cpp:664

client_audio_pipeline_get_playback_frame
int client_audio_pipeline_get_playback_frame(client_audio_pipeline_t *pipeline, float *output, int num_samples)
Get audio frame from jitter buffer for playback callback.
Definition client_audio_pipeline.cpp:482

client_audio_pipeline_default_config
client_audio_pipeline_config_t client_audio_pipeline_default_config(void)
Get default configuration.
Definition client_audio_pipeline.cpp:100

client_audio_pipeline_set_flags
void client_audio_pipeline_set_flags(client_audio_pipeline_t *pipeline, client_audio_pipeline_flags_t flags)
Set component enable flags.
Definition client_audio_pipeline.cpp:407

client_audio_pipeline_capture
int client_audio_pipeline_capture(client_audio_pipeline_t *pipeline, const float *input, int num_samples, uint8_t *opus_out, int max_opus_len)
Process captured audio and encode to Opus.
Definition client_audio_pipeline.cpp:431

client_audio_pipeline_destroy
void client_audio_pipeline_destroy(client_audio_pipeline_t *pipeline)
Destroy a client audio pipeline.
Definition client_audio_pipeline.cpp:360

client_audio_pipeline_reset
void client_audio_pipeline_reset(client_audio_pipeline_t *pipeline)
Reset pipeline state.
Definition client_audio_pipeline.cpp:673

client_audio_pipeline.h
Unified client-side audio processing pipeline.

CLIENT_AUDIO_PIPELINE_FRAME_MS
#define CLIENT_AUDIO_PIPELINE_FRAME_MS
Definition client_audio_pipeline.h:90

CLIENT_AUDIO_PIPELINE_FLAGS_MINIMAL
#define CLIENT_AUDIO_PIPELINE_FLAGS_MINIMAL
Minimal flags for testing (only codec, no processing)
Definition client_audio_pipeline.h:149

CLIENT_AUDIO_PIPELINE_FLAGS_ALL
#define CLIENT_AUDIO_PIPELINE_FLAGS_ALL
Default flags with all processing enabled.
Definition client_audio_pipeline.h:133

CLIENT_AUDIO_PIPELINE_SAMPLE_RATE
#define CLIENT_AUDIO_PIPELINE_SAMPLE_RATE
Definition client_audio_pipeline.h:87

copy_buffer_with_gain
void copy_buffer_with_gain(const float *src, float *dst, int count, float gain)
Copy buffer with gain scaling.
Definition mixer.c:1128

noise_gate_process_buffer
void noise_gate_process_buffer(noise_gate_t *gate, float *buffer, int num_samples)
Process a buffer of samples through noise gate.
Definition mixer.c:892

soft_clip_buffer
void soft_clip_buffer(float *buffer, int num_samples, float threshold, float steepness)
Apply soft clipping to a buffer.
Definition mixer.c:1032

noise_gate_init
void noise_gate_init(noise_gate_t *gate, float sample_rate)
Initialize a noise gate.
Definition mixer.c:838

soft_clip
float soft_clip(float sample, float threshold, float steepness)
Apply soft clipping to a sample.
Definition mixer.c:1019

compressor_set_params
void compressor_set_params(compressor_t *comp, float threshold_dB, float ratio, float attack_ms, float release_ms, float makeup_dB)
Set compressor parameters.
Definition mixer.c:53

compressor_init
void compressor_init(compressor_t *comp, float sample_rate)
Initialize a compressor.
Definition mixer.c:42

highpass_filter_init
void highpass_filter_init(highpass_filter_t *filter, float cutoff_hz, float sample_rate)
Initialize a high-pass filter.
Definition mixer.c:920

smoothstep
float smoothstep(float t)
Compute smoothstep interpolation.
Definition mixer.c:1046

lowpass_filter_init
void lowpass_filter_init(lowpass_filter_t *filter, float cutoff_hz, float sample_rate)
Initialize a low-pass filter.
Definition mixer.c:970

lowpass_filter_process_buffer
void lowpass_filter_process_buffer(lowpass_filter_t *filter, float *buffer, int num_samples)
Process a buffer of samples through low-pass filter.
Definition mixer.c:1005

highpass_filter_process_buffer
void highpass_filter_process_buffer(highpass_filter_t *filter, float *buffer, int num_samples)
Process a buffer of samples through high-pass filter.
Definition mixer.c:956

noise_gate_set_params
void noise_gate_set_params(noise_gate_t *gate, float threshold, float attack_ms, float release_ms, float hysteresis)
Set noise gate parameters.
Definition mixer.c:852

compressor_process_sample
float compressor_process_sample(compressor_t *comp, float sidechain)
Process a single sample through compressor.
Definition mixer.c:87

OPUS_APPLICATION_VOIP
@ OPUS_APPLICATION_VOIP
Voice over IP (optimized for speech)
Definition opus_codec.h:77

SAFE_FREE
#define SAFE_FREE(ptr)
Definition common.h:320

SAFE_CALLOC
#define SAFE_CALLOC(count, size, cast)
Definition common.h:218

uint8_t
typedef unsigned char uint8_t
Definition common.h:56

log_warn
#define log_warn(...)
Log a WARN message.
Definition log/logging.h:484

log_error
#define log_error(...)
Log an ERROR message.
Definition log/logging.h:501

log_info
#define log_info(...)
Log an INFO message.
Definition log/logging.h:467

log_warn_every
#define log_warn_every(interval_us, fmt,...)
Rate-limited WARN logging.
Definition log/logging.h:610

platform_getenv
const char * platform_getenv(const char *name)
Get an environment variable value.

logging.h
📝 Logging API with multiple log levels and terminal output control

math.h
🔢 Mathematical Utility Functions

mixer.h
Multi-Source Audio Mixing and Processing System.

WebRTCAec3Wrapper
C++ wrapper for WebRTC AEC3 (opaque to C code)
Definition client_audio_pipeline.cpp:78

WebRTCAec3Wrapper::~WebRTCAec3Wrapper
~WebRTCAec3Wrapper()=default

WebRTCAec3Wrapper::config
webrtc::EchoCanceller3Config config
Definition client_audio_pipeline.cpp:80

WebRTCAec3Wrapper::WebRTCAec3Wrapper
WebRTCAec3Wrapper()=default

WebRTCAec3Wrapper::aec3
std::unique_ptr< webrtc::EchoControl > aec3
Definition client_audio_pipeline.cpp:79

client_audio_pipeline_config_t
Pipeline configuration parameters.
Definition client_audio_pipeline.h:167

client_audio_pipeline_config_t::jitter_margin_ms
int jitter_margin_ms
Definition client_audio_pipeline.h:186

client_audio_pipeline_config_t::sample_rate
int sample_rate
Definition client_audio_pipeline.h:169

client_audio_pipeline_config_t::lowpass_hz
float lowpass_hz
Definition client_audio_pipeline.h:191

client_audio_pipeline_config_t::flags
client_audio_pipeline_flags_t flags
Definition client_audio_pipeline.h:214

client_audio_pipeline_config_t::gate_hysteresis
float gate_hysteresis
Definition client_audio_pipeline.h:211

client_audio_pipeline_config_t::comp_attack_ms
float comp_attack_ms
Definition client_audio_pipeline.h:198

client_audio_pipeline_config_t::gate_threshold
float gate_threshold
Definition client_audio_pipeline.h:205

client_audio_pipeline_config_t::highpass_hz
float highpass_hz
Definition client_audio_pipeline.h:189

client_audio_pipeline_config_t::gate_attack_ms
float gate_attack_ms
Definition client_audio_pipeline.h:207

client_audio_pipeline_config_t::comp_threshold_db
float comp_threshold_db
Definition client_audio_pipeline.h:194

client_audio_pipeline_config_t::comp_makeup_db
float comp_makeup_db
Definition client_audio_pipeline.h:202

client_audio_pipeline_config_t::frame_size_ms
int frame_size_ms
Definition client_audio_pipeline.h:171

client_audio_pipeline_config_t::comp_release_ms
float comp_release_ms
Definition client_audio_pipeline.h:200

client_audio_pipeline_config_t::opus_bitrate
int opus_bitrate
Definition client_audio_pipeline.h:173

client_audio_pipeline_config_t::comp_ratio
float comp_ratio
Definition client_audio_pipeline.h:196

client_audio_pipeline_config_t::gate_release_ms
float gate_release_ms
Definition client_audio_pipeline.h:209

client_audio_pipeline_flags_t
Component enable/disable flags.
Definition client_audio_pipeline.h:109

client_audio_pipeline_flags_t::echo_cancel
bool echo_cancel
Definition client_audio_pipeline.h:111

client_audio_pipeline_flags_t::noise_gate
bool noise_gate
Definition client_audio_pipeline.h:123

client_audio_pipeline_flags_t::highpass
bool highpass
Definition client_audio_pipeline.h:125

client_audio_pipeline_flags_t::compressor
bool compressor
Definition client_audio_pipeline.h:121

client_audio_pipeline_flags_t::lowpass
bool lowpass
Definition client_audio_pipeline.h:127

client_audio_pipeline_t
Client audio pipeline state.
Definition client_audio_pipeline.h:229

client_audio_pipeline_t::config
client_audio_pipeline_config_t config
Definition client_audio_pipeline.h:262

client_audio_pipeline_t::capture_fadein_remaining
int capture_fadein_remaining
Definition client_audio_pipeline.h:280

client_audio_pipeline_t::aec3_render_buffer
void * aec3_render_buffer
Definition client_audio_pipeline.h:290

client_audio_pipeline_t::lowpass
lowpass_filter_t lowpass
Definition client_audio_pipeline.h:259

client_audio_pipeline_t::decoder
OpusDecoder * decoder
Definition client_audio_pipeline.h:248

client_audio_pipeline_t::noise_gate
noise_gate_t noise_gate
Definition client_audio_pipeline.h:253

client_audio_pipeline_t::debug_wav_aec3_out
void * debug_wav_aec3_out
Definition client_audio_pipeline.h:284

client_audio_pipeline_t::echo_canceller
void * echo_canceller
Definition client_audio_pipeline.h:234

client_audio_pipeline_t::flags
client_audio_pipeline_flags_t flags
Definition client_audio_pipeline.h:231

client_audio_pipeline_t::encoder
OpusEncoder * encoder
Definition client_audio_pipeline.h:246

client_audio_pipeline_t::initialized
bool initialized
Definition client_audio_pipeline.h:277

client_audio_pipeline_t::compressor
compressor_t compressor
Definition client_audio_pipeline.h:251

client_audio_pipeline_t::debug_wav_aec3_in
void * debug_wav_aec3_in
Definition client_audio_pipeline.h:283

client_audio_pipeline_t::playback_noise_gate
noise_gate_t playback_noise_gate
Definition client_audio_pipeline.h:255

client_audio_pipeline_t::frame_size
int frame_size
Definition client_audio_pipeline.h:265

client_audio_pipeline_t::highpass
highpass_filter_t highpass
Definition client_audio_pipeline.h:257

client_audio_pipeline_t::aec3_capture_buffer
void * aec3_capture_buffer
Definition client_audio_pipeline.h:291

wav_writer_t
WAV file writer context.
Definition wav_writer.h:23

wav_writer_open
wav_writer_t * wav_writer_open(const char *filepath, int sample_rate, int channels)
Open WAV file for writing.
Definition wav_writer.c:39

wav_writer_write
int wav_writer_write(wav_writer_t *writer, const float *samples, int num_samples)
Write audio samples to WAV file.
Definition wav_writer.c:85

wav_writer_close
void wav_writer_close(wav_writer_t *writer)
Close WAV file and finalize header.
Definition wav_writer.c:99

wav_writer.h
Simple WAV file writer for audio debugging.