ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
client_audio_pipeline.cpp File Reference

Unified client-side audio processing pipeline with WebRTC AEC3. More...

Go to the source code of this file.

Data Structures

struct  WebRTCAec3Wrapper
 C++ wrapper for WebRTC AEC3 (opaque to C code) More...
 

Macros

#define WEBRTC_APM_DEBUG_DUMP   0
 
#define WEBRTC_MODULE_AUDIO_PROCESSING   1
 
#define __STDC_NO_ATOMICS__   1
 

Functions

client_audio_pipeline_config_t client_audio_pipeline_default_config (void)
 Get default configuration.
 
client_audio_pipeline_t * client_audio_pipeline_create (const client_audio_pipeline_config_t *config)
 Create and initialize a client audio pipeline.
 
void client_audio_pipeline_destroy (client_audio_pipeline_t *pipeline)
 Destroy a client audio pipeline.
 
void client_audio_pipeline_set_flags (client_audio_pipeline_t *pipeline, client_audio_pipeline_flags_t flags)
 Set component enable flags.
 
client_audio_pipeline_flags_t client_audio_pipeline_get_flags (client_audio_pipeline_t *pipeline)
 Get current component enable flags.
 
int client_audio_pipeline_capture (client_audio_pipeline_t *pipeline, const float *input, int num_samples, uint8_t *opus_out, int max_opus_len)
 Process captured audio and encode to Opus.
 
int client_audio_pipeline_playback (client_audio_pipeline_t *pipeline, const uint8_t *opus_in, int opus_len, float *output, int num_samples)
 Decode Opus packet and process for playback.
 
int client_audio_pipeline_get_playback_frame (client_audio_pipeline_t *pipeline, float *output, int num_samples)
 Get audio frame from jitter buffer for playback callback.
 
void client_audio_pipeline_process_duplex (client_audio_pipeline_t *pipeline, const float *render_samples, int render_count, const float *capture_samples, int capture_count, float *processed_output)
 Process AEC3 inline in full-duplex callback.
 
int client_audio_pipeline_jitter_margin (client_audio_pipeline_t *pipeline)
 Get jitter buffer margin (buffered time in ms)
 
void client_audio_pipeline_reset (client_audio_pipeline_t *pipeline)
 Reset pipeline state.
 

Detailed Description

Unified client-side audio processing pipeline with WebRTC AEC3.

Implements production-grade echo cancellation using WebRTC AEC3 (Acoustic Echo Cancellation v3) with automatic network delay estimation, adaptive filtering, and residual echo suppression.

Uses WebRTC directly via C++ API - no wrapper layer.

Definition in file client_audio_pipeline.cpp.

Macro Definition Documentation

◆ __STDC_NO_ATOMICS__

#define __STDC_NO_ATOMICS__   1

Definition at line 66 of file client_audio_pipeline.cpp.

◆ WEBRTC_APM_DEBUG_DUMP

#define WEBRTC_APM_DEBUG_DUMP   0

Definition at line 20 of file client_audio_pipeline.cpp.

◆ WEBRTC_MODULE_AUDIO_PROCESSING

#define WEBRTC_MODULE_AUDIO_PROCESSING   1

Definition at line 21 of file client_audio_pipeline.cpp.

Function Documentation

◆ client_audio_pipeline_capture()

int client_audio_pipeline_capture ( client_audio_pipeline_t *  pipeline,
const float *  input,
int  num_samples,
uint8_t *  opus_out,
int  max_opus_len 
)

Process captured audio and encode to Opus.

Encode already-processed audio to Opus.

In full-duplex mode, AEC3 and DSP processing are done in process_duplex(). This function just does Opus encoding.

Definition at line 431 of file client_audio_pipeline.cpp.

432 {
433 if (!pipeline || !input || !opus_out || num_samples != pipeline->frame_size) {
434 return -1;
435 }
436
437 // Input is already processed by process_duplex() in full-duplex mode.
438 // Just encode with Opus.
439 int opus_len = opus_encode_float(pipeline->encoder, input, num_samples, opus_out, max_opus_len);
440
441 if (opus_len < 0) {
442 log_error("Opus encoding failed: %d", opus_len);
443 return -1;
444 }
445
446 return opus_len;
447}
#define log_error(...)
Log an ERROR message.

References client_audio_pipeline_t::encoder, client_audio_pipeline_t::frame_size, and log_error.

◆ client_audio_pipeline_create()

client_audio_pipeline_t * client_audio_pipeline_create ( const client_audio_pipeline_config_t *  config )

Create and initialize a client audio pipeline.

Create a new client audio pipeline.

This function:

  • Allocates the pipeline structure
  • Initializes Opus encoder/decoder
  • Sets up WebRTC AEC3 echo cancellation
  • Configures all audio processing parameters

Definition at line 153 of file client_audio_pipeline.cpp.

153 {
155 if (!p) {
156 log_error("Failed to allocate client audio pipeline");
157 return NULL;
158 }
159
160 // Use default config if none provided
161 if (config) {
162 p->config = *config;
163 } else {
165 }
166
167 p->flags = p->config.flags;
169
170 // No mutex needed - full-duplex means single callback thread handles all AEC3
171
172 // Initialize Opus encoder/decoder first (no exceptions)
173 int opus_error = 0;
174 p->encoder = opus_encoder_create(p->config.sample_rate, 1, OPUS_APPLICATION_VOIP, &opus_error);
175 if (!p->encoder || opus_error != OPUS_OK) {
176 log_error("Failed to create Opus encoder: %d", opus_error);
177 goto error;
178 }
179 opus_encoder_ctl(p->encoder, OPUS_SET_BITRATE(p->config.opus_bitrate));
180
181 // CRITICAL: Disable DTX (Discontinuous Transmission) to prevent "beeps"
182 // DTX stops sending frames during silence, causing audible clicks/beeps when audio resumes
183 opus_encoder_ctl(p->encoder, OPUS_SET_DTX(0));
184
185 // Create Opus decoder
186 p->decoder = opus_decoder_create(p->config.sample_rate, 1, &opus_error);
187 if (!p->decoder || opus_error != OPUS_OK) {
188 log_error("Failed to create Opus decoder: %d", opus_error);
189 goto error;
190 }
191
192 // Create WebRTC AEC3 Echo Cancellation
193 // AEC3 provides production-grade acoustic echo cancellation with:
194 // - Automatic network delay estimation (0-500ms)
195 // - Adaptive filtering to actual echo path
196 // - Residual echo suppression via spectral subtraction
197 // - Jitter buffer handling via side information
198 if (p->flags.echo_cancel) {
199 // Configure AEC3 for better low-frequency (bass) echo cancellation
200 webrtc::EchoCanceller3Config aec3_config;
201
202 // Increase filter length for bass frequencies (default 13 blocks = ~17ms)
203 // Bass at 80Hz has 12.5ms period, so we need at least 50+ blocks (~67ms)
204 // to properly model the echo path for low frequencies
205 aec3_config.filter.main.length_blocks = 50; // ~67ms (was 13)
206 aec3_config.filter.shadow.length_blocks = 50; // ~67ms (was 13)
207 aec3_config.filter.main_initial.length_blocks = 25; // ~33ms (was 12)
208 aec3_config.filter.shadow_initial.length_blocks = 25; // ~33ms (was 12)
209
210 // More aggressive low-frequency suppression thresholds
211 // Lower values = more aggressive echo suppression
212 aec3_config.echo_audibility.audibility_threshold_lf = 5; // (was 10)
213
214 // Create AEC3 using the factory
215 auto factory = webrtc::EchoCanceller3Factory(aec3_config);
216
217 std::unique_ptr<webrtc::EchoControl> echo_control = factory.Create(static_cast<int>(p->config.sample_rate), // 48kHz
218 1, // num_render_channels (speaker output)
219 1 // num_capture_channels (microphone input)
220 );
221
222 if (!echo_control) {
223 log_warn("Failed to create WebRTC AEC3 instance - echo cancellation unavailable");
224 p->echo_canceller = NULL;
225 } else {
226 // Successfully created AEC3 - wrap in our C++ wrapper for C compatibility
227 auto wrapper = new WebRTCAec3Wrapper();
228 wrapper->aec3 = std::move(echo_control);
229 wrapper->config = aec3_config;
230 p->echo_canceller = wrapper;
231
232 log_info("✓ WebRTC AEC3 initialized (67ms filter for bass, adaptive delay)");
233
234 // Create persistent AudioBuffer instances for AEC3
235 p->aec3_render_buffer = new webrtc::AudioBuffer(48000, 1, 48000, 1, 48000, 1);
236 p->aec3_capture_buffer = new webrtc::AudioBuffer(48000, 1, 48000, 1, 48000, 1);
237
238 auto *render_buf = static_cast<webrtc::AudioBuffer *>(p->aec3_render_buffer);
239 auto *capture_buf = static_cast<webrtc::AudioBuffer *>(p->aec3_capture_buffer);
240
241 // Zero-initialize channel data
242 float *const *render_ch = render_buf->channels();
243 float *const *capture_ch = capture_buf->channels();
244 if (render_ch && render_ch[0]) {
245 memset(render_ch[0], 0, 480 * sizeof(float)); // 10ms at 48kHz
246 }
247 if (capture_ch && capture_ch[0]) {
248 memset(capture_ch[0], 0, 480 * sizeof(float));
249 }
250
251 // Prime filterbank state with dummy processing cycle
252 render_buf->SplitIntoFrequencyBands();
253 render_buf->MergeFrequencyBands();
254 capture_buf->SplitIntoFrequencyBands();
255 capture_buf->MergeFrequencyBands();
256
257 log_info(" - AudioBuffer filterbank state initialized");
258
259 // Warm up AEC3 with 10 silent frames to initialize internal state
260 for (int warmup = 0; warmup < 10; warmup++) {
261 memset(render_ch[0], 0, 480 * sizeof(float));
262 memset(capture_ch[0], 0, 480 * sizeof(float));
263
264 render_buf->SplitIntoFrequencyBands();
265 wrapper->aec3->AnalyzeRender(render_buf);
266 render_buf->MergeFrequencyBands();
267
268 wrapper->aec3->AnalyzeCapture(capture_buf);
269 capture_buf->SplitIntoFrequencyBands();
270 wrapper->aec3->SetAudioBufferDelay(0);
271 wrapper->aec3->ProcessCapture(capture_buf, false);
272 capture_buf->MergeFrequencyBands();
273 }
274 log_info(" - AEC3 warmed up with 10 silent frames");
275 log_info(" - Persistent AudioBuffer instances created");
276 }
277 }
278
279 // Initialize debug WAV writers for AEC3 analysis (if echo_cancel enabled)
280 p->debug_wav_aec3_in = NULL;
281 p->debug_wav_aec3_out = NULL;
282 if (p->flags.echo_cancel) {
283 // Open WAV files to capture AEC3 input and output
284 p->debug_wav_aec3_in = wav_writer_open("/tmp/aec3_input.wav", 48000, 1);
285 p->debug_wav_aec3_out = wav_writer_open("/tmp/aec3_output.wav", 48000, 1);
286 if (p->debug_wav_aec3_in) {
287 log_info("Debug: Recording AEC3 input to /tmp/aec3_input.wav");
288 }
289 if (p->debug_wav_aec3_out) {
290 log_info("Debug: Recording AEC3 output to /tmp/aec3_output.wav");
291 }
292
293 log_info("✓ AEC3 echo cancellation enabled (full-duplex mode, no ring buffer delay)");
294 }
295
296 // Initialize audio processing components (compressor, noise gate, filters)
297 // These are applied in the capture path after AEC3 and before Opus encoding
298 {
299 float sample_rate = (float)p->config.sample_rate;
300
301 // Initialize compressor with config values
302 compressor_init(&p->compressor, sample_rate);
305 log_info("✓ Capture compressor: threshold=%.1fdB, ratio=%.1f:1, makeup=+%.1fdB", p->config.comp_threshold_db,
307
308 // Initialize noise gate with config values
309 noise_gate_init(&p->noise_gate, sample_rate);
312 log_info("✓ Capture noise gate: threshold=%.4f (%.1fdB)", p->config.gate_threshold,
313 20.0f * log10f(p->config.gate_threshold + 1e-10f));
314
315 // Initialize PLAYBACK noise gate - cuts quiet received audio before speakers
316 // Very low threshold - only cut actual silence, not quiet voice audio
317 // The server sends audio with RMS=0.01-0.02, so threshold must be below that
318 noise_gate_init(&p->playback_noise_gate, sample_rate);
320 0.002f, // -54dB threshold - only cut near-silence
321 1.0f, // 1ms attack - fast open
322 50.0f, // 50ms release - smooth close
323 0.4f); // Hysteresis
324 log_info("✓ Playback noise gate: threshold=0.002 (-54dB)");
325
326 // Initialize highpass filter (removes low-frequency rumble)
327 highpass_filter_init(&p->highpass, p->config.highpass_hz, sample_rate);
328 log_info("✓ Capture highpass filter: %.1f Hz", p->config.highpass_hz);
329
330 // Initialize lowpass filter (removes high-frequency hiss)
331 lowpass_filter_init(&p->lowpass, p->config.lowpass_hz, sample_rate);
332 log_info("✓ Capture lowpass filter: %.1f Hz", p->config.lowpass_hz);
333 }
334
335 p->initialized = true;
336
337 // Initialize startup fade-in to prevent initial microphone click
338 // 200ms at 48kHz = 9600 samples - gradual ramp from silence to full volume
339 // Longer fade-in (200ms vs 50ms) gives much smoother transition without audible pop
340 p->capture_fadein_remaining = (p->config.sample_rate * 200) / 1000; // 200ms worth of samples
341 log_info("✓ Capture fade-in: %d samples (200ms)", p->capture_fadein_remaining);
342
343 log_info("Audio pipeline created: %dHz, %dms frames, %dkbps Opus", p->config.sample_rate, p->config.frame_size_ms,
344 p->config.opus_bitrate / 1000);
345
346 return p;
347
348error:
349 if (p->encoder)
350 opus_encoder_destroy(p->encoder);
351 if (p->decoder)
352 opus_decoder_destroy(p->decoder);
353 if (p->echo_canceller) {
354 delete static_cast<WebRTCAec3Wrapper *>(p->echo_canceller);
355 }
356 SAFE_FREE(p);
357 return NULL;
358}
client_audio_pipeline_config_t client_audio_pipeline_default_config(void)
Get default configuration.
void noise_gate_init(noise_gate_t *gate, float sample_rate)
Initialize a noise gate.
Definition mixer.c:838
void compressor_set_params(compressor_t *comp, float threshold_dB, float ratio, float attack_ms, float release_ms, float makeup_dB)
Set compressor parameters.
Definition mixer.c:53
void compressor_init(compressor_t *comp, float sample_rate)
Initialize a compressor.
Definition mixer.c:42
void highpass_filter_init(highpass_filter_t *filter, float cutoff_hz, float sample_rate)
Initialize a high-pass filter.
Definition mixer.c:920
void lowpass_filter_init(lowpass_filter_t *filter, float cutoff_hz, float sample_rate)
Initialize a low-pass filter.
Definition mixer.c:970
void noise_gate_set_params(noise_gate_t *gate, float threshold, float attack_ms, float release_ms, float hysteresis)
Set noise gate parameters.
Definition mixer.c:852
@ OPUS_APPLICATION_VOIP
Voice over IP (optimized for speech)
Definition opus_codec.h:77
#define SAFE_FREE(ptr)
Definition common.h:320
#define SAFE_CALLOC(count, size, cast)
Definition common.h:218
#define log_warn(...)
Log a WARN message.
#define log_info(...)
Log an INFO message.
C++ wrapper for WebRTC AEC3 (opaque to C code)
client_audio_pipeline_flags_t flags
Client audio pipeline state.
client_audio_pipeline_config_t config
client_audio_pipeline_flags_t flags
wav_writer_t * wav_writer_open(const char *filepath, int sample_rate, int channels)
Open WAV file for writing.
Definition wav_writer.c:39

References client_audio_pipeline_t::aec3_capture_buffer, client_audio_pipeline_t::aec3_render_buffer, client_audio_pipeline_t::capture_fadein_remaining, client_audio_pipeline_default_config(), client_audio_pipeline_config_t::comp_attack_ms, client_audio_pipeline_config_t::comp_makeup_db, client_audio_pipeline_config_t::comp_ratio, client_audio_pipeline_config_t::comp_release_ms, client_audio_pipeline_config_t::comp_threshold_db, client_audio_pipeline_t::compressor, compressor_init(), compressor_set_params(), client_audio_pipeline_t::config, client_audio_pipeline_t::debug_wav_aec3_in, client_audio_pipeline_t::debug_wav_aec3_out, client_audio_pipeline_t::decoder, client_audio_pipeline_flags_t::echo_cancel, client_audio_pipeline_t::echo_canceller, client_audio_pipeline_t::encoder, client_audio_pipeline_config_t::flags, client_audio_pipeline_t::flags, client_audio_pipeline_t::frame_size, client_audio_pipeline_config_t::frame_size_ms, client_audio_pipeline_config_t::gate_attack_ms, client_audio_pipeline_config_t::gate_hysteresis, client_audio_pipeline_config_t::gate_release_ms, client_audio_pipeline_config_t::gate_threshold, client_audio_pipeline_t::highpass, highpass_filter_init(), client_audio_pipeline_config_t::highpass_hz, client_audio_pipeline_t::initialized, log_error, log_info, log_warn, client_audio_pipeline_t::lowpass, lowpass_filter_init(), client_audio_pipeline_config_t::lowpass_hz, client_audio_pipeline_t::noise_gate, noise_gate_init(), noise_gate_set_params(), OPUS_APPLICATION_VOIP, client_audio_pipeline_config_t::opus_bitrate, client_audio_pipeline_t::playback_noise_gate, SAFE_CALLOC, SAFE_FREE, client_audio_pipeline_config_t::sample_rate, and wav_writer_open().

Referenced by audio_client_init().

◆ client_audio_pipeline_default_config()

client_audio_pipeline_config_t client_audio_pipeline_default_config ( void  )

Get default configuration.

Returns
Configuration with sensible defaults for voice chat

Definition at line 100 of file client_audio_pipeline.cpp.

100 {
103 .frame_size_ms = CLIENT_AUDIO_PIPELINE_FRAME_MS,
104 .opus_bitrate = 24000,
105
106 .echo_filter_ms = 250,
107
108 .noise_suppress_db = -25,
109 .agc_level = 8000,
110 .agc_max_gain = 30,
111
112 // Jitter margin: wait this long before starting playback
113 // Lower = less latency but more risk of underruns
114 // CRITICAL: Must match AUDIO_JITTER_BUFFER_THRESHOLD in ringbuffer.h!
115 .jitter_margin_ms = 20, // 20ms = 1 Opus packet (optimized for LAN)
116
117 // Higher cutoff to cut low-frequency rumble and feedback
118 .highpass_hz = 150.0f, // Was 80Hz, increased to break rumble feedback loop
119 .lowpass_hz = 8000.0f,
120
121 // Compressor: only compress loud peaks, minimal makeup to avoid clipping
122 // User reported clipping with +6dB makeup gain
123 .comp_threshold_db = -6.0f, // Only compress peaks above -6dB
124 .comp_ratio = 3.0f, // Gentler 3:1 ratio
125 .comp_attack_ms = 5.0f, // Fast attack for peaks
126 .comp_release_ms = 150.0f, // Slower release
127 .comp_makeup_db = 2.0f, // Reduced from 6dB to prevent clipping
128
129 // Noise gate: VERY aggressive to cut quiet background audio completely
130 // User feedback: "don't amplify or play quiet background audio at all"
131 .gate_threshold = 0.08f, // -22dB threshold (was 0.02/-34dB) - cuts quiet audio hard
132 .gate_attack_ms = 0.5f, // Very fast attack
133 .gate_release_ms = 30.0f, // Fast release (was 50ms)
134 .gate_hysteresis = 0.3f, // Tighter hysteresis = stays closed longer
135
137 };
138}
#define CLIENT_AUDIO_PIPELINE_FRAME_MS
#define CLIENT_AUDIO_PIPELINE_FLAGS_ALL
Default flags with all processing enabled.
#define CLIENT_AUDIO_PIPELINE_SAMPLE_RATE
Pipeline configuration parameters.

References CLIENT_AUDIO_PIPELINE_FLAGS_ALL, CLIENT_AUDIO_PIPELINE_FRAME_MS, CLIENT_AUDIO_PIPELINE_SAMPLE_RATE, and client_audio_pipeline_config_t::sample_rate.

Referenced by audio_client_init(), and client_audio_pipeline_create().

◆ client_audio_pipeline_destroy()

void client_audio_pipeline_destroy ( client_audio_pipeline_t *  pipeline )

Destroy a client audio pipeline.

Parameters
pipeline  Pipeline to destroy (can be NULL)

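Frees all resources owned by the pipeline: the WebRTC AEC3 echo canceller and its AudioBuffer instances, the Opus encoder and decoder, the debug WAV writers, and the pipeline structure itself.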

Definition at line 360 of file client_audio_pipeline.cpp.

360 {
361 if (!pipeline)
362 return;
363
364 // Clean up WebRTC AEC3 AudioBuffer instances
365 if (pipeline->aec3_render_buffer) {
366 delete static_cast<webrtc::AudioBuffer *>(pipeline->aec3_render_buffer);
367 pipeline->aec3_render_buffer = NULL;
368 }
369 if (pipeline->aec3_capture_buffer) {
370 delete static_cast<webrtc::AudioBuffer *>(pipeline->aec3_capture_buffer);
371 pipeline->aec3_capture_buffer = NULL;
372 }
373
374 // Clean up WebRTC AEC3
375 if (pipeline->echo_canceller) {
376 delete static_cast<WebRTCAec3Wrapper *>(pipeline->echo_canceller);
377 pipeline->echo_canceller = NULL;
378 }
379
380 // Clean up Opus
381 if (pipeline->encoder) {
382 opus_encoder_destroy(pipeline->encoder);
383 pipeline->encoder = NULL;
384 }
385 if (pipeline->decoder) {
386 opus_decoder_destroy(pipeline->decoder);
387 pipeline->decoder = NULL;
388 }
389
390 // Clean up debug WAV writers
391 if (pipeline->debug_wav_aec3_in) {
393 pipeline->debug_wav_aec3_in = NULL;
394 }
395 if (pipeline->debug_wav_aec3_out) {
397 pipeline->debug_wav_aec3_out = NULL;
398 }
399
400 SAFE_FREE(pipeline);
401}
WAV file writer context.
Definition wav_writer.h:23
void wav_writer_close(wav_writer_t *writer)
Close WAV file and finalize header.
Definition wav_writer.c:99

References client_audio_pipeline_t::aec3_capture_buffer, client_audio_pipeline_t::aec3_render_buffer, client_audio_pipeline_t::debug_wav_aec3_in, client_audio_pipeline_t::debug_wav_aec3_out, client_audio_pipeline_t::decoder, client_audio_pipeline_t::echo_canceller, client_audio_pipeline_t::encoder, SAFE_FREE, and wav_writer_close().

Referenced by audio_cleanup(), and audio_client_init().

◆ client_audio_pipeline_get_flags()

client_audio_pipeline_flags_t client_audio_pipeline_get_flags ( client_audio_pipeline_t *  pipeline )

Get current component enable flags.

Parameters
pipeline  Pipeline instance
Returns
Current flags

Definition at line 414 of file client_audio_pipeline.cpp.

414 {
415 if (!pipeline)
417 // No mutex needed - flags are only written from main thread during setup
418 return pipeline->flags;
419}
#define CLIENT_AUDIO_PIPELINE_FLAGS_MINIMAL
Minimal flags for testing (only codec, no processing)

References CLIENT_AUDIO_PIPELINE_FLAGS_MINIMAL, and client_audio_pipeline_t::flags.

◆ client_audio_pipeline_get_playback_frame()

int client_audio_pipeline_get_playback_frame ( client_audio_pipeline_t *  pipeline,
float *  output,
int  num_samples 
)

Get audio frame from jitter buffer for playback callback.

Get a processed playback frame (currently a placeholder: fills the output with num_samples of silence and returns num_samples)

Definition at line 482 of file client_audio_pipeline.cpp.

482 {
483 if (!pipeline || !output) {
484 return -1;
485 }
486
487 // No mutex needed - this is a placeholder
488 memset(output, 0, num_samples * sizeof(float));
489 return num_samples;
490}

◆ client_audio_pipeline_jitter_margin()

int client_audio_pipeline_jitter_margin ( client_audio_pipeline_t *  pipeline )

Get jitter buffer margin (buffered time in ms)

Get jitter buffer margin

Definition at line 664 of file client_audio_pipeline.cpp.

664 {
665 if (!pipeline)
666 return 0;
667 return pipeline->config.jitter_margin_ms;
668}

References client_audio_pipeline_t::config, and client_audio_pipeline_config_t::jitter_margin_ms.

◆ client_audio_pipeline_playback()

int client_audio_pipeline_playback ( client_audio_pipeline_t *  pipeline,
const uint8_t *  opus_in,
int  opus_len,
float *  output,
int  num_samples 
)

Decode Opus packet and process for playback.

Process network playback (decode the Opus packet and apply the playback noise gate; the AEC3 render reference is not registered here)

Definition at line 452 of file client_audio_pipeline.cpp.

453 {
454 if (!pipeline || !opus_in || !output) {
455 return -1;
456 }
457
458 // No mutex needed - Opus decoder is only used from this thread
459
460 // Decode Opus
461 int decoded_samples = opus_decode_float(pipeline->decoder, opus_in, opus_len, output, num_samples, 0);
462
463 if (decoded_samples < 0) {
464 log_error("Opus decoding failed: %d", decoded_samples);
465 return -1;
466 }
467
468 // Apply playback noise gate - cut quiet background audio before it reaches speakers
469 if (decoded_samples > 0) {
470 noise_gate_process_buffer(&pipeline->playback_noise_gate, output, decoded_samples);
471 }
472
473 // NOTE: Render signal is queued to AEC3 in output_callback() when audio plays,
474 // not here. The capture thread drains the queue and processes AEC3.
475
476 return decoded_samples;
477}
void noise_gate_process_buffer(noise_gate_t *gate, float *buffer, int num_samples)
Process a buffer of samples through noise gate.
Definition mixer.c:892

References client_audio_pipeline_t::decoder, log_error, noise_gate_process_buffer(), and client_audio_pipeline_t::playback_noise_gate.

Referenced by audio_decode_opus().

◆ client_audio_pipeline_process_duplex()

void client_audio_pipeline_process_duplex ( client_audio_pipeline_t *  pipeline,
const float *  render_samples,
int  render_count,
const float *  capture_samples,
int  capture_count,
float *  processed_output 
)

Process AEC3 inline in full-duplex callback.

Process AEC3 inline in full-duplex callback (REAL-TIME SAFE).

This is the PROFESSIONAL approach to AEC3 timing:

  • Called from a single PortAudio full-duplex callback
  • render_samples = what is being played to speakers RIGHT NOW
  • capture_samples = what microphone captured RIGHT NOW
  • Perfect synchronization - no timing mismatch possible

This function does ALL AEC3 processing inline:

  1. AnalyzeRender on render samples (speaker output)
  2. AnalyzeCapture + ProcessCapture on capture samples
  3. Apply filters, noise gate, compressor

Returns processed capture samples in processed_output. Opus encoding is done separately by the encoding thread.

Definition at line 509 of file client_audio_pipeline.cpp.

511 {
512 if (!pipeline || !processed_output)
513 return;
514
515 // Copy capture samples to output buffer for processing
516 if (capture_samples && capture_count > 0) {
517 memcpy(processed_output, capture_samples, capture_count * sizeof(float));
518 } else {
519 memset(processed_output, 0, capture_count * sizeof(float));
520 return;
521 }
522
523 // Check for AEC3 bypass
524 static int bypass_aec3 = -1;
525 if (bypass_aec3 == -1) {
526 const char *env = platform_getenv("BYPASS_AEC3");
527 bypass_aec3 = (env && (strcmp(env, "1") == 0 || strcmp(env, "true") == 0)) ? 1 : 0;
528 if (bypass_aec3) {
529 log_warn("AEC3 BYPASSED (full-duplex mode) via BYPASS_AEC3=1");
530 }
531 }
532
533 // Debug WAV recording
534 if (pipeline->debug_wav_aec3_in) {
535 wav_writer_write((wav_writer_t *)pipeline->debug_wav_aec3_in, capture_samples, capture_count);
536 }
537
538 // Apply startup fade-in using smoothstep curve
539 if (pipeline->capture_fadein_remaining > 0) {
540 const int total_fadein_samples = (pipeline->config.sample_rate * 200) / 1000;
541 for (int i = 0; i < capture_count && pipeline->capture_fadein_remaining > 0; i++) {
542 float progress = 1.0f - ((float)pipeline->capture_fadein_remaining / (float)total_fadein_samples);
543 float gain = smoothstep(progress);
544 processed_output[i] *= gain;
545 pipeline->capture_fadein_remaining--;
546 }
547 }
548
549 // WebRTC AEC3 processing - INLINE, no ring buffer, no mutex
550 if (!bypass_aec3 && pipeline->flags.echo_cancel && pipeline->echo_canceller) {
551 auto wrapper = static_cast<WebRTCAec3Wrapper *>(pipeline->echo_canceller);
552 if (wrapper && wrapper->aec3) {
553 const int webrtc_frame_size = 480; // 10ms at 48kHz
554
555 auto *render_buf = static_cast<webrtc::AudioBuffer *>(pipeline->aec3_render_buffer);
556 auto *capture_buf = static_cast<webrtc::AudioBuffer *>(pipeline->aec3_capture_buffer);
557
558 if (render_buf && capture_buf) {
559 float *const *render_channels = render_buf->channels();
560 float *const *capture_channels = capture_buf->channels();
561
562 if (render_channels && render_channels[0] && capture_channels && capture_channels[0]) {
563 // Verify render_samples is valid before accessing
564 if (!render_samples && render_count > 0) {
565 log_warn_every(1000000, "AEC3: render_samples is NULL but render_count=%d", render_count);
566 return;
567 }
568
569 // Process in 10ms chunks (AEC3 requirement)
570 int render_offset = 0;
571 int capture_offset = 0;
572
573 while (capture_offset < capture_count || render_offset < render_count) {
574 // STEP 1: Feed render signal (what's playing to speakers)
575 // In full-duplex, this is THE EXACT audio being played RIGHT NOW
576 if (render_samples && render_offset < render_count) {
577 int render_chunk = (render_offset + webrtc_frame_size <= render_count) ? webrtc_frame_size
578 : (render_count - render_offset);
579 if (render_chunk == webrtc_frame_size) {
580 // Scale float [-1,1] to WebRTC int16-range [-32768, 32767]
581 copy_buffer_with_gain(&render_samples[render_offset], render_channels[0], webrtc_frame_size, 32768.0f);
582 render_buf->SplitIntoFrequencyBands();
583 wrapper->aec3->AnalyzeRender(render_buf);
584 render_buf->MergeFrequencyBands();
585 g_render_frames_fed.fetch_add(1, std::memory_order_relaxed);
586 }
587 render_offset += render_chunk;
588 }
589
590 // STEP 2: Process capture (microphone input)
591 if (capture_offset < capture_count) {
592 int capture_chunk = (capture_offset + webrtc_frame_size <= capture_count)
593 ? webrtc_frame_size
594 : (capture_count - capture_offset);
595 if (capture_chunk == webrtc_frame_size) {
596 // Scale float [-1,1] to WebRTC int16-range [-32768, 32767]
597 copy_buffer_with_gain(&processed_output[capture_offset], capture_channels[0], webrtc_frame_size,
598 32768.0f);
599
600 // AEC3 sequence: AnalyzeCapture, split, ProcessCapture, merge
601 wrapper->aec3->AnalyzeCapture(capture_buf);
602 capture_buf->SplitIntoFrequencyBands();
603
604 // NOTE: SetAudioBufferDelay() is just an initial hint when use_external_delay_estimator=false
605 // (default). AEC3's internal delay estimator will find the actual delay (~144ms in practice). We don't
606 // call it here - let AEC3 estimate delay automatically.
607
608 wrapper->aec3->ProcessCapture(capture_buf, false);
609 capture_buf->MergeFrequencyBands();
610
611 // Scale back to float range and apply soft clip to prevent distortion
612 // Use gentle soft_clip (threshold=0.6, steepness=2.5) to leave headroom for compressor
613 for (int j = 0; j < webrtc_frame_size; j++) {
614 float sample = capture_channels[0][j] / 32768.0f;
615 processed_output[capture_offset + j] = soft_clip(sample, 0.6f, 2.5f);
616 }
617
618 // Log AEC3 metrics periodically
619 static int duplex_log_count = 0;
620 if (++duplex_log_count % 100 == 1) {
621 webrtc::EchoControl::Metrics metrics = wrapper->aec3->GetMetrics();
622 log_info("AEC3 DUPLEX: ERL=%.1f ERLE=%.1f delay=%dms", metrics.echo_return_loss,
623 metrics.echo_return_loss_enhancement, metrics.delay_ms);
624 audio_analysis_set_aec3_metrics(metrics.echo_return_loss, metrics.echo_return_loss_enhancement,
625 metrics.delay_ms);
626 }
627 }
628 capture_offset += capture_chunk;
629 }
630 }
631 }
632 }
633 }
634 }
635
636 // Debug WAV recording (after AEC3)
637 if (pipeline->debug_wav_aec3_out) {
638 wav_writer_write((wav_writer_t *)pipeline->debug_wav_aec3_out, processed_output, capture_count);
639 }
640
641 // Apply capture processing chain: filters, noise gate, compressor
642 if (pipeline->flags.highpass) {
643 highpass_filter_process_buffer(&pipeline->highpass, processed_output, capture_count);
644 }
645 if (pipeline->flags.lowpass) {
646 lowpass_filter_process_buffer(&pipeline->lowpass, processed_output, capture_count);
647 }
648 if (pipeline->flags.noise_gate) {
649 noise_gate_process_buffer(&pipeline->noise_gate, processed_output, capture_count);
650 }
651 if (pipeline->flags.compressor) {
652 for (int i = 0; i < capture_count; i++) {
653 float gain = compressor_process_sample(&pipeline->compressor, processed_output[i]);
654 processed_output[i] *= gain;
655 }
656 // Apply soft clipping after compressor - threshold=0.7 gives 3dB headroom
657 soft_clip_buffer(processed_output, capture_count, 0.7f, 3.0f);
658 }
659}
void audio_analysis_set_aec3_metrics(double echo_return_loss, double echo_return_loss_enhancement, int delay_ms)
Set AEC3 echo cancellation metrics.
Definition analysis.c:508
void copy_buffer_with_gain(const float *src, float *dst, int count, float gain)
Copy buffer with gain scaling.
Definition mixer.c:1128
void soft_clip_buffer(float *buffer, int num_samples, float threshold, float steepness)
Apply soft clipping to a buffer.
Definition mixer.c:1032
float soft_clip(float sample, float threshold, float steepness)
Apply soft clipping to a sample.
Definition mixer.c:1019
float smoothstep(float t)
Compute smoothstep interpolation.
Definition mixer.c:1046
void lowpass_filter_process_buffer(lowpass_filter_t *filter, float *buffer, int num_samples)
Process a buffer of samples through low-pass filter.
Definition mixer.c:1005
void highpass_filter_process_buffer(highpass_filter_t *filter, float *buffer, int num_samples)
Process a buffer of samples through high-pass filter.
Definition mixer.c:956
float compressor_process_sample(compressor_t *comp, float sidechain)
Process a single sample through compressor.
Definition mixer.c:87
#define log_warn_every(interval_us, fmt,...)
Rate-limited WARN logging.
const char * platform_getenv(const char *name)
Get an environment variable value.
int wav_writer_write(wav_writer_t *writer, const float *samples, int num_samples)
Write audio samples to WAV file.
Definition wav_writer.c:85

References client_audio_pipeline_t::aec3_capture_buffer, client_audio_pipeline_t::aec3_render_buffer, audio_analysis_set_aec3_metrics(), client_audio_pipeline_t::capture_fadein_remaining, client_audio_pipeline_flags_t::compressor, client_audio_pipeline_t::compressor, compressor_process_sample(), client_audio_pipeline_t::config, copy_buffer_with_gain(), client_audio_pipeline_t::debug_wav_aec3_in, client_audio_pipeline_t::debug_wav_aec3_out, client_audio_pipeline_flags_t::echo_cancel, client_audio_pipeline_t::echo_canceller, client_audio_pipeline_t::flags, client_audio_pipeline_flags_t::highpass, client_audio_pipeline_t::highpass, highpass_filter_process_buffer(), log_info, log_warn, log_warn_every, client_audio_pipeline_flags_t::lowpass, client_audio_pipeline_t::lowpass, lowpass_filter_process_buffer(), client_audio_pipeline_flags_t::noise_gate, client_audio_pipeline_t::noise_gate, noise_gate_process_buffer(), platform_getenv(), client_audio_pipeline_config_t::sample_rate, smoothstep(), soft_clip(), soft_clip_buffer(), and wav_writer_write().

◆ client_audio_pipeline_reset()

void client_audio_pipeline_reset ( client_audio_pipeline_t *  pipeline )

Reset pipeline state.

Reset pipeline state

Definition at line 673 of file client_audio_pipeline.cpp.

673 {
674 if (!pipeline)
675 return;
676
677 // Reset global counters
678 g_render_frames_fed.store(0, std::memory_order_relaxed);
679 g_max_render_rms.store(0.0f, std::memory_order_relaxed);
680
681 log_info("Pipeline state reset");
682}

References log_info.

◆ client_audio_pipeline_set_flags()

void client_audio_pipeline_set_flags ( client_audio_pipeline_t *  pipeline,
client_audio_pipeline_flags_t  flags 
)

Set component enable flags.

Parameters
pipeline  Pipeline instance
flags  New flags to set

Thread-safe. Changes take effect on next capture/playback call.

Definition at line 407 of file client_audio_pipeline.cpp.

407 {
408 if (!pipeline)
409 return;
410 // No mutex needed - flags are only read by capture thread
411 pipeline->flags = flags;
412}

References client_audio_pipeline_t::flags.