ascii-chat 0.8.38
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
client_audio_pipeline.cpp File Reference

Unified client-side audio processing pipeline with WebRTC AEC3. More...

Go to the source code of this file.

Data Structures

struct  WebRTCAec3Wrapper
 C++ wrapper for WebRTC AEC3 (opaque to C code) More...
 

Macros

#define WEBRTC_APM_DEBUG_DUMP   0
 
#define WEBRTC_MODULE_AUDIO_PROCESSING   1
 
#define __STDC_NO_ATOMICS__   1
 

Functions

client_audio_pipeline_config_t client_audio_pipeline_default_config (void)
 
client_audio_pipeline_t * client_audio_pipeline_create (const client_audio_pipeline_config_t *config)
 Create and initialize a client audio pipeline.
 
void client_audio_pipeline_destroy (client_audio_pipeline_t *pipeline)
 
void client_audio_pipeline_set_flags (client_audio_pipeline_t *pipeline, client_audio_pipeline_flags_t flags)
 
client_audio_pipeline_flags_t client_audio_pipeline_get_flags (client_audio_pipeline_t *pipeline)
 
int client_audio_pipeline_capture (client_audio_pipeline_t *pipeline, const float *input, int num_samples, uint8_t *opus_out, int max_opus_len)
 
int client_audio_pipeline_playback (client_audio_pipeline_t *pipeline, const uint8_t *opus_in, int opus_len, float *output, int num_samples)
 
int client_audio_pipeline_get_playback_frame (client_audio_pipeline_t *pipeline, float *output, int num_samples)
 
void client_audio_pipeline_process_duplex (client_audio_pipeline_t *pipeline, const float *render_samples, int render_count, const float *capture_samples, int capture_count, float *processed_output)
 
int client_audio_pipeline_jitter_margin (client_audio_pipeline_t *pipeline)
 
void client_audio_pipeline_reset (client_audio_pipeline_t *pipeline)
 

Detailed Description

Unified client-side audio processing pipeline with WebRTC AEC3.

Implements production-grade echo cancellation using WebRTC AEC3 (Acoustic Echo Cancellation v3) with automatic network delay estimation, adaptive filtering, and residual echo suppression.

Uses WebRTC directly via C++ API - no wrapper layer.

Definition in file client_audio_pipeline.cpp.

Macro Definition Documentation

◆ __STDC_NO_ATOMICS__

#define __STDC_NO_ATOMICS__   1

Definition at line 67 of file client_audio_pipeline.cpp.

◆ WEBRTC_APM_DEBUG_DUMP

#define WEBRTC_APM_DEBUG_DUMP   0

Definition at line 20 of file client_audio_pipeline.cpp.

◆ WEBRTC_MODULE_AUDIO_PROCESSING

#define WEBRTC_MODULE_AUDIO_PROCESSING   1

Definition at line 21 of file client_audio_pipeline.cpp.

Function Documentation

◆ client_audio_pipeline_capture()

int client_audio_pipeline_capture ( client_audio_pipeline_t *  pipeline,
const float *  input,
int  num_samples,
uint8_t *  opus_out,
int  max_opus_len 
)

Encode already-processed audio to Opus.

In full-duplex mode, AEC3 and DSP processing are done in process_duplex(). This function just does Opus encoding.

Definition at line 432 of file client_audio_pipeline.cpp.

433 {
434 if (!pipeline || !input || !opus_out || num_samples != pipeline->frame_size) {
435 return -1;
436 }
437
438 // Input is already processed by process_duplex() in full-duplex mode.
439 // Just encode with Opus.
440 int opus_len = opus_encode_float(pipeline->encoder, input, num_samples, opus_out, max_opus_len);
441
442 if (opus_len < 0) {
443 log_error("Opus encoding failed: %d", opus_len);
444 return -1;
445 }
446
447 return opus_len;
448}

◆ client_audio_pipeline_create()

client_audio_pipeline_t * client_audio_pipeline_create ( const client_audio_pipeline_config_t *  config)

Create and initialize a client audio pipeline.

This function:

  • Allocates the pipeline structure
  • Initializes Opus encoder/decoder
  • Sets up WebRTC AEC3 echo cancellation
  • Configures all audio processing parameters

Definition at line 154 of file client_audio_pipeline.cpp.

154 {
155 client_audio_pipeline_t *p = SAFE_CALLOC(1, sizeof(client_audio_pipeline_t), client_audio_pipeline_t *);
156 if (!p) {
157 log_error("Failed to allocate client audio pipeline");
158 return NULL;
159 }
160
161 // Use default config if none provided
162 if (config) {
163 p->config = *config;
164 } else {
 165 p->config = client_audio_pipeline_default_config();
 166 }
167
168 p->flags = p->config.flags;
169 p->frame_size = p->config.sample_rate * p->config.frame_size_ns / 1000;
170
171 // No mutex needed - full-duplex means single callback thread handles all AEC3
172
173 // Initialize Opus encoder/decoder first (no exceptions)
174 int opus_error = 0;
175 p->encoder = opus_encoder_create(p->config.sample_rate, 1, OPUS_APPLICATION_VOIP, &opus_error);
176 if (!p->encoder || opus_error != OPUS_OK) {
177 log_error("Failed to create Opus encoder: %d", opus_error);
178 goto error;
179 }
180 opus_encoder_ctl(p->encoder, OPUS_SET_BITRATE(p->config.opus_bitrate));
181
182 // Disable DTX (Discontinuous Transmission) to prevent "beeps".
183 // DTX stops sending frames during silence, causing audible clicks/beeps when audio resumes.
184 // opus_encoder_ctl(p->encoder, OPUS_SET_DTX(0));
185
186 // Create Opus decoder
187 p->decoder = opus_decoder_create(p->config.sample_rate, 1, &opus_error);
188 if (!p->decoder || opus_error != OPUS_OK) {
189 log_error("Failed to create Opus decoder: %d", opus_error);
190 goto error;
191 }
192
193 // Create WebRTC AEC3 Echo Cancellation
194 // AEC3 provides production-grade acoustic echo cancellation with:
195 // - Automatic network delay estimation (0-500ms)
196 // - Adaptive filtering to actual echo path
197 // - Residual echo suppression via spectral subtraction
198 // - Jitter buffer handling via side information
199 if (p->flags.echo_cancel) {
200 // Configure AEC3 for better low-frequency (bass) echo cancellation
201 webrtc::EchoCanceller3Config aec3_config;
202
203 // Increase filter length for bass frequencies (default 13 blocks = ~17ms)
204 // Bass at 80Hz has 12.5ms period, so we need at least 50+ blocks (~67ms)
205 // to properly model the echo path for low frequencies
206 aec3_config.filter.main.length_blocks = 50; // ~67ms (was 13)
207 aec3_config.filter.shadow.length_blocks = 50; // ~67ms (was 13)
208 aec3_config.filter.main_initial.length_blocks = 25; // ~33ms (was 12)
209 aec3_config.filter.shadow_initial.length_blocks = 25; // ~33ms (was 12)
210
211 // More aggressive low-frequency suppression thresholds
212 // Lower values = more aggressive echo suppression
213 aec3_config.echo_audibility.audibility_threshold_lf = 5; // (was 10)
214
215 // Create AEC3 using the factory
216 auto factory = webrtc::EchoCanceller3Factory(aec3_config);
217
218 std::unique_ptr<webrtc::EchoControl> echo_control = factory.Create(static_cast<int>(p->config.sample_rate), // 48kHz
219 1, // num_render_channels (speaker output)
220 1 // num_capture_channels (microphone input)
221 );
222
223 if (!echo_control) {
224 log_warn("Failed to create WebRTC AEC3 instance - echo cancellation unavailable");
225 p->echo_canceller = NULL;
226 } else {
227 // Successfully created AEC3 - wrap in our C++ wrapper for C compatibility
228 auto wrapper = new WebRTCAec3Wrapper();
229 wrapper->aec3 = std::move(echo_control);
230 wrapper->config = aec3_config;
231 p->echo_canceller = wrapper;
232
233 log_info("✓ WebRTC AEC3 initialized (67ms filter for bass, adaptive delay)");
234
235 // Create persistent AudioBuffer instances for AEC3
236 p->aec3_render_buffer = new webrtc::AudioBuffer(48000, 1, 48000, 1, 48000, 1);
237 p->aec3_capture_buffer = new webrtc::AudioBuffer(48000, 1, 48000, 1, 48000, 1);
238
239 auto *render_buf = static_cast<webrtc::AudioBuffer *>(p->aec3_render_buffer);
240 auto *capture_buf = static_cast<webrtc::AudioBuffer *>(p->aec3_capture_buffer);
241
242 // Zero-initialize channel data
243 float *const *render_ch = render_buf->channels();
244 float *const *capture_ch = capture_buf->channels();
245 if (render_ch && render_ch[0]) {
246 memset(render_ch[0], 0, 480 * sizeof(float)); // 10ms at 48kHz
247 }
248 if (capture_ch && capture_ch[0]) {
249 memset(capture_ch[0], 0, 480 * sizeof(float));
250 }
251
252 // Prime filterbank state with dummy processing cycle
253 render_buf->SplitIntoFrequencyBands();
254 render_buf->MergeFrequencyBands();
255 capture_buf->SplitIntoFrequencyBands();
256 capture_buf->MergeFrequencyBands();
257
258 log_info(" - AudioBuffer filterbank state initialized");
259
260 // Warm up AEC3 with 10 silent frames to initialize internal state
261 for (int warmup = 0; warmup < 10; warmup++) {
262 memset(render_ch[0], 0, 480 * sizeof(float));
263 memset(capture_ch[0], 0, 480 * sizeof(float));
264
265 render_buf->SplitIntoFrequencyBands();
266 wrapper->aec3->AnalyzeRender(render_buf);
267 render_buf->MergeFrequencyBands();
268
269 wrapper->aec3->AnalyzeCapture(capture_buf);
270 capture_buf->SplitIntoFrequencyBands();
271 wrapper->aec3->SetAudioBufferDelay(0);
272 wrapper->aec3->ProcessCapture(capture_buf, false);
273 capture_buf->MergeFrequencyBands();
274 }
275 log_info(" - AEC3 warmed up with 10 silent frames");
276 log_info(" - Persistent AudioBuffer instances created");
277 }
278 }
279
280 // Initialize debug WAV writers for AEC3 analysis (if echo_cancel enabled)
281 p->debug_wav_aec3_in = NULL;
282 p->debug_wav_aec3_out = NULL;
283 if (p->flags.echo_cancel) {
284 // Open WAV files to capture AEC3 input and output
285 p->debug_wav_aec3_in = wav_writer_open("/tmp/aec3_input.wav", 48000, 1);
286 p->debug_wav_aec3_out = wav_writer_open("/tmp/aec3_output.wav", 48000, 1);
287 if (p->debug_wav_aec3_in) {
288 log_info("Debug: Recording AEC3 input to /tmp/aec3_input.wav");
289 }
290 if (p->debug_wav_aec3_out) {
291 log_info("Debug: Recording AEC3 output to /tmp/aec3_output.wav");
292 }
293
294 log_info("✓ AEC3 echo cancellation enabled (full-duplex mode, no ring buffer delay)");
295 }
296
297 // Initialize audio processing components (compressor, noise gate, filters)
298 // These are applied in the capture path after AEC3 and before Opus encoding
299 {
300 float sample_rate = (float)p->config.sample_rate;
301
302 // Initialize compressor with config values
303 compressor_init(&p->compressor, sample_rate);
304 compressor_set_params(&p->compressor, p->config.comp_threshold_db, p->config.comp_ratio, p->config.comp_attack_ns,
305 p->config.comp_release_ns, p->config.comp_makeup_db);
306 log_info("✓ Capture compressor: threshold=%.1fdB, ratio=%.1f:1, makeup=+%.1fdB", p->config.comp_threshold_db,
307 p->config.comp_ratio, p->config.comp_makeup_db);
308
309 // Initialize noise gate with config values
310 noise_gate_init(&p->noise_gate, sample_rate);
311 noise_gate_set_params(&p->noise_gate, p->config.gate_threshold, p->config.gate_attack_ns, p->config.gate_release_ns,
312 p->config.gate_hysteresis);
313 log_info("✓ Capture noise gate: threshold=%.4f (%.1fdB)", p->config.gate_threshold,
314 20.0f * log10f(p->config.gate_threshold + 1e-10f));
315
316 // Initialize PLAYBACK noise gate - cuts quiet received audio before speakers
317 // Very low threshold - only cut actual silence, not quiet voice audio
318 // The server sends audio with RMS=0.01-0.02, so threshold must be below that
319 noise_gate_init(&p->playback_noise_gate, sample_rate);
320 noise_gate_set_params(&p->playback_noise_gate,
321 0.002f, // -54dB threshold - only cut near-silence
322 1.0f, // 1ms attack - fast open
323 50.0f, // 50ms release - smooth close
324 0.4f); // Hysteresis
325 log_info("✓ Playback noise gate: threshold=0.002 (-54dB)");
326
327 // Initialize highpass filter (removes low-frequency rumble)
328 highpass_filter_init(&p->highpass, p->config.highpass_hz, sample_rate);
329 log_info("✓ Capture highpass filter: %.1f Hz", p->config.highpass_hz);
330
331 // Initialize lowpass filter (removes high-frequency hiss)
332 lowpass_filter_init(&p->lowpass, p->config.lowpass_hz, sample_rate);
333 log_info("✓ Capture lowpass filter: %.1f Hz", p->config.lowpass_hz);
334 }
335
336 p->initialized = true;
337
338 // Initialize startup fade-in to prevent initial microphone click
339 // 200ms at 48kHz = 9600 samples - gradual ramp from silence to full volume
340 // Longer fade-in (200ms vs 50ms) gives much smoother transition without audible pop
341 p->capture_fadein_remaining = (p->config.sample_rate * 200) / 1000; // 200ms worth of samples
342 log_info("✓ Capture fade-in: %d samples (200ms)", p->capture_fadein_remaining);
343
344 log_info("Audio pipeline created: %dHz, %dms frames, %dkbps Opus", p->config.sample_rate, p->config.frame_size_ns,
345 p->config.opus_bitrate / 1000);
346
347 return p;
348
349error:
350 if (p->encoder)
351 opus_encoder_destroy(p->encoder);
352 if (p->decoder)
353 opus_decoder_destroy(p->decoder);
354 if (p->echo_canceller) {
355 delete static_cast<WebRTCAec3Wrapper *>(p->echo_canceller);
356 }
357 SAFE_FREE(p);
358 return NULL;
359}
client_audio_pipeline_config_t client_audio_pipeline_default_config(void)
void noise_gate_init(noise_gate_t *gate, float sample_rate)
Definition mixer.c:816
void compressor_init(compressor_t *comp, float sample_rate)
Definition mixer.c:42
void compressor_set_params(compressor_t *comp, float threshold_dB, float ratio, uint64_t attack_ns, uint64_t release_ns, float makeup_dB)
Definition mixer.c:53
void highpass_filter_init(highpass_filter_t *filter, float cutoff_hz, float sample_rate)
Definition mixer.c:901
void lowpass_filter_init(lowpass_filter_t *filter, float cutoff_hz, float sample_rate)
Definition mixer.c:951
void noise_gate_set_params(noise_gate_t *gate, float threshold, uint64_t attack_ns, uint64_t release_ns, float hysteresis)
Definition mixer.c:830
C++ wrapper for WebRTC AEC3 (opaque to C code)
wav_writer_t * wav_writer_open(const char *filepath, int sample_rate, int channels)
Definition wav_writer.c:49

References client_audio_pipeline_default_config(), compressor_init(), compressor_set_params(), highpass_filter_init(), lowpass_filter_init(), noise_gate_init(), noise_gate_set_params(), and wav_writer_open().

Referenced by audio_client_init().

◆ client_audio_pipeline_default_config()

client_audio_pipeline_config_t client_audio_pipeline_default_config ( void  )

Definition at line 101 of file client_audio_pipeline.cpp.

101 {
102 return (client_audio_pipeline_config_t){
103 .sample_rate = CLIENT_AUDIO_PIPELINE_SAMPLE_RATE,
104 .frame_size_ns = CLIENT_AUDIO_PIPELINE_FRAME_MS,
105 .opus_bitrate = 24000,
106
107 .echo_filter_ns = 250,
108
109 .noise_suppress_db = -25,
110 .agc_level = 16000, // Increased from 8000 for louder output
111 .agc_max_gain = 35, // Increased from 30 dB to handle very quiet mics (35 dB = ~56x gain)
112
113 // Jitter margin: wait this long before starting playback
114 // Lower = less latency but more risk of underruns
115 // Must match AUDIO_JITTER_BUFFER_THRESHOLD in ringbuffer.h.
116 .jitter_margin_ns = 20, // 20ms = 1 Opus packet (optimized for LAN)
117
118 // Higher cutoff to cut low-frequency rumble and feedback
119 .highpass_hz = 150.0f, // Was 80Hz, increased to break rumble feedback loop
120 .lowpass_hz = 8000.0f,
121
122 // Compressor: only compress loud peaks, moderate makeup for volume
123 // User reported clipping with +6dB makeup gain
124 .comp_threshold_db = -12.0f, // Compress above -12dB (was -6dB)
125 .comp_ratio = 3.0f, // Gentler 3:1 ratio
126 .comp_attack_ns = 5 * NS_PER_MS_INT, // Fast attack for peaks
127 .comp_release_ns = 150 * NS_PER_MS_INT, // Slower release
128 .comp_makeup_db = 6.0f, // Increased from 2dB for more output volume
129
130 // Noise gate: VERY aggressive to cut quiet background audio completely
131 // User feedback: "don't amplify or play quiet background audio at all"
132 .gate_threshold = 0.08f, // -22dB threshold (was 0.02/-34dB) - cuts quiet audio hard
133 .gate_attack_ns = 500 * NS_PER_US_INT, // Very fast attack
134 .gate_release_ns = 30 * NS_PER_MS_INT, // Fast release (was 50ms)
135 .gate_hysteresis = 0.3f, // Tighter hysteresis = stays closed longer
136
137 .flags = CLIENT_AUDIO_PIPELINE_FLAGS_ALL,
138 };
139}

Referenced by audio_client_init(), and client_audio_pipeline_create().

◆ client_audio_pipeline_destroy()

void client_audio_pipeline_destroy ( client_audio_pipeline_t *  pipeline)

Definition at line 361 of file client_audio_pipeline.cpp.

361 {
362 if (!pipeline)
363 return;
364
365 // Clean up WebRTC AEC3 AudioBuffer instances
366 if (pipeline->aec3_render_buffer) {
367 delete static_cast<webrtc::AudioBuffer *>(pipeline->aec3_render_buffer);
368 pipeline->aec3_render_buffer = NULL;
369 }
370 if (pipeline->aec3_capture_buffer) {
371 delete static_cast<webrtc::AudioBuffer *>(pipeline->aec3_capture_buffer);
372 pipeline->aec3_capture_buffer = NULL;
373 }
374
375 // Clean up WebRTC AEC3
376 if (pipeline->echo_canceller) {
377 delete static_cast<WebRTCAec3Wrapper *>(pipeline->echo_canceller);
378 pipeline->echo_canceller = NULL;
379 }
380
381 // Clean up Opus
382 if (pipeline->encoder) {
383 opus_encoder_destroy(pipeline->encoder);
384 pipeline->encoder = NULL;
385 }
386 if (pipeline->decoder) {
387 opus_decoder_destroy(pipeline->decoder);
388 pipeline->decoder = NULL;
389 }
390
391 // Clean up debug WAV writers
392 if (pipeline->debug_wav_aec3_in) {
393 wav_writer_close((wav_writer_t *)pipeline->debug_wav_aec3_in);
394 pipeline->debug_wav_aec3_in = NULL;
395 }
396 if (pipeline->debug_wav_aec3_out) {
397 wav_writer_close((wav_writer_t *)pipeline->debug_wav_aec3_out);
398 pipeline->debug_wav_aec3_out = NULL;
399 }
400
401 SAFE_FREE(pipeline);
402}
void wav_writer_close(wav_writer_t *writer)
Definition wav_writer.c:113

References wav_writer_close().

Referenced by audio_cleanup(), and audio_client_init().

◆ client_audio_pipeline_get_flags()

client_audio_pipeline_flags_t client_audio_pipeline_get_flags ( client_audio_pipeline_t *  pipeline)

Definition at line 415 of file client_audio_pipeline.cpp.

415 {
416 if (!pipeline)
417 return CLIENT_AUDIO_PIPELINE_FLAGS_MINIMAL;
418 // No mutex needed - flags are only written from main thread during setup
419 return pipeline->flags;
420}

◆ client_audio_pipeline_get_playback_frame()

int client_audio_pipeline_get_playback_frame ( client_audio_pipeline_t *  pipeline,
float *  output,
int  num_samples 
)

Get a processed playback frame (currently just returns decoded frame)

Definition at line 483 of file client_audio_pipeline.cpp.

483 {
484 if (!pipeline || !output) {
485 return -1;
486 }
487
488 // No mutex needed - this is a placeholder
489 memset(output, 0, num_samples * sizeof(float));
490 return num_samples;
491}

◆ client_audio_pipeline_jitter_margin()

int client_audio_pipeline_jitter_margin ( client_audio_pipeline_t *  pipeline)

Get jitter buffer margin

Definition at line 685 of file client_audio_pipeline.cpp.

685 {
686 if (!pipeline)
687 return 0;
688 return pipeline->config.jitter_margin_ns;
689}

◆ client_audio_pipeline_playback()

int client_audio_pipeline_playback ( client_audio_pipeline_t *  pipeline,
const uint8_t *  opus_in,
int  opus_len,
float *  output,
int  num_samples 
)

Process network playback (decode and register with echo canceller as reference)

Definition at line 453 of file client_audio_pipeline.cpp.

454 {
455 if (!pipeline || !opus_in || !output) {
456 return -1;
457 }
458
459 // No mutex needed - Opus decoder is only used from this thread
460
461 // Decode Opus
462 int decoded_samples = opus_decode_float(pipeline->decoder, opus_in, opus_len, output, num_samples, 0);
463
464 if (decoded_samples < 0) {
465 log_error("Opus decoding failed: %d", decoded_samples);
466 return -1;
467 }
468
469 // Apply playback noise gate - cut quiet background audio before it reaches speakers
470 if (decoded_samples > 0) {
471 noise_gate_process_buffer(&pipeline->playback_noise_gate, output, decoded_samples);
472 }
473
474 // NOTE: Render signal is queued to AEC3 in output_callback() when audio plays,
475 // not here. The capture thread drains the queue and processes AEC3.
476
477 return decoded_samples;
478}
void noise_gate_process_buffer(noise_gate_t *gate, float *buffer, int num_samples)
Definition mixer.c:873

References noise_gate_process_buffer().

Referenced by audio_decode_opus().

◆ client_audio_pipeline_process_duplex()

void client_audio_pipeline_process_duplex ( client_audio_pipeline_t *  pipeline,
const float *  render_samples,
int  render_count,
const float *  capture_samples,
int  capture_count,
float *  processed_output 
)

Process AEC3 inline in full-duplex callback (REAL-TIME SAFE).

This is the PROFESSIONAL approach to AEC3 timing:

  • Called from a single PortAudio full-duplex callback
  • render_samples = what is being played to speakers RIGHT NOW
  • capture_samples = what microphone captured RIGHT NOW
  • Perfect synchronization - no timing mismatch possible

This function does ALL AEC3 processing inline:

  1. AnalyzeRender on render samples (speaker output)
  2. AnalyzeCapture + ProcessCapture on capture samples
  3. Apply filters, noise gate, compressor

Returns processed capture samples in processed_output. Opus encoding is done separately by the encoding thread.

Definition at line 510 of file client_audio_pipeline.cpp.

512 {
513 if (!pipeline || !processed_output)
514 return;
515
516 // Copy capture samples to output buffer for processing
517 if (capture_samples && capture_count > 0) {
518 memcpy(processed_output, capture_samples, capture_count * sizeof(float));
519 } else {
520 memset(processed_output, 0, capture_count * sizeof(float));
521 return;
522 }
523
524 // Check for AEC3 bypass
525 static int bypass_aec3 = -1;
526 if (bypass_aec3 == -1) {
527 const char *env = platform_getenv("BYPASS_AEC3");
528 bypass_aec3 = (env && (strcmp(env, "1") == 0 || strcmp(env, "true") == 0)) ? 1 : 0;
529 if (bypass_aec3) {
530 log_warn("AEC3 BYPASSED (full-duplex mode) via BYPASS_AEC3=1");
531 }
532 }
533
534 // Debug WAV recording
535 if (pipeline->debug_wav_aec3_in) {
536 wav_writer_write((wav_writer_t *)pipeline->debug_wav_aec3_in, capture_samples, capture_count);
537 }
538
539 // Apply startup fade-in using smoothstep curve
540 if (pipeline->capture_fadein_remaining > 0) {
541 const int total_fadein_samples = (pipeline->config.sample_rate * 200) / 1000;
542 for (int i = 0; i < capture_count && pipeline->capture_fadein_remaining > 0; i++) {
543 float progress = 1.0f - ((float)pipeline->capture_fadein_remaining / (float)total_fadein_samples);
544 float gain = smoothstep(progress);
545 processed_output[i] *= gain;
546 pipeline->capture_fadein_remaining--;
547 }
548 }
549
550 // WebRTC AEC3 processing - INLINE, no ring buffer, no mutex
551 if (!bypass_aec3 && pipeline->flags.echo_cancel && pipeline->echo_canceller) {
552 auto wrapper = static_cast<WebRTCAec3Wrapper *>(pipeline->echo_canceller);
553 if (wrapper && wrapper->aec3) {
554 const int webrtc_frame_size = 480; // 10ms at 48kHz
555
556 auto *render_buf = static_cast<webrtc::AudioBuffer *>(pipeline->aec3_render_buffer);
557 auto *capture_buf = static_cast<webrtc::AudioBuffer *>(pipeline->aec3_capture_buffer);
558
559 if (render_buf && capture_buf) {
560 float *const *render_channels = render_buf->channels();
561 float *const *capture_channels = capture_buf->channels();
562
563 if (render_channels && render_channels[0] && capture_channels && capture_channels[0]) {
564 // Verify render_samples is valid before accessing
565 if (!render_samples && render_count > 0) {
566 log_warn_every(1000000, "AEC3: render_samples is NULL but render_count=%d", render_count);
567 return;
568 }
569
570 // Process in 10ms chunks (AEC3 requirement)
571 int render_offset = 0;
572 int capture_offset = 0;
573
574 while (capture_offset < capture_count || render_offset < render_count) {
575 // STEP 1: Feed render signal (what's playing to speakers)
576 // In full-duplex, this is THE EXACT audio being played RIGHT NOW
577 if (render_samples && render_offset < render_count) {
578 int render_chunk = (render_offset + webrtc_frame_size <= render_count) ? webrtc_frame_size
579 : (render_count - render_offset);
580 if (render_chunk == webrtc_frame_size) {
581 // Scale float [-1,1] to WebRTC int16-range [-32768, 32767]
582 copy_buffer_with_gain(&render_samples[render_offset], render_channels[0], webrtc_frame_size, 32768.0f);
583 render_buf->SplitIntoFrequencyBands();
584 wrapper->aec3->AnalyzeRender(render_buf);
585 render_buf->MergeFrequencyBands();
586 g_render_frames_fed.fetch_add(1, std::memory_order_relaxed);
587 }
588 render_offset += render_chunk;
589 }
590
591 // STEP 2: Process capture (microphone input)
592 if (capture_offset < capture_count) {
593 int capture_chunk = (capture_offset + webrtc_frame_size <= capture_count)
594 ? webrtc_frame_size
595 : (capture_count - capture_offset);
596 if (capture_chunk == webrtc_frame_size) {
597 // Scale float [-1,1] to WebRTC int16-range [-32768, 32767]
598 copy_buffer_with_gain(&processed_output[capture_offset], capture_channels[0], webrtc_frame_size,
599 32768.0f);
600
601 // AEC3 sequence: AnalyzeCapture, split, ProcessCapture, merge
602 wrapper->aec3->AnalyzeCapture(capture_buf);
603 capture_buf->SplitIntoFrequencyBands();
604
605 // NOTE: SetAudioBufferDelay() is just an initial hint when use_external_delay_estimator=false
606 // (default). AEC3's internal delay estimator will find the actual delay (~144ms in practice). We don't
607 // call it here - let AEC3 estimate delay automatically.
608
609 wrapper->aec3->ProcessCapture(capture_buf, false);
610 capture_buf->MergeFrequencyBands();
611
612 // Scale back to float range and apply soft clip to prevent distortion
613 // Use gentle soft_clip (threshold=0.6, steepness=2.5) to leave headroom for compressor
614 for (int j = 0; j < webrtc_frame_size; j++) {
615 float sample = capture_channels[0][j] / 32768.0f;
616 processed_output[capture_offset + j] = soft_clip(sample, 0.6f, 2.5f);
617 }
618
619 // Log AEC3 metrics periodically
620 static int duplex_log_count = 0;
621 if (++duplex_log_count % 100 == 1) {
622 webrtc::EchoControl::Metrics metrics = wrapper->aec3->GetMetrics();
623 log_info("AEC3 DUPLEX: ERL=%.1f ERLE=%.1f delay=%dms", metrics.echo_return_loss,
624 metrics.echo_return_loss_enhancement, metrics.delay_ms);
625 audio_analysis_set_aec3_metrics(metrics.echo_return_loss, metrics.echo_return_loss_enhancement,
626 metrics.delay_ms);
627 }
628 }
629 capture_offset += capture_chunk;
630 }
631 }
632 }
633 }
634 }
635 }
636
637 // Debug WAV recording (after AEC3)
638 if (pipeline->debug_wav_aec3_out) {
639 wav_writer_write((wav_writer_t *)pipeline->debug_wav_aec3_out, processed_output, capture_count);
640 }
641
642 // Apply manual AGC (simple pre-gain to boost quiet microphones)
643 // TODO: Replace with proper WebRTC AGC module for adaptive gain control
644 static int agc_call_count = 0;
645 agc_call_count++;
646 if (agc_call_count <= 3 || agc_call_count % 100 == 0) {
647 log_info("AGC check #%d: flags.agc=%d, agc_max_gain=%.1f", agc_call_count, pipeline->flags.agc,
648 pipeline->config.agc_max_gain);
649 }
650
651 if (pipeline->flags.agc) {
652 // Convert dB to linear gain: linear = 10^(dB/20)
653 const float agc_pregain = powf(10.0f, pipeline->config.agc_max_gain / 20.0f);
654 for (int i = 0; i < capture_count; i++) {
655 processed_output[i] *= agc_pregain;
656 }
657 if (agc_call_count <= 3 || agc_call_count % 100 == 0) {
658 log_info("AGC: Applied %.1f dB pre-gain (%.2fx multiplier)", pipeline->config.agc_max_gain, agc_pregain);
659 }
660 }
661
662 // Apply capture processing chain: filters, noise gate, compressor
663 if (pipeline->flags.highpass) {
664 highpass_filter_process_buffer(&pipeline->highpass, processed_output, capture_count);
665 }
666 if (pipeline->flags.lowpass) {
667 lowpass_filter_process_buffer(&pipeline->lowpass, processed_output, capture_count);
668 }
669 if (pipeline->flags.noise_gate) {
670 noise_gate_process_buffer(&pipeline->noise_gate, processed_output, capture_count);
671 }
672 if (pipeline->flags.compressor) {
673 for (int i = 0; i < capture_count; i++) {
674 float gain = compressor_process_sample(&pipeline->compressor, processed_output[i]);
675 processed_output[i] *= gain;
676 }
677 // Apply soft clipping after compressor - threshold=0.7 gives 3dB headroom
678 soft_clip_buffer(processed_output, capture_count, 0.7f, 3.0f);
679 }
680}
void audio_analysis_set_aec3_metrics(double echo_return_loss, double echo_return_loss_enhancement, uint64_t delay_ns)
Definition analysis.c:504
void copy_buffer_with_gain(const float *src, float *dst, int count, float gain)
Definition mixer.c:1109
void soft_clip_buffer(float *buffer, int num_samples, float threshold, float steepness)
Definition mixer.c:1013
float soft_clip(float sample, float threshold, float steepness)
Definition mixer.c:1000
float smoothstep(float t)
Definition mixer.c:1027
void lowpass_filter_process_buffer(lowpass_filter_t *filter, float *buffer, int num_samples)
Definition mixer.c:986
void highpass_filter_process_buffer(highpass_filter_t *filter, float *buffer, int num_samples)
Definition mixer.c:937
float compressor_process_sample(compressor_t *comp, float sidechain)
Definition mixer.c:87
const char * platform_getenv(const char *name)
Definition wasm/system.c:13
int wav_writer_write(wav_writer_t *writer, const float *samples, int num_samples)
Definition wav_writer.c:95

References audio_analysis_set_aec3_metrics(), compressor_process_sample(), copy_buffer_with_gain(), highpass_filter_process_buffer(), lowpass_filter_process_buffer(), noise_gate_process_buffer(), platform_getenv(), smoothstep(), soft_clip(), soft_clip_buffer(), and wav_writer_write().

◆ client_audio_pipeline_reset()

void client_audio_pipeline_reset ( client_audio_pipeline_t *  pipeline)

Reset pipeline state

Definition at line 694 of file client_audio_pipeline.cpp.

694 {
695 if (!pipeline)
696 return;
697
698 // Reset global counters
699 g_render_frames_fed.store(0, std::memory_order_relaxed);
700 g_max_render_rms.store(0.0f, std::memory_order_relaxed);
701
702 log_info("Pipeline state reset");
703}

◆ client_audio_pipeline_set_flags()

void client_audio_pipeline_set_flags ( client_audio_pipeline_t *  pipeline,
client_audio_pipeline_flags_t  flags 
)

Definition at line 408 of file client_audio_pipeline.cpp.

408 {
409 if (!pipeline)
410 return;
411 // No mutex needed - flags are only read by capture thread
412 pipeline->flags = flags;
413}