155 client_audio_pipeline_t *p = SAFE_CALLOC(1,
sizeof(client_audio_pipeline_t), client_audio_pipeline_t *);
157 log_error(
"Failed to allocate client audio pipeline");
168 p->flags = p->config.flags;
169 p->frame_size = p->config.sample_rate * p->config.frame_size_ns / 1000;
175 p->encoder = opus_encoder_create(p->config.sample_rate, 1, OPUS_APPLICATION_VOIP, &opus_error);
176 if (!p->encoder || opus_error != OPUS_OK) {
177 log_error(
"Failed to create Opus encoder: %d", opus_error);
180 opus_encoder_ctl(p->encoder, OPUS_SET_BITRATE(p->config.opus_bitrate));
187 p->decoder = opus_decoder_create(p->config.sample_rate, 1, &opus_error);
188 if (!p->decoder || opus_error != OPUS_OK) {
189 log_error(
"Failed to create Opus decoder: %d", opus_error);
199 if (p->flags.echo_cancel) {
201 webrtc::EchoCanceller3Config aec3_config;
206 aec3_config.filter.main.length_blocks = 50;
207 aec3_config.filter.shadow.length_blocks = 50;
208 aec3_config.filter.main_initial.length_blocks = 25;
209 aec3_config.filter.shadow_initial.length_blocks = 25;
213 aec3_config.echo_audibility.audibility_threshold_lf = 5;
216 auto factory = webrtc::EchoCanceller3Factory(aec3_config);
218 std::unique_ptr<webrtc::EchoControl> echo_control = factory.Create(
static_cast<int>(p->config.sample_rate),
224 log_warn(
"Failed to create WebRTC AEC3 instance - echo cancellation unavailable");
225 p->echo_canceller = NULL;
229 wrapper->aec3 = std::move(echo_control);
230 wrapper->config = aec3_config;
231 p->echo_canceller = wrapper;
233 log_info(
"✓ WebRTC AEC3 initialized (67ms filter for bass, adaptive delay)");
236 p->aec3_render_buffer =
new webrtc::AudioBuffer(48000, 1, 48000, 1, 48000, 1);
237 p->aec3_capture_buffer =
new webrtc::AudioBuffer(48000, 1, 48000, 1, 48000, 1);
239 auto *render_buf =
static_cast<webrtc::AudioBuffer *
>(p->aec3_render_buffer);
240 auto *capture_buf =
static_cast<webrtc::AudioBuffer *
>(p->aec3_capture_buffer);
243 float *
const *render_ch = render_buf->channels();
244 float *
const *capture_ch = capture_buf->channels();
245 if (render_ch && render_ch[0]) {
246 memset(render_ch[0], 0, 480 *
sizeof(
float));
248 if (capture_ch && capture_ch[0]) {
249 memset(capture_ch[0], 0, 480 *
sizeof(
float));
253 render_buf->SplitIntoFrequencyBands();
254 render_buf->MergeFrequencyBands();
255 capture_buf->SplitIntoFrequencyBands();
256 capture_buf->MergeFrequencyBands();
258 log_info(
" - AudioBuffer filterbank state initialized");
261 for (
int warmup = 0; warmup < 10; warmup++) {
262 memset(render_ch[0], 0, 480 *
sizeof(
float));
263 memset(capture_ch[0], 0, 480 *
sizeof(
float));
265 render_buf->SplitIntoFrequencyBands();
266 wrapper->aec3->AnalyzeRender(render_buf);
267 render_buf->MergeFrequencyBands();
269 wrapper->aec3->AnalyzeCapture(capture_buf);
270 capture_buf->SplitIntoFrequencyBands();
271 wrapper->aec3->SetAudioBufferDelay(0);
272 wrapper->aec3->ProcessCapture(capture_buf,
false);
273 capture_buf->MergeFrequencyBands();
275 log_info(
" - AEC3 warmed up with 10 silent frames");
276 log_info(
" - Persistent AudioBuffer instances created");
281 p->debug_wav_aec3_in = NULL;
282 p->debug_wav_aec3_out = NULL;
283 if (p->flags.echo_cancel) {
285 p->debug_wav_aec3_in =
wav_writer_open(
"/tmp/aec3_input.wav", 48000, 1);
286 p->debug_wav_aec3_out =
wav_writer_open(
"/tmp/aec3_output.wav", 48000, 1);
287 if (p->debug_wav_aec3_in) {
288 log_info(
"Debug: Recording AEC3 input to /tmp/aec3_input.wav");
290 if (p->debug_wav_aec3_out) {
291 log_info(
"Debug: Recording AEC3 output to /tmp/aec3_output.wav");
294 log_info(
"✓ AEC3 echo cancellation enabled (full-duplex mode, no ring buffer delay)");
300 float sample_rate = (float)p->config.sample_rate;
304 compressor_set_params(&p->compressor, p->config.comp_threshold_db, p->config.comp_ratio, p->config.comp_attack_ns,
305 p->config.comp_release_ns, p->config.comp_makeup_db);
306 log_info(
"✓ Capture compressor: threshold=%.1fdB, ratio=%.1f:1, makeup=+%.1fdB", p->config.comp_threshold_db,
307 p->config.comp_ratio, p->config.comp_makeup_db);
311 noise_gate_set_params(&p->noise_gate, p->config.gate_threshold, p->config.gate_attack_ns, p->config.gate_release_ns,
312 p->config.gate_hysteresis);
313 log_info(
"✓ Capture noise gate: threshold=%.4f (%.1fdB)", p->config.gate_threshold,
314 20.0f * log10f(p->config.gate_threshold + 1e-10f));
325 log_info(
"✓ Playback noise gate: threshold=0.002 (-54dB)");
329 log_info(
"✓ Capture highpass filter: %.1f Hz", p->config.highpass_hz);
333 log_info(
"✓ Capture lowpass filter: %.1f Hz", p->config.lowpass_hz);
336 p->initialized =
true;
341 p->capture_fadein_remaining = (p->config.sample_rate * 200) / 1000;
342 log_info(
"✓ Capture fade-in: %d samples (200ms)", p->capture_fadein_remaining);
344 log_info(
"Audio pipeline created: %dHz, %dms frames, %dkbps Opus", p->config.sample_rate, p->config.frame_size_ns,
345 p->config.opus_bitrate / 1000);
351 opus_encoder_destroy(p->encoder);
353 opus_decoder_destroy(p->decoder);
354 if (p->echo_canceller) {
366 if (pipeline->aec3_render_buffer) {
367 delete static_cast<webrtc::AudioBuffer *
>(pipeline->aec3_render_buffer);
368 pipeline->aec3_render_buffer = NULL;
370 if (pipeline->aec3_capture_buffer) {
371 delete static_cast<webrtc::AudioBuffer *
>(pipeline->aec3_capture_buffer);
372 pipeline->aec3_capture_buffer = NULL;
376 if (pipeline->echo_canceller) {
378 pipeline->echo_canceller = NULL;
382 if (pipeline->encoder) {
383 opus_encoder_destroy(pipeline->encoder);
384 pipeline->encoder = NULL;
386 if (pipeline->decoder) {
387 opus_decoder_destroy(pipeline->decoder);
388 pipeline->decoder = NULL;
392 if (pipeline->debug_wav_aec3_in) {
394 pipeline->debug_wav_aec3_in = NULL;
396 if (pipeline->debug_wav_aec3_out) {
398 pipeline->debug_wav_aec3_out = NULL;
511 int render_count,
const float *capture_samples,
int capture_count,
512 float *processed_output) {
513 if (!pipeline || !processed_output)
517 if (capture_samples && capture_count > 0) {
518 memcpy(processed_output, capture_samples, capture_count *
sizeof(
float));
520 memset(processed_output, 0, capture_count *
sizeof(
float));
525 static int bypass_aec3 = -1;
526 if (bypass_aec3 == -1) {
528 bypass_aec3 = (env && (strcmp(env,
"1") == 0 || strcmp(env,
"true") == 0)) ? 1 : 0;
530 log_warn(
"AEC3 BYPASSED (full-duplex mode) via BYPASS_AEC3=1");
535 if (pipeline->debug_wav_aec3_in) {
536 wav_writer_write((wav_writer_t *)pipeline->debug_wav_aec3_in, capture_samples, capture_count);
540 if (pipeline->capture_fadein_remaining > 0) {
541 const int total_fadein_samples = (pipeline->config.sample_rate * 200) / 1000;
542 for (
int i = 0; i < capture_count && pipeline->capture_fadein_remaining > 0; i++) {
543 float progress = 1.0f - ((float)pipeline->capture_fadein_remaining / (
float)total_fadein_samples);
545 processed_output[i] *= gain;
546 pipeline->capture_fadein_remaining--;
551 if (!bypass_aec3 && pipeline->flags.echo_cancel && pipeline->echo_canceller) {
553 if (wrapper && wrapper->aec3) {
554 const int webrtc_frame_size = 480;
556 auto *render_buf =
static_cast<webrtc::AudioBuffer *
>(pipeline->aec3_render_buffer);
557 auto *capture_buf =
static_cast<webrtc::AudioBuffer *
>(pipeline->aec3_capture_buffer);
559 if (render_buf && capture_buf) {
560 float *
const *render_channels = render_buf->channels();
561 float *
const *capture_channels = capture_buf->channels();
563 if (render_channels && render_channels[0] && capture_channels && capture_channels[0]) {
565 if (!render_samples && render_count > 0) {
566 log_warn_every(1000000,
"AEC3: render_samples is NULL but render_count=%d", render_count);
571 int render_offset = 0;
572 int capture_offset = 0;
574 while (capture_offset < capture_count || render_offset < render_count) {
577 if (render_samples && render_offset < render_count) {
578 int render_chunk = (render_offset + webrtc_frame_size <= render_count) ? webrtc_frame_size
579 : (render_count - render_offset);
580 if (render_chunk == webrtc_frame_size) {
582 copy_buffer_with_gain(&render_samples[render_offset], render_channels[0], webrtc_frame_size, 32768.0f);
583 render_buf->SplitIntoFrequencyBands();
584 wrapper->aec3->AnalyzeRender(render_buf);
585 render_buf->MergeFrequencyBands();
586 g_render_frames_fed.fetch_add(1, std::memory_order_relaxed);
588 render_offset += render_chunk;
592 if (capture_offset < capture_count) {
593 int capture_chunk = (capture_offset + webrtc_frame_size <= capture_count)
595 : (capture_count - capture_offset);
596 if (capture_chunk == webrtc_frame_size) {
602 wrapper->aec3->AnalyzeCapture(capture_buf);
603 capture_buf->SplitIntoFrequencyBands();
609 wrapper->aec3->ProcessCapture(capture_buf,
false);
610 capture_buf->MergeFrequencyBands();
614 for (
int j = 0; j < webrtc_frame_size; j++) {
615 float sample = capture_channels[0][j] / 32768.0f;
616 processed_output[capture_offset + j] =
soft_clip(sample, 0.6f, 2.5f);
620 static int duplex_log_count = 0;
621 if (++duplex_log_count % 100 == 1) {
622 webrtc::EchoControl::Metrics metrics = wrapper->aec3->GetMetrics();
623 log_info(
"AEC3 DUPLEX: ERL=%.1f ERLE=%.1f delay=%dms", metrics.echo_return_loss,
624 metrics.echo_return_loss_enhancement, metrics.delay_ms);
629 capture_offset += capture_chunk;
638 if (pipeline->debug_wav_aec3_out) {
639 wav_writer_write((wav_writer_t *)pipeline->debug_wav_aec3_out, processed_output, capture_count);
644 static int agc_call_count = 0;
646 if (agc_call_count <= 3 || agc_call_count % 100 == 0) {
647 log_info(
"AGC check #%d: flags.agc=%d, agc_max_gain=%.1f", agc_call_count, pipeline->flags.agc,
648 pipeline->config.agc_max_gain);
651 if (pipeline->flags.agc) {
653 const float agc_pregain = powf(10.0f, pipeline->config.agc_max_gain / 20.0f);
654 for (
int i = 0; i < capture_count; i++) {
655 processed_output[i] *= agc_pregain;
657 if (agc_call_count <= 3 || agc_call_count % 100 == 0) {
658 log_info(
"AGC: Applied %.1f dB pre-gain (%.2fx multiplier)", pipeline->config.agc_max_gain, agc_pregain);
663 if (pipeline->flags.highpass) {
666 if (pipeline->flags.lowpass) {
669 if (pipeline->flags.noise_gate) {
672 if (pipeline->flags.compressor) {
673 for (
int i = 0; i < capture_count; i++) {
675 processed_output[i] *= gain;