ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
lib/audio/audio.c
Go to the documentation of this file.
1
8#include "audio/audio.h"
10#include "util/endian.h"
11#include "common.h"
12#include "util/endian.h"
13#include "asciichat_errno.h" // For asciichat_errno system
14#include "buffer_pool.h"
15#include "options/options.h"
16#include "platform/init.h" // For static_mutex_t
17#include "network/packet.h" // For audio_batch_packet_t
18#include "log/logging.h" // For log_* macros
19#include <stdlib.h>
20#include <string.h>
21#include <math.h>
22#include <stdatomic.h>
23
24#ifdef _WIN32
25#include <malloc.h> // For _alloca on Windows
26#define alloca _alloca
27#else
28#include <unistd.h> // For dup, dup2, close, STDERR_FILENO
29#include <fcntl.h> // For O_WRONLY
30#endif
31
32// PortAudio initialization reference counter
33// Tracks how many audio contexts are using PortAudio to avoid conflicts
34static unsigned int g_pa_init_refcount = 0;
35static static_mutex_t g_pa_refcount_mutex = STATIC_MUTEX_INIT;
36
/**
 * PortAudio stream callback for a combined input + output (duplex) stream.
 *
 * Per invocation it:
 *   1. fills outputBuffer from ctx->playback_buffer (silence when nothing was read or no buffer exists);
 *   2. if ctx->audio_pipeline is set and both input and output are non-NULL, hands a render
 *      reference plus the microphone input to the pipeline call and writes the processed
 *      capture to ctx->capture_buffer;
 *   3. otherwise, if input and ctx->capture_buffer exist, writes the raw input to ctx->capture_buffer.
 *
 * @param inputBuffer     Captured samples, accessed as const float *; NULL-checked before use.
 * @param outputBuffer    Playback destination, accessed as float *; NULL-checked before use.
 * @param framesPerBuffer Frame count; multiplied by AUDIO_CHANNELS to obtain the sample count.
 * @param timeInfo        Unused.
 * @param statusFlags     paOutputUnderflow / paInputOverflow bits are logged (rate-limited).
 * @param userData        Cast to audio_context_t *; dereferenced without a NULL check.
 * @return Always paContinue, including while ctx->shutting_down is set.
 */
45static int duplex_callback(const void *inputBuffer, void *outputBuffer, unsigned long framesPerBuffer,
46 const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags,
47 void *userData) {
48 (void)timeInfo;
49
50 audio_context_t *ctx = (audio_context_t *)userData;
51 const float *input = (const float *)inputBuffer;
52 float *output = (float *)outputBuffer;
53 size_t num_samples = framesPerBuffer * AUDIO_CHANNELS;
54
55 // Silence on shutdown
// Input is ignored entirely on this path; only the output (if any) is zeroed.
56 if (atomic_load(&ctx->shutting_down)) {
57 if (output) {
58 SAFE_MEMSET(output, num_samples * sizeof(float), 0, num_samples * sizeof(float));
59 }
60 return paContinue;
61 }
62
63 // Log status flags
64 if (statusFlags != 0) {
65 if (statusFlags & paOutputUnderflow) {
66 log_warn_every(LOG_RATE_FAST, "PortAudio output underflow");
67 }
68 if (statusFlags & paInputOverflow) {
69 log_warn_every(LOG_RATE_FAST, "PortAudio input overflow");
70 }
71 }
72
73 // STEP 1: Read from jitter buffer → output (speaker)
74 size_t samples_read = 0;
75 if (output && ctx->playback_buffer) {
76 // Log playback buffer latency (how much audio is queued for playback)
77 size_t buffer_samples = audio_ring_buffer_available_read(ctx->playback_buffer);
// NOTE(review): the divisor hard-codes 48 samples per millisecond; confirm it matches the stream's actual rate.
78 float buffer_latency_ms = (float)buffer_samples / 48.0f; // samples / (48000 / 1000)
79 log_debug_every(500000, "LATENCY: Playback buffer %.1fms (%zu samples)", buffer_latency_ms, buffer_samples);
80
81 samples_read = audio_ring_buffer_read(ctx->playback_buffer, output, num_samples);
// A zero return means nothing was delivered, so the output is explicitly silenced here.
82 if (samples_read == 0) {
83 SAFE_MEMSET(output, num_samples * sizeof(float), 0, num_samples * sizeof(float));
84 }
85 } else if (output) {
86 SAFE_MEMSET(output, num_samples * sizeof(float), 0, num_samples * sizeof(float));
87 }
88
89 // STEP 2: Process AEC3 inline - render and capture at EXACT same time
90 if (ctx->audio_pipeline && input && output) {
91 // Allocate processed buffer on stack
// Stack usage here grows with framesPerBuffer (one float per sample, no upper bound is checked).
92 float *processed = (float *)alloca(num_samples * sizeof(float));
93
94 // CRITICAL FIX: For AEC3 render signal, peek at jitter buffer even if not ready for playback yet
95 // This prevents AEC3 from getting silence during jitter fill period
96 float *aec3_render = output; // Default: use actual output
97 float *peeked_audio = NULL;
98
99 if (samples_read == 0 && ctx->playback_buffer) {
100 // Jitter buffer not ready for playback, but peek at available audio for AEC3
101 peeked_audio = (float *)alloca(num_samples * sizeof(float));
102 size_t peeked = audio_ring_buffer_peek(ctx->playback_buffer, peeked_audio, num_samples);
103
104 if (peeked > 0) {
105 // Zero-pad if we didn't get enough samples
106 if (peeked < num_samples) {
107 SAFE_MEMSET(peeked_audio + peeked, (num_samples - peeked) * sizeof(float), 0,
108 (num_samples - peeked) * sizeof(float));
109 }
110 aec3_render = peeked_audio; // Use peeked audio for AEC3
111
112 // Log when we're using peeked audio (helpful for debugging)
113 log_debug_every(1000000, "AEC3: Using peeked audio (%zu samples) - jitter buffer filling", peeked);
114 }
115 // else: no audio available at all, aec3_render stays as silence
// (it still points at `output`, which STEP 1 zeroed when samples_read was 0)
116 }
117
118 // DIAGNOSTIC: Calculate RMS of render signal being fed to AEC3
119 float render_rms = 0.0f;
// aec3_render is either `output` (non-NULL per the enclosing condition) or the alloca'd peek buffer.
120 if (aec3_render) {
121 float sum_squares = 0.0f;
122 for (size_t i = 0; i < num_samples; i++) {
123 sum_squares += aec3_render[i] * aec3_render[i];
124 }
125 render_rms = sqrtf(sum_squares / (float)num_samples);
126 }
// using_peeked prints "YES" whenever the peek buffer was allocated, even if the peek returned
// 0 samples and aec3_render therefore still points at `output`.
127 log_info_every(1000000, "AEC3 RENDER SIGNAL: RMS=%.6f, samples_read=%zu, using_peeked=%s, buffer_available=%zu",
128 render_rms, samples_read, (peeked_audio != NULL) ? "YES" : "NO",
129 ctx->playback_buffer ? audio_ring_buffer_available_read(ctx->playback_buffer) : 0);
130
131 // This does: AnalyzeRender(aec3_render) + ProcessCapture(input) + filters + compressor
132 // All in one call, perfect synchronization
// NOTE(review): the line naming the function being called appears to be missing from this
// listing (embedded numbering jumps 132 -> 134); the arguments below belong to that call.
134 (client_audio_pipeline_t *)ctx->audio_pipeline, // NOLINT(readability-suspicious-call-argument)
135 aec3_render, // render_samples (peeked OR playing audio for AEC3)
136 (int)num_samples, // render_count
137 input, // capture_samples (microphone input)
138 (int)num_samples, // capture_count
139 processed // processed_output (processed capture)
140 );
141
142 // Write processed capture to ring buffer for encoding thread
// The return value of the write is not inspected.
143 if (ctx->capture_buffer) {
144 audio_ring_buffer_write(ctx->capture_buffer, processed, (int)num_samples);
145 }
146 } else if (input && ctx->capture_buffer) {
147 // No pipeline - write raw capture
148 audio_ring_buffer_write(ctx->capture_buffer, input, (int)num_samples);
149 }
150
151 return paContinue;
152}
153
/**
 * Resample a mono float signal with linear interpolation.
 *
 * Output sample i is taken at source position i * (src_rate / dst_rate). Positions
 * that fall on or beyond the last source sample repeat that last sample.
 *
 * The destination is filled with silence (0.0f) when there is nothing valid to
 * interpolate from: NULL or empty source, or a rate that is zero, negative or NaN.
 * Such rates would otherwise yield an infinite/NaN/negative position whose
 * conversion to size_t is undefined behaviour.
 *
 * @param src          Source samples (may be NULL only if the caller accepts silence).
 * @param src_samples  Number of samples available in src.
 * @param dst          Destination buffer; nothing is written when NULL.
 * @param dst_samples  Number of samples to produce in dst.
 * @param src_rate     Sample rate of src; must be > 0.
 * @param dst_rate     Sample rate of dst; must be > 0.
 */
void resample_linear(const float *src, size_t src_samples, float *dst, size_t dst_samples, double src_rate,
                     double dst_rate) {
  if (!dst || dst_samples == 0) {
    return;
  }

  // The negated comparisons also reject NaN rates.
  if (!src || src_samples == 0 || !(src_rate > 0.0) || !(dst_rate > 0.0)) {
    for (size_t i = 0; i < dst_samples; i++) {
      dst[i] = 0.0f;
    }
    return;
  }

  const double ratio = src_rate / dst_rate;
  const size_t last_idx = src_samples - 1;
  const double last_pos = (double)last_idx;

  for (size_t i = 0; i < dst_samples; i++) {
    const double src_pos = (double)i * ratio;

    // Clamp BEFORE converting to an integer: an out-of-range (or NaN/inf)
    // double must never reach the size_t cast below.
    if (!(src_pos < last_pos)) {
      dst[i] = src[last_idx];
      continue;
    }

    // Here 0 <= src_pos < last_idx, so idx0 + 1 is a valid index.
    const size_t idx0 = (size_t)src_pos;
    const double frac = src_pos - (double)idx0;

    dst[i] = (float)((1.0 - frac) * src[idx0] + frac * src[idx0 + 1]);
  }
}
190
197static int output_callback(const void *inputBuffer, void *outputBuffer, unsigned long framesPerBuffer,
198 const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags,
199 void *userData) {
200 (void)inputBuffer;
201 (void)timeInfo;
202
203 audio_context_t *ctx = (audio_context_t *)userData;
204 float *output = (float *)outputBuffer;
205 size_t num_output_samples = framesPerBuffer * AUDIO_CHANNELS;
206
207 // Silence on shutdown
208 if (atomic_load(&ctx->shutting_down)) {
209 if (output) {
210 SAFE_MEMSET(output, num_output_samples * sizeof(float), 0, num_output_samples * sizeof(float));
211 }
212 return paContinue;
213 }
214
215 if (statusFlags & paOutputUnderflow) {
216 log_warn_every(LOG_RATE_FAST, "PortAudio output underflow (separate stream)");
217 }
218
219 // Read from playback buffer → output (speaker)
220 if (output && ctx->playback_buffer) {
221 // Check if we need to resample (buffer is at sample_rate, output is at output_device_rate)
222 bool needs_resample =
223 (ctx->output_device_rate > 0 && ctx->sample_rate > 0 && ctx->output_device_rate != ctx->sample_rate);
224
225 if (needs_resample) {
226 // Calculate how many samples we need from the 48kHz buffer to produce num_output_samples at output rate
227 double ratio = ctx->sample_rate / ctx->output_device_rate; // e.g., 48000/44100 = 1.088
228 size_t num_src_samples = (size_t)((double)num_output_samples * ratio) + 2; // +2 for interpolation safety
229
230 // Read from buffer at internal sample rate
231 float *src_buffer = (float *)alloca(num_src_samples * sizeof(float));
232 size_t samples_read = audio_ring_buffer_read(ctx->playback_buffer, src_buffer, num_src_samples);
233
234 if (samples_read == 0) {
235 SAFE_MEMSET(output, num_output_samples * sizeof(float), 0, num_output_samples * sizeof(float));
236 } else {
237 // Store 48kHz signal in render buffer BEFORE resampling (for AEC3)
238 // This avoids double resampling (48→44.1→48) and preserves quality
239 if (ctx->render_buffer) {
240 audio_ring_buffer_write(ctx->render_buffer, src_buffer, (int)samples_read);
241 }
242
243 // THEN resample from 48kHz to output device rate
244 resample_linear(src_buffer, samples_read, output, num_output_samples, ctx->sample_rate,
245 ctx->output_device_rate);
246 }
247 } else {
248 // No resampling needed - direct read
249 size_t samples_read = audio_ring_buffer_read(ctx->playback_buffer, output, num_output_samples);
250 if (samples_read == 0) {
251 SAFE_MEMSET(output, num_output_samples * sizeof(float), 0, num_output_samples * sizeof(float));
252 }
253
254 // Copy to render buffer for AEC3 reference
255 if (ctx->render_buffer) {
256 audio_ring_buffer_write(ctx->render_buffer, output, (int)num_output_samples);
257 }
258 }
259 } else if (output) {
260 SAFE_MEMSET(output, num_output_samples * sizeof(float), 0, num_output_samples * sizeof(float));
261 }
262
263 return paContinue;
264}
265
/**
 * PortAudio callback for an input-only stream (used when input and output run as separate streams).
 *
 * When ctx->audio_pipeline, the input buffer and ctx->render_buffer are all present, it builds a
 * render reference of num_samples floats (from ctx->render_buffer, resampled if the input device
 * rate differs from ctx->sample_rate, with a cached copy of the previous reference as fallback),
 * passes it with the microphone input to the pipeline call, and writes the processed result to
 * ctx->capture_buffer. Otherwise, if input and ctx->capture_buffer exist, the raw input is
 * written to ctx->capture_buffer.
 *
 * @param inputBuffer     Captured samples, accessed as const float *; NULL-checked before use.
 * @param outputBuffer    Unused.
 * @param framesPerBuffer Frame count; multiplied by AUDIO_CHANNELS to obtain the sample count.
 * @param timeInfo        Unused.
 * @param statusFlags     paInputOverflow is logged (rate-limited).
 * @param userData        Cast to audio_context_t *; dereferenced without a NULL check.
 * @return Always paContinue.
 */
272static int input_callback(const void *inputBuffer, void *outputBuffer, unsigned long framesPerBuffer,
273 const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags, void *userData) {
274 (void)outputBuffer;
275 (void)timeInfo;
276
277 audio_context_t *ctx = (audio_context_t *)userData;
278 const float *input = (const float *)inputBuffer;
279 size_t num_samples = framesPerBuffer * AUDIO_CHANNELS;
280
281 // Silence on shutdown
// Captured input is discarded on this path; nothing is written to any buffer.
282 if (atomic_load(&ctx->shutting_down)) {
283 return paContinue;
284 }
285
286 if (statusFlags & paInputOverflow) {
287 log_warn_every(LOG_RATE_FAST, "PortAudio input overflow (separate stream)");
288 }
289
290 // Process AEC3 with render reference from render_buffer
291 if (ctx->audio_pipeline && input && ctx->render_buffer) {
292 // Render buffer now stores samples at internal sample_rate (48kHz), not output device rate
293 // Check if we need to resample render (render_buffer is at sample_rate, input is at input_device_rate)
294 bool needs_resample =
295 (ctx->input_device_rate > 0 && ctx->sample_rate > 0 && ctx->sample_rate != ctx->input_device_rate);
296
// Stack usage grows with framesPerBuffer (no upper bound is checked here).
297 float *render = (float *)alloca(num_samples * sizeof(float));
298
299 // Static buffer to keep last render samples when timing between callbacks is off
300 // This ensures AEC3 always has a render reference, even if slightly stale
// These are function-level statics: one cache shared by every invocation of this callback,
// regardless of which ctx it is called with. At most 960 samples are ever cached.
301 static float last_render[960]; // Max 20ms at 48kHz
302 static size_t last_render_count = 0;
303 static bool last_render_valid = false;
304
305 if (needs_resample) {
306 // Render buffer is at internal sample_rate (48kHz), we need samples at input_device_rate
307 // Calculate how many samples to read from render buffer
// NOTE(review): if sample_rate / input_device_rate are integer-typed this division truncates — confirm field types.
308 double ratio = ctx->sample_rate / ctx->input_device_rate; // e.g., 48000/44100 = 1.088
309 size_t num_render_samples = (size_t)((double)num_samples * ratio) + 2;
310
311 float *render_raw = (float *)alloca(num_render_samples * sizeof(float));
312 size_t render_read = audio_ring_buffer_read(ctx->render_buffer, render_raw, num_render_samples);
313
314 if (render_read == 0) {
315 // Try peeking for any available samples
// The peek copies without advancing the read index, so peeked samples remain in the buffer.
316 render_read = audio_ring_buffer_peek(ctx->render_buffer, render_raw, num_render_samples);
317 if (render_read > 0) {
318 log_debug_every(1000000, "AEC3 separate: Using peeked render (%zu samples)", render_read);
319 }
320 }
321
322 if (render_read == 0 && last_render_valid) {
323 // Use last known render samples as fallback
324 size_t copy_count = (last_render_count < num_samples) ? last_render_count : num_samples;
325 SAFE_MEMCPY(render, copy_count * sizeof(float), last_render, copy_count * sizeof(float));
326 if (copy_count < num_samples) {
327 SAFE_MEMSET(render + copy_count, (num_samples - copy_count) * sizeof(float), 0,
328 (num_samples - copy_count) * sizeof(float));
329 }
330 log_debug_every(1000000, "AEC3 separate: Using cached last_render (%zu samples)", copy_count);
331 } else if (render_read == 0) {
// Nothing read, nothing peeked, no cache yet: the reference is pure silence.
332 SAFE_MEMSET(render, num_samples * sizeof(float), 0, num_samples * sizeof(float));
333 } else {
334 // Resample from internal sample_rate to input_device_rate
// render_read may be fewer than num_render_samples when it came from the peek above.
335 resample_linear(render_raw, render_read, render, num_samples, ctx->sample_rate, ctx->input_device_rate);
336 // Cache for future use
337 size_t cache_count = (num_samples < 960) ? num_samples : 960;
338 SAFE_MEMCPY(last_render, cache_count * sizeof(float), render, cache_count * sizeof(float));
339 last_render_count = cache_count;
340 last_render_valid = true;
341 }
342 } else {
343 // No resampling needed - direct read (both at 48kHz)
344 size_t render_samples = audio_ring_buffer_read(ctx->render_buffer, render, num_samples);
345
346 if (render_samples == 0) {
347 // Try peeking
348 render_samples = audio_ring_buffer_peek(ctx->render_buffer, render, num_samples);
349 if (render_samples > 0) {
350 log_debug_every(1000000, "AEC3 separate: Using peeked render (%zu samples)", render_samples);
351 }
352 }
353
354 if (render_samples == 0 && last_render_valid) {
355 // Use cached render
356 size_t copy_count = (last_render_count < num_samples) ? last_render_count : num_samples;
357 SAFE_MEMCPY(render, copy_count * sizeof(float), last_render, copy_count * sizeof(float));
358 if (copy_count < num_samples) {
359 SAFE_MEMSET(render + copy_count, (num_samples - copy_count) * sizeof(float), 0,
360 (num_samples - copy_count) * sizeof(float));
361 }
362 log_debug_every(1000000, "AEC3 separate: Using cached last_render (%zu samples)", copy_count);
363 } else if (render_samples < num_samples) {
364 // Zero-pad if not enough
// Also covers render_samples == 0 with no cache: the whole reference becomes silence.
365 SAFE_MEMSET(render + render_samples, (num_samples - render_samples) * sizeof(float), 0,
366 (num_samples - render_samples) * sizeof(float));
367 }
368
369 // Cache for future use
// Only fresh data refreshes the cache; the cached-fallback path (render_samples == 0) leaves it unchanged.
370 if (render_samples > 0) {
371 size_t cache_count = (num_samples < 960) ? num_samples : 960;
372 SAFE_MEMCPY(last_render, cache_count * sizeof(float), render, cache_count * sizeof(float));
373 last_render_count = cache_count;
374 last_render_valid = true;
375 }
376 }
377
378 // Process through AEC3
379 float *processed = (float *)alloca(num_samples * sizeof(float));
// NOTE(review): the line naming the function being called (and its leading arguments) appears to
// be missing from this listing (embedded numbering jumps 379 -> 381); the arguments below belong to that call.
381 (int)num_samples, // render = what's playing to speakers
382 input, (int)num_samples, // capture = microphone input
383 processed // output = processed capture
384 );
385
386 // Write processed capture to ring buffer for encoding thread
// The return value of the write is not inspected.
387 if (ctx->capture_buffer) {
388 audio_ring_buffer_write(ctx->capture_buffer, processed, (int)num_samples);
389 }
390 } else if (input && ctx->capture_buffer) {
391 // No pipeline - write raw capture
392 audio_ring_buffer_write(ctx->capture_buffer, input, (int)num_samples);
393 }
394
395 return paContinue;
396}
397
398// Forward declaration for internal helper function
399static audio_ring_buffer_t *audio_ring_buffer_create_internal(bool jitter_buffer_enabled);
400
/**
 * Allocate and initialise an audio ring buffer.
 *
 * Zeroes the sample storage, resets both indices and the crossfade/underrun state,
 * and initialises the buffer's mutex.
 *
 * @param jitter_buffer_enabled  Stored in rb->jitter_buffer_enabled. When false the buffer starts
 *                               with jitter_buffer_filled = true; when true it starts unfilled.
 * @return The new ring buffer, or NULL (with errno set via SET_ERRNO) if the allocation or the
 *         mutex initialisation fails. On mutex failure the buffer is released with buffer_pool_free.
 */
401static audio_ring_buffer_t *audio_ring_buffer_create_internal(bool jitter_buffer_enabled) {
402 size_t rb_size = sizeof(audio_ring_buffer_t);
// NOTE(review): the line that declares and allocates `rb` appears to be missing from this listing
// (embedded numbering jumps 402 -> 404); rb_size is presumably consumed there — confirm against the real file.
404
405 if (!rb) {
406 SET_ERRNO(ERROR_MEMORY, "Failed to allocate audio ring buffer from buffer pool");
407 return NULL;
408 }
409
410 SAFE_MEMSET(rb->data, sizeof(rb->data), 0, sizeof(rb->data));
411 rb->write_index = 0;
412 rb->read_index = 0;
413 // For capture buffers (jitter_buffer_enabled=false), mark as already filled to bypass jitter logic
414 // For playback buffers (jitter_buffer_enabled=true), start unfilled to wait for threshold
415 rb->jitter_buffer_filled = !jitter_buffer_enabled;
// NOTE(review): embedded numbering jumps 415 -> 417, so one initialisation line appears to be missing here.
417 rb->crossfade_fade_in = false;
418 rb->last_sample = 0.0f;
419 rb->underrun_count = 0;
420 rb->jitter_buffer_enabled = jitter_buffer_enabled;
421
422 if (mutex_init(&rb->mutex) != 0) {
423 SET_ERRNO(ERROR_THREAD, "Failed to initialize audio ring buffer mutex");
// Release the buffer so a failed mutex init does not leak it.
424 buffer_pool_free(NULL, rb, sizeof(audio_ring_buffer_t));
425 return NULL;
426 }
427
428 return rb;
429}
430
432 return audio_ring_buffer_create_internal(true); // Default: enable jitter buffering for playback
433}
434
436 return audio_ring_buffer_create_internal(false); // Disable jitter buffering for capture
437}
438
440 if (!rb)
441 return;
442
443 mutex_destroy(&rb->mutex);
444 buffer_pool_free(NULL, rb, sizeof(audio_ring_buffer_t));
445}
446
448 if (!rb)
449 return;
450
451 mutex_lock(&rb->mutex);
452 // Reset buffer to empty state (no audio to play = silence at shutdown)
453 rb->write_index = 0;
454 rb->read_index = 0;
455 rb->last_sample = 0.0f;
456 // Clear the actual data to zeros to prevent any stale audio
457 SAFE_MEMSET(rb->data, sizeof(rb->data), 0, sizeof(rb->data));
458 mutex_unlock(&rb->mutex);
459}
460
462 if (!rb || !data || samples <= 0)
463 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: rb=%p, data=%p, samples=%d", rb, data, samples);
464
465 // Validate samples doesn't exceed our buffer size
466 if (samples > AUDIO_RING_BUFFER_SIZE) {
467 return SET_ERRNO(ERROR_BUFFER, "Attempted to write %d samples, but buffer size is only %d", samples,
469 }
470
471 // LOCK-FREE: Load indices with proper memory ordering
472 // - Load our own write_index with relaxed (no sync needed with ourselves)
473 // - Load reader's read_index with acquire (see reader's updates to free space)
474 unsigned int write_idx = atomic_load_explicit(&rb->write_index, memory_order_relaxed);
475 unsigned int read_idx = atomic_load_explicit(&rb->read_index, memory_order_acquire);
476
477 // Calculate current buffer level (how many samples are buffered)
478 int buffer_level;
479 if (write_idx >= read_idx) {
480 buffer_level = (int)(write_idx - read_idx);
481 } else {
482 buffer_level = AUDIO_RING_BUFFER_SIZE - (int)(read_idx - write_idx);
483 }
484 int available = AUDIO_RING_BUFFER_SIZE - buffer_level;
485
486 // HIGH WATER MARK: Drop OLD samples to prevent latency accumulation
487 // This is critical for real-time audio - we always want the NEWEST data
488 // ALWAYS apply high-water-mark on write, regardless of jitter_buffer_enabled
489 // jitter_buffer_enabled only controls READ side (whether to wait for threshold)
490 // On WRITE side, we ALWAYS want to drop old samples to bound latency
491 if (buffer_level + samples > AUDIO_JITTER_HIGH_WATER_MARK) {
492 // Calculate how many old samples to drop to bring buffer to target level
493 int excess = (buffer_level + samples) - AUDIO_JITTER_TARGET_LEVEL;
494 if (excess > 0) {
495 // Advance read_index to drop old samples
496 // Note: This is safe because the reader checks for underrun and handles it gracefully
497 unsigned int new_read_idx = (read_idx + (unsigned int)excess) % AUDIO_RING_BUFFER_SIZE;
498 atomic_store_explicit(&rb->read_index, new_read_idx, memory_order_release);
499
501 "Audio buffer high water mark exceeded: dropping %d OLD samples to reduce latency "
502 "(buffer was %d, target %d)",
503 excess, buffer_level, AUDIO_JITTER_TARGET_LEVEL);
504
505 // Recalculate available space after dropping old samples
506 read_idx = new_read_idx;
507 buffer_level = AUDIO_JITTER_TARGET_LEVEL - samples;
508 if (buffer_level < 0)
509 buffer_level = 0;
510 available = AUDIO_RING_BUFFER_SIZE - buffer_level;
511 }
512 }
513
514 // Now write the new samples - should always have enough space after above
515 int samples_to_write = samples;
516 if (samples > available) {
517 // This should rarely happen after the high water mark logic above
518 int samples_dropped = samples - available;
519 samples_to_write = available;
520 log_warn_every(LOG_RATE_FAST, "Audio buffer overflow: dropping %d of %d incoming samples (buffer_used=%d/%d)",
521 samples_dropped, samples, AUDIO_RING_BUFFER_SIZE - available, AUDIO_RING_BUFFER_SIZE);
522 }
523
524 // Write only the samples that fit (preserves existing data integrity)
525 if (samples_to_write > 0) {
526 int remaining = AUDIO_RING_BUFFER_SIZE - (int)write_idx;
527
528 if (samples_to_write <= remaining) {
529 // Can copy in one chunk
530 SAFE_MEMCPY(&rb->data[write_idx], samples_to_write * sizeof(float), data, samples_to_write * sizeof(float));
531 } else {
532 // Need to wrap around - copy in two chunks
533 SAFE_MEMCPY(&rb->data[write_idx], remaining * sizeof(float), data, remaining * sizeof(float));
534 SAFE_MEMCPY(&rb->data[0], (samples_to_write - remaining) * sizeof(float), &data[remaining],
535 (samples_to_write - remaining) * sizeof(float));
536 }
537
538 // LOCK-FREE: Store new write_index with release ordering
539 // This ensures all data writes above are visible before the index update
540 unsigned int new_write_idx = (write_idx + (unsigned int)samples_to_write) % AUDIO_RING_BUFFER_SIZE;
541 atomic_store_explicit(&rb->write_index, new_write_idx, memory_order_release);
542 }
543
544 // Note: jitter buffer fill check is now done in read function for better control
545
546 return ASCIICHAT_OK; // Success
547}
548
/**
 * Read up to `samples` floats from the ring buffer into `data`, advancing the read index.
 *
 * Behaviour visible in this function:
 *  - Invalid arguments: sets errno via SET_ERRNO and returns 0.
 *  - Jitter buffering enabled and not yet filled: returns 0 without touching `data` until the
 *    buffered amount reaches AUDIO_JITTER_BUFFER_THRESHOLD, except while a fade-out is still in
 *    progress, in which case `data` is filled with the faded last sample plus silence.
 *  - Otherwise: copies what is available (handling wrap-around), applies a fade-in ramp while one
 *    is pending, pads any shortfall with zeros, and returns `samples`.
 *
 * @param rb       Ring buffer to read from.
 * @param data     Destination for `samples` floats.
 * @param samples  Number of samples requested.
 * @return 0 on invalid parameters or while the jitter buffer is filling; otherwise `samples`
 *         (the buffer is always completely written in that case).
 */
549size_t audio_ring_buffer_read(audio_ring_buffer_t *rb, float *data, size_t samples) {
// `samples` is a size_t, so `samples <= 0` can only be true for 0.
550 if (!rb || !data || samples <= 0) {
// NOTE(review): `samples` (size_t) is passed for a %d conversion here — confirm SET_ERRNO's formatting rules.
551 SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: rb=%p, data=%p, samples=%d", rb, data, samples);
552 return 0; // Return 0 samples read on error
553 }
554
555 // LOCK-FREE: Load indices with proper memory ordering
556 // - Load writer's write_index with acquire (see writer's data updates)
557 // - Load our own read_index with relaxed (no sync needed with ourselves)
558 unsigned int write_idx = atomic_load_explicit(&rb->write_index, memory_order_acquire);
559 unsigned int read_idx = atomic_load_explicit(&rb->read_index, memory_order_relaxed);
560
561 // Calculate available samples
// Equal indices are treated as an empty buffer (available == 0).
562 size_t available;
563 if (write_idx >= read_idx) {
564 available = write_idx - read_idx;
565 } else {
566 available = AUDIO_RING_BUFFER_SIZE - read_idx + write_idx;
567 }
568
569 // LOCK-FREE: Load jitter buffer state with acquire ordering
570 bool jitter_filled = atomic_load_explicit(&rb->jitter_buffer_filled, memory_order_acquire);
571 int crossfade_remaining = atomic_load_explicit(&rb->crossfade_samples_remaining, memory_order_acquire);
572 bool fade_in = atomic_load_explicit(&rb->crossfade_fade_in, memory_order_acquire);
573
574 // Jitter buffer: don't read until initial fill threshold is reached
575 // (only for playback buffers - capture buffers have jitter_buffer_enabled = false)
576 if (!jitter_filled && rb->jitter_buffer_enabled) {
577 // First, check if we're in the middle of a fade-out that needs to continue
578 // This happens when fade-out spans multiple buffer reads
579 if (!fade_in && crossfade_remaining > 0) {
580 // Continue fade-out from where we left off
581 int fade_start = AUDIO_CROSSFADE_SAMPLES - crossfade_remaining;
582 size_t fade_samples = (samples < (size_t)crossfade_remaining) ? samples : (size_t)crossfade_remaining;
583 float last = rb->last_sample; // NOT atomic - only written by reader
// Linear ramp of the held last sample towards zero over AUDIO_CROSSFADE_SAMPLES.
584 for (size_t i = 0; i < fade_samples; i++) {
585 float fade_factor = 1.0f - ((float)(fade_start + (int)i) / (float)AUDIO_CROSSFADE_SAMPLES);
586 data[i] = last * fade_factor;
587 }
588 // Fill rest with silence
589 for (size_t i = fade_samples; i < samples; i++) {
590 data[i] = 0.0f;
591 }
592 // Update crossfade state atomically
593 atomic_store_explicit(&rb->crossfade_samples_remaining, crossfade_remaining - (int)fade_samples,
594 memory_order_release);
595 if (crossfade_remaining - (int)fade_samples <= 0) {
596 rb->last_sample = 0.0f;
597 }
598
// No samples are consumed from the ring on this path; read_index is left untouched.
599 return samples; // Return full buffer (with continued fade-out)
600 }
601
602 // Check if we've accumulated enough samples to start playback
603 if (available >= AUDIO_JITTER_BUFFER_THRESHOLD) {
604 atomic_store_explicit(&rb->jitter_buffer_filled, true, memory_order_release);
605 atomic_store_explicit(&rb->crossfade_samples_remaining, AUDIO_CROSSFADE_SAMPLES, memory_order_release);
606 atomic_store_explicit(&rb->crossfade_fade_in, true, memory_order_release);
607 log_info("Jitter buffer filled (%zu samples), starting playback with fade-in", available);
608 // Reload state for processing below
609 jitter_filled = true;
610 crossfade_remaining = AUDIO_CROSSFADE_SAMPLES;
611 fade_in = true;
612 } else {
613 // Log buffer fill progress every second
// NOTE(review): the remaining arguments of this log call appear to be missing from this listing
// (embedded numbering jumps 614 -> 616).
614 log_debug_every(1000000, "Jitter buffer filling: %zu/%d samples (%.1f%%)", available,
// `data` is NOT written on this path; callers must treat a 0 return as "no output produced".
616 return 0; // Return silence until buffer is filled
617 }
618 }
619
620 // Periodic buffer health logging (every 5 seconds when healthy)
// Function-level static: the counter is shared by every ring buffer read through this function
// and is a plain (non-atomic) variable.
621 static unsigned int health_log_counter = 0;
622 if (++health_log_counter % 250 == 0) { // ~5 seconds at 50Hz callback rate
623 unsigned int underruns = atomic_load_explicit(&rb->underrun_count, memory_order_relaxed);
624 log_debug("Buffer health: %zu/%d samples (%.1f%%), underruns=%u", available, AUDIO_RING_BUFFER_SIZE,
625 (100.0f * available) / AUDIO_RING_BUFFER_SIZE, underruns);
626 }
627
628 // Low buffer handling: DON'T pause playback - continue reading what's available
629 // and fill the rest with silence. Pausing causes a feedback loop where:
630 // 1. Underrun -> pause reading -> buffer overflows from incoming samples
631 // 2. Threshold reached -> resume reading -> drains too fast -> underrun again
632 //
633 // Instead: always consume samples to prevent overflow, use silence for missing data
634 if (rb->jitter_buffer_enabled && available < AUDIO_JITTER_LOW_WATER_MARK) {
635 unsigned int underrun_count = atomic_fetch_add_explicit(&rb->underrun_count, 1, memory_order_relaxed) + 1;
// NOTE(review): the line naming the logging call appears to be missing from this listing
// (embedded numbering jumps 635 -> 637); the arguments below belong to that call.
637 "Audio buffer low #%u: only %zu samples available (low water mark: %d), padding with silence",
638 underrun_count, available, AUDIO_JITTER_LOW_WATER_MARK);
639 // Don't set jitter_buffer_filled = false - keep reading to prevent overflow
640 }
641
642 size_t to_read = (samples > available) ? available : samples;
643
644 // Optimize: copy in chunks instead of one sample at a time
// `remaining` = contiguous samples from read_idx to the end of the storage array.
645 size_t remaining = AUDIO_RING_BUFFER_SIZE - read_idx;
646
647 if (to_read <= remaining) {
648 // Can copy in one chunk
649 SAFE_MEMCPY(data, to_read * sizeof(float), &rb->data[read_idx], to_read * sizeof(float));
650 } else {
651 // Need to wrap around - copy in two chunks
652 SAFE_MEMCPY(data, remaining * sizeof(float), &rb->data[read_idx], remaining * sizeof(float));
653 SAFE_MEMCPY(&data[remaining], (to_read - remaining) * sizeof(float), &rb->data[0],
654 (to_read - remaining) * sizeof(float));
655 }
656
657 // LOCK-FREE: Store new read_index with release ordering
658 // This ensures all data reads above complete before the index update
659 unsigned int new_read_idx = (read_idx + (unsigned int)to_read) % AUDIO_RING_BUFFER_SIZE;
660 atomic_store_explicit(&rb->read_index, new_read_idx, memory_order_release);
661
662 // Apply fade-in if recovering from underrun
663 if (fade_in && crossfade_remaining > 0) {
664 int fade_start = AUDIO_CROSSFADE_SAMPLES - crossfade_remaining;
665 size_t fade_samples = (to_read < (size_t)crossfade_remaining) ? to_read : (size_t)crossfade_remaining;
666
// Linear ramp up; the "+ 1" makes the final faded sample reach a factor of exactly 1.0.
667 for (size_t i = 0; i < fade_samples; i++) {
668 float fade_factor = (float)(fade_start + (int)i + 1) / (float)AUDIO_CROSSFADE_SAMPLES;
669 data[i] *= fade_factor;
670 }
671
672 int new_crossfade_remaining = crossfade_remaining - (int)fade_samples;
673 atomic_store_explicit(&rb->crossfade_samples_remaining, new_crossfade_remaining, memory_order_release);
674 if (new_crossfade_remaining <= 0) {
675 atomic_store_explicit(&rb->crossfade_fade_in, false, memory_order_release);
676 log_debug("Audio fade-in complete");
677 }
678 }
679
680 // Save last sample for potential fade-out
681 // Note: only update if we actually read some data
682 // This is NOT atomic - only the reader thread writes this
683 if (to_read > 0) {
684 rb->last_sample = data[to_read - 1];
685 }
686
687 // Fill any remaining samples with pure silence if we couldn't read enough
688 // NOTE: Previous code applied fade-out from last sample, but this created
689 // audible "little extra sounds in the gaps" during frequent underruns.
690 // Pure silence is less disruptive than artificial fade artifacts.
691 if (to_read < samples) {
692 size_t silence_samples = samples - to_read;
693 SAFE_MEMSET(data + to_read, silence_samples * sizeof(float), 0, silence_samples * sizeof(float));
694 }
695
// The return value is the requested count, not to_read; callers cannot tell padding from real audio.
696 return samples; // Always return full buffer (with silence padding if needed)
697}
698
710size_t audio_ring_buffer_peek(audio_ring_buffer_t *rb, float *data, size_t samples) {
711 if (!rb || !data || samples <= 0) {
712 return 0;
713 }
714
715 // LOCK-FREE: Load indices with proper memory ordering
716 unsigned int write_idx = atomic_load_explicit(&rb->write_index, memory_order_acquire);
717 unsigned int read_idx = atomic_load_explicit(&rb->read_index, memory_order_relaxed);
718
719 // Calculate available samples
720 size_t available;
721 if (write_idx >= read_idx) {
722 available = write_idx - read_idx;
723 } else {
724 available = AUDIO_RING_BUFFER_SIZE - read_idx + write_idx;
725 }
726
727 size_t to_peek = (samples > available) ? available : samples;
728
729 if (to_peek == 0) {
730 return 0;
731 }
732
733 // Copy samples in chunks (handle wraparound)
734 size_t first_chunk = (read_idx + to_peek <= AUDIO_RING_BUFFER_SIZE) ? to_peek : (AUDIO_RING_BUFFER_SIZE - read_idx);
735
736 SAFE_MEMCPY(data, first_chunk * sizeof(float), rb->data + read_idx, first_chunk * sizeof(float));
737
738 if (first_chunk < to_peek) {
739 // Wraparound: copy second chunk from beginning of buffer
740 size_t second_chunk = to_peek - first_chunk;
741 SAFE_MEMCPY(data + first_chunk, second_chunk * sizeof(float), rb->data, second_chunk * sizeof(float));
742 }
743
744 return to_peek;
745}
746
748 if (!rb)
749 return 0;
750
751 // LOCK-FREE: Load indices with proper memory ordering
752 // Use acquire for write_index to see writer's updates
753 // Use relaxed for read_index (our own index)
754 unsigned int write_idx = atomic_load_explicit(&rb->write_index, memory_order_acquire);
755 unsigned int read_idx = atomic_load_explicit(&rb->read_index, memory_order_relaxed);
756
757 if (write_idx >= read_idx) {
758 return write_idx - read_idx;
759 }
760
761 return AUDIO_RING_BUFFER_SIZE - read_idx + write_idx;
762}
763
765 if (!rb) {
766 SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: rb is NULL");
767 return 0;
768 }
769
771}
772
// Body of the audio context initializer (appears to be audio_init(); the
// signature line is absent from this excerpt).
//
// Visible steps:
//  1. Reject a NULL ctx with ERROR_INVALID_PARAM.
//  2. Zero the whole audio_context_t and initialize ctx->state_mutex.
//  3. Under g_pa_refcount_mutex: call Pa_Initialize() only when
//     g_pa_init_refcount is 0, then increment the count.
//  4. Build a human-readable device list for a debug log.
//  5. Check ctx->capture_buffer and ctx->playback_buffer; if either is NULL,
//     drop the PortAudio reference again and return ERROR_MEMORY.
//  6. Mark the context initialized, clear shutting_down, return ASCIICHAT_OK.
//
// NOTE(review): several statements are absent from this excerpt (for example
// the assignments to ctx->capture_buffer / ctx->playback_buffer, and one
// statement in each failure path). Confirm them against the original file
// before relying on this description for cleanup behavior.
774 if (!ctx) {
775 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: ctx is NULL");
776 }
777
// Start from an all-zero context so every pointer/flag has a known value.
778 SAFE_MEMSET(ctx, sizeof(audio_context_t), 0, sizeof(audio_context_t));
779
780 if (mutex_init(&ctx->state_mutex) != 0) {
781 return SET_ERRNO(ERROR_THREAD, "Failed to initialize audio context mutex");
782 }
783
784 // Initialize PortAudio with reference counting
785 static_mutex_lock(&g_pa_refcount_mutex);
786 if (g_pa_init_refcount == 0) {
787 // Suppress PortAudio backend probe errors (ALSA/JACK/OSS warnings)
788 // These are harmless - PortAudio tries multiple backends until one works
789 int stderr_fd_backup = -1;
790 int devnull_fd = -1;
791#ifndef _WIN32
// Keep a duplicate of the real stderr so it can be restored after the probe.
792 stderr_fd_backup = dup(STDERR_FILENO);
793 devnull_fd = platform_open("/dev/null", O_WRONLY, 0);
// Only redirect when both descriptors are valid; otherwise stderr is left alone.
794 if (stderr_fd_backup >= 0 && devnull_fd >= 0) {
795 dup2(devnull_fd, STDERR_FILENO);
796 }
797#endif
798
799 PaError err = Pa_Initialize();
800
801 // Restore stderr IMMEDIATELY so real errors are visible
802#ifndef _WIN32
803 if (stderr_fd_backup >= 0) {
804 dup2(stderr_fd_backup, STDERR_FILENO);
805 close(stderr_fd_backup);
806 }
807 if (devnull_fd >= 0) {
808 close(devnull_fd);
809 }
810#endif
811
// Initialization failed: the refcount is NOT incremented on this path.
812 if (err != paNoError) {
813 static_mutex_unlock(&g_pa_refcount_mutex);
// NOTE(review): one statement is absent from this excerpt at this point.
815 // stderr is restored, so this error will be visible
816 return SET_ERRNO(ERROR_AUDIO, "Failed to initialize PortAudio: %s", Pa_GetErrorText(err));
817 }
818
819 log_debug("PortAudio initialized successfully (probe warnings suppressed)");
820 }
// This context now holds one reference on the shared PortAudio initialization.
821 g_pa_init_refcount++;
822 static_mutex_unlock(&g_pa_refcount_mutex);
823
824 // Enumerate all audio devices for debugging
825 int numDevices = Pa_GetDeviceCount();
// NOTE(review): max_device_info_size is a const object, not a constant
// expression, so in C device_names is a variable-length array.
826 const size_t max_device_info_size = 4096; // Limit total device info size
827 char device_names[max_device_info_size];
828 int offset = 0;
// Stop appending once fewer than 256 bytes remain, so one entry always fits.
829 for (int i = 0; i < numDevices && offset < (int)sizeof(device_names) - 256; i++) {
830 const PaDeviceInfo *deviceInfo = Pa_GetDeviceInfo(i);
831 if (deviceInfo && deviceInfo->name) {
832 int remaining = sizeof(device_names) - offset;
833 if (remaining < 256)
834 break;
835
836 int len = snprintf(&device_names[offset], remaining,
837 "\n Device %d: %s (inputs=%d, outputs=%d, sample_rate=%.0f Hz)%s%s", i, deviceInfo->name,
838 deviceInfo->maxInputChannels, deviceInfo->maxOutputChannels, deviceInfo->defaultSampleRate,
839 (i == Pa_GetDefaultInputDevice()) ? " [DEFAULT INPUT]" : "",
840 (i == Pa_GetDefaultOutputDevice()) ? " [DEFAULT OUTPUT]" : "");
// Advance only when the entry was written completely (no error, no truncation).
841 if (len > 0 && len < remaining) {
842 offset += len;
843 } else {
844 // Buffer full or error - stop here
845 break;
846 }
847 }
848 }
// Terminate at the last fully written entry (drops any truncated tail).
849 device_names[offset] = '\0';
850 if (offset > 0) {
851 log_debug("PortAudio found %d audio devices:%s", numDevices, device_names);
852 } else {
853 log_warn("PortAudio found no audio devices");
854 }
855
856 // Create capture buffer WITHOUT jitter buffering (PortAudio writes directly from microphone)
// NOTE(review): the statement that assigns ctx->capture_buffer is absent from this excerpt.
858 if (!ctx->capture_buffer) {
859 // Decrement refcount and terminate if this was the only context
860 static_mutex_lock(&g_pa_refcount_mutex);
861 if (g_pa_init_refcount > 0) {
862 g_pa_init_refcount--;
863 if (g_pa_init_refcount == 0) {
864 Pa_Terminate();
865 }
866 }
867 static_mutex_unlock(&g_pa_refcount_mutex);
869 return SET_ERRNO(ERROR_MEMORY, "Failed to create capture buffer");
870 }
871
// NOTE(review): the statement that assigns ctx->playback_buffer is absent from this excerpt.
873 if (!ctx->playback_buffer) {
875 // Decrement refcount and terminate if this was the only context
876 static_mutex_lock(&g_pa_refcount_mutex);
877 if (g_pa_init_refcount > 0) {
878 g_pa_init_refcount--;
879 if (g_pa_init_refcount == 0) {
880 Pa_Terminate();
881 }
882 }
883 static_mutex_unlock(&g_pa_refcount_mutex);
885 return SET_ERRNO(ERROR_MEMORY, "Failed to create playback buffer");
886 }
887
888 ctx->initialized = true;
889 atomic_store(&ctx->shutting_down, false);
890 log_info("Audio system initialized successfully");
891 return ASCIICHAT_OK;
892}
893
// Body of the audio context teardown (appears to be audio_destroy(); the
// signature line is absent from this excerpt).
//
// Visible behavior: does nothing for a NULL or uninitialized context. Otherwise
// it takes ctx->state_mutex, drops this context's reference on the shared
// PortAudio initialization (calling Pa_Terminate() when the count reaches 0),
// clears ctx->initialized and logs completion.
//
// NOTE(review): several statements are absent from this excerpt: the body of
// the `if (ctx->running)` branch, the statements right after the state mutex
// is locked, and the statements after ctx->initialized is cleared (no unlock
// of state_mutex is visible here). Confirm against the original file.
895 if (!ctx || !ctx->initialized) {
896 return;
897 }
898
899 // Stop duplex stream if running
900 if (ctx->running) {
902 }
903
904 mutex_lock(&ctx->state_mutex);
905
908
909 // Terminate PortAudio only when last context is destroyed
910 static_mutex_lock(&g_pa_refcount_mutex);
// The > 0 guard keeps the unsigned counter from wrapping below zero.
911 if (g_pa_init_refcount > 0) {
912 g_pa_init_refcount--;
913 if (g_pa_init_refcount == 0) {
914 Pa_Terminate();
915 }
916 }
917 static_mutex_unlock(&g_pa_refcount_mutex);
918
919 ctx->initialized = false;
920
923
924 log_info("Audio system destroyed");
925}
926
927void audio_set_pipeline(audio_context_t *ctx, void *pipeline) {
928 if (!ctx)
929 return;
930 ctx->audio_pipeline = pipeline;
931}
932
// Body of the stream start routine (appears to be audio_start_duplex(); the
// signature line is absent from this excerpt).
//
// Visible behavior:
//  - Returns ERROR_INVALID_STATE for a NULL or uninitialized context.
//  - Returns ASCIICHAT_OK immediately when any stream handle is already set.
//  - Picks input/output devices from the microphone_index / speakers_index
//    options (values >= 0) or the PortAudio defaults, and fails with
//    ERROR_AUDIO when a device or its info is unavailable.
//  - When both devices report the same default sample rate, tries one
//    full-duplex stream at AUDIO_SAMPLE_RATE driven by duplex_callback.
//  - Otherwise, or when the full-duplex stream fails to open or start, opens a
//    separate output stream (device default rate, output_callback) and input
//    stream (AUDIO_SAMPLE_RATE, input_callback) and starts both, closing
//    whatever was opened if a later step fails.
//  - On success sets ctx->running and ctx->separate_streams accordingly.
//
// NOTE(review): many statements are absent from this excerpt (there is a gap
// before most early returns, around the render buffer creation, and before the
// final return). ctx->state_mutex is locked near the top, but no unlock is
// visible here; presumably the missing lines contain it - confirm against the
// original file.
934 if (!ctx || !ctx->initialized) {
935 return SET_ERRNO(ERROR_INVALID_STATE, "Audio context not initialized");
936 }
937
938 mutex_lock(&ctx->state_mutex);
939
940 // Already running?
941 if (ctx->duplex_stream || ctx->input_stream || ctx->output_stream) {
943 return ASCIICHAT_OK;
944 }
945
946 // Setup input parameters
947 PaStreamParameters inputParams;
// A non-negative option value selects an explicit device; otherwise use the default.
948 if (GET_OPTION(microphone_index) >= 0) {
949 inputParams.device = GET_OPTION(microphone_index);
950 } else {
951 inputParams.device = Pa_GetDefaultInputDevice();
952 }
953
954 if (inputParams.device == paNoDevice) {
956 return SET_ERRNO(ERROR_AUDIO, "No input device available");
957 }
958
959 const PaDeviceInfo *inputInfo = Pa_GetDeviceInfo(inputParams.device);
960 if (!inputInfo) {
962 return SET_ERRNO(ERROR_AUDIO, "Input device info not found");
963 }
964
965 inputParams.channelCount = AUDIO_CHANNELS;
966 inputParams.sampleFormat = paFloat32;
967 inputParams.suggestedLatency = inputInfo->defaultLowInputLatency;
968 inputParams.hostApiSpecificStreamInfo = NULL;
969
970 // Setup output parameters
971 PaStreamParameters outputParams;
972 if (GET_OPTION(speakers_index) >= 0) {
973 outputParams.device = GET_OPTION(speakers_index);
974 } else {
975 outputParams.device = Pa_GetDefaultOutputDevice();
976 }
977
978 if (outputParams.device == paNoDevice) {
980 return SET_ERRNO(ERROR_AUDIO, "No output device available");
981 }
982
983 const PaDeviceInfo *outputInfo = Pa_GetDeviceInfo(outputParams.device);
984 if (!outputInfo) {
986 return SET_ERRNO(ERROR_AUDIO, "Output device info not found");
987 }
988
989 outputParams.channelCount = AUDIO_CHANNELS;
990 outputParams.sampleFormat = paFloat32;
991 outputParams.suggestedLatency = outputInfo->defaultLowOutputLatency;
992 outputParams.hostApiSpecificStreamInfo = NULL;
993
994 // Store device rates for diagnostics
995 ctx->input_device_rate = inputInfo->defaultSampleRate;
996 ctx->output_device_rate = outputInfo->defaultSampleRate;
997
998 log_info("Opening audio:");
999 log_info(" Input: %s (%.0f Hz)", inputInfo->name, inputInfo->defaultSampleRate);
1000 log_info(" Output: %s (%.0f Hz)", outputInfo->name, outputInfo->defaultSampleRate);
1001
1002 // Check if sample rates differ - ALSA full-duplex doesn't handle this well
1003 bool rates_differ = (inputInfo->defaultSampleRate != outputInfo->defaultSampleRate);
// try_separate starts as "rates differ" and is also set when full-duplex fails below.
1004 bool try_separate = rates_differ;
1005 PaError err = paNoError;
1006
1007 if (!try_separate) {
1008 // Try full-duplex first (preferred - perfect AEC3 timing)
1009 err = Pa_OpenStream(&ctx->duplex_stream, &inputParams, &outputParams, AUDIO_SAMPLE_RATE, AUDIO_FRAMES_PER_BUFFER,
1010 paClipOff, duplex_callback, ctx);
1011
1012 if (err == paNoError) {
1013 err = Pa_StartStream(ctx->duplex_stream);
1014 if (err != paNoError) {
// Opened but failed to start: close it and clear the handle before falling back.
1015 Pa_CloseStream(ctx->duplex_stream);
1016 ctx->duplex_stream = NULL;
1017 log_warn("Full-duplex stream failed to start: %s", Pa_GetErrorText(err));
1018 try_separate = true;
1019 }
1020 } else {
1021 log_warn("Full-duplex stream failed to open: %s", Pa_GetErrorText(err));
1022 try_separate = true;
1023 }
1024 }
1025
1026 if (try_separate) {
1027 // Fall back to separate streams (needed when sample rates differ)
// NOTE(review): this message says "sample rates differ" even when the fallback
// was triggered by a full-duplex open/start failure with matching rates.
1028 log_info("Using separate input/output streams (sample rates differ: %.0f vs %.0f Hz)", inputInfo->defaultSampleRate,
1029 outputInfo->defaultSampleRate);
1030 log_info(" Will resample: buffer at %.0f Hz → output at %.0f Hz", (double)AUDIO_SAMPLE_RATE,
1031 outputInfo->defaultSampleRate);
1032
1033 // Store the internal sample rate (buffer rate)
// NOTE(review): the statement this comment describes is absent from this excerpt.
1035
1036 // Create render buffer for AEC3 reference synchronization
// NOTE(review): the statement that assigns ctx->render_buffer is absent from this excerpt.
1038 if (!ctx->render_buffer) {
1040 return SET_ERRNO(ERROR_MEMORY, "Failed to create render buffer");
1041 }
1042
1043 // Open output stream at NATIVE device rate - we'll resample from 48kHz buffer in callback
1044 err = Pa_OpenStream(&ctx->output_stream, NULL, &outputParams, outputInfo->defaultSampleRate,
1045 AUDIO_FRAMES_PER_BUFFER, paClipOff, output_callback, ctx);
1046 if (err != paNoError) {
1048 ctx->render_buffer = NULL;
1050 return SET_ERRNO(ERROR_AUDIO, "Failed to open output stream: %s", Pa_GetErrorText(err));
1051 }
1052
1053 // Open input stream at PIPELINE rate (48kHz) - let PortAudio resample from device if needed
1054 // This ensures input matches sample_rate for AEC3, avoiding resampling in our callback
1055 err = Pa_OpenStream(&ctx->input_stream, &inputParams, NULL, AUDIO_SAMPLE_RATE, AUDIO_FRAMES_PER_BUFFER, paClipOff,
1056 input_callback, ctx);
1057 if (err != paNoError) {
// Undo the already opened output stream before reporting the failure.
1058 Pa_CloseStream(ctx->output_stream);
1059 ctx->output_stream = NULL;
1061 ctx->render_buffer = NULL;
1063 return SET_ERRNO(ERROR_AUDIO, "Failed to open input stream: %s", Pa_GetErrorText(err));
1064 }
1065
1066 // Start both streams
1067 err = Pa_StartStream(ctx->output_stream);
1068 if (err != paNoError) {
1069 Pa_CloseStream(ctx->input_stream);
1070 Pa_CloseStream(ctx->output_stream);
1071 ctx->input_stream = NULL;
1072 ctx->output_stream = NULL;
1074 ctx->render_buffer = NULL;
1076 return SET_ERRNO(ERROR_AUDIO, "Failed to start output stream: %s", Pa_GetErrorText(err));
1077 }
1078
1079 err = Pa_StartStream(ctx->input_stream);
1080 if (err != paNoError) {
// The output stream is already running at this point, so stop it before closing.
1081 Pa_StopStream(ctx->output_stream);
1082 Pa_CloseStream(ctx->input_stream);
1083 Pa_CloseStream(ctx->output_stream);
1084 ctx->input_stream = NULL;
1085 ctx->output_stream = NULL;
1087 ctx->render_buffer = NULL;
1089 return SET_ERRNO(ERROR_AUDIO, "Failed to start input stream: %s", Pa_GetErrorText(err));
1090 }
1091
1092 ctx->separate_streams = true;
1093 log_info("Separate streams started successfully");
1094 } else {
1095 ctx->separate_streams = false;
1096 log_info("Full-duplex stream started (single callback, perfect AEC3 timing)");
1097 }
1098
1100
1101 ctx->running = true;
1104
1105 return ASCIICHAT_OK;
1106}
1107
// Body of the stream stop routine (appears to be audio_stop_duplex(); the
// signature line is absent from this excerpt).
//
// Visible behavior: returns ERROR_INVALID_STATE for a NULL or uninitialized
// context. Otherwise it raises ctx->shutting_down, waits 50 ms, then under
// ctx->state_mutex stops and closes whichever of the duplex / input / output
// streams exist, clears the render buffer pointer and the running /
// separate_streams flags, and returns ASCIICHAT_OK.
//
// NOTE(review): some statements are absent from this excerpt: the body of the
// `if (ctx->playback_buffer)` branch, one statement before the render buffer
// pointer is cleared, and one statement before the final log call (no unlock
// of state_mutex is visible here). Confirm against the original file.
1109 if (!ctx || !ctx->initialized) {
1110 return SET_ERRNO(ERROR_INVALID_STATE, "Audio context not initialized");
1111 }
1112
// Set the shutdown flag first; duplex_callback checks it and outputs silence.
1113 atomic_store(&ctx->shutting_down, true);
1114
1115 if (ctx->playback_buffer) {
1117 }
1118
1119 Pa_Sleep(50); // Let callbacks drain
1120
1121 mutex_lock(&ctx->state_mutex);
1122
1123 if (ctx->duplex_stream) {
1124 Pa_StopStream(ctx->duplex_stream);
1125 Pa_CloseStream(ctx->duplex_stream);
1126 ctx->duplex_stream = NULL;
1127 }
1128
1129 // Stop separate streams if used
1130 if (ctx->input_stream) {
1131 Pa_StopStream(ctx->input_stream);
1132 Pa_CloseStream(ctx->input_stream);
1133 ctx->input_stream = NULL;
1134 }
1135
1136 if (ctx->output_stream) {
1137 Pa_StopStream(ctx->output_stream);
1138 Pa_CloseStream(ctx->output_stream);
1139 ctx->output_stream = NULL;
1140 }
1141
1142 // Cleanup render buffer
1143 if (ctx->render_buffer) {
1145 ctx->render_buffer = NULL;
1146 }
1147
1148 ctx->running = false;
1149 ctx->separate_streams = false;
1151
1152 log_info("Audio stopped");
1153 return ASCIICHAT_OK;
1154}
1155
1156asciichat_error_t audio_read_samples(audio_context_t *ctx, float *buffer, int num_samples) {
1157 if (!ctx || !ctx->initialized || !buffer || num_samples <= 0) {
1158 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: ctx=%p, buffer=%p, num_samples=%d", ctx, buffer,
1159 num_samples);
1160 }
1161
1162 // audio_ring_buffer_read now returns number of samples read, not error code
1163 int samples_read = audio_ring_buffer_read(ctx->capture_buffer, buffer, num_samples);
1164 return (samples_read >= 0) ? ASCIICHAT_OK : ERROR_AUDIO;
1165}
1166
1167asciichat_error_t audio_write_samples(audio_context_t *ctx, const float *buffer, int num_samples) {
1168 if (!ctx || !ctx->initialized || !buffer || num_samples <= 0) {
1169 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: ctx=%p, buffer=%p, num_samples=%d", ctx, buffer,
1170 num_samples);
1171 }
1172
1173 // Don't accept new audio data during shutdown - this prevents garbage/beeps
1174 if (atomic_load(&ctx->shutting_down)) {
1175 return ASCIICHAT_OK; // Silently discard
1176 }
1177
1178 asciichat_error_t result = audio_ring_buffer_write(ctx->playback_buffer, buffer, num_samples);
1179
1180 return result;
1181}
1182
1183// Internal helper to list audio devices (input or output)
1184static asciichat_error_t audio_list_devices_internal(audio_device_info_t **out_devices, unsigned int *out_count,
1185 bool list_inputs) {
1186 if (!out_devices || !out_count) {
1187 return SET_ERRNO(ERROR_INVALID_PARAM, "audio_list_devices: invalid parameters");
1188 }
1189
1190 *out_devices = NULL;
1191 *out_count = 0;
1192
1193 // Check if PortAudio is already initialized by another audio context
1194 static_mutex_lock(&g_pa_refcount_mutex);
1195 bool pa_was_initialized = (g_pa_init_refcount > 0);
1196
1197 // Initialize PortAudio only if not already initialized
1198 if (!pa_was_initialized) {
1199 // Suppress PortAudio backend probe errors (ALSA/JACK/OSS warnings)
1200 // These are harmless - PortAudio tries multiple backends until one works
1201 int stderr_fd_backup = -1;
1202 int devnull_fd = -1;
1203#ifndef _WIN32
1204 stderr_fd_backup = dup(STDERR_FILENO);
1205 devnull_fd = platform_open("/dev/null", O_WRONLY, 0);
1206 if (stderr_fd_backup >= 0 && devnull_fd >= 0) {
1207 dup2(devnull_fd, STDERR_FILENO);
1208 }
1209#endif
1210
1211 PaError err = Pa_Initialize();
1212
1213 // Restore stderr
1214#ifndef _WIN32
1215 if (stderr_fd_backup >= 0) {
1216 dup2(stderr_fd_backup, STDERR_FILENO);
1217 close(stderr_fd_backup);
1218 }
1219 if (devnull_fd >= 0) {
1220 close(devnull_fd);
1221 }
1222#endif
1223
1224 if (err != paNoError) {
1225 static_mutex_unlock(&g_pa_refcount_mutex);
1226 return SET_ERRNO(ERROR_AUDIO, "Failed to initialize PortAudio: %s", Pa_GetErrorText(err));
1227 }
1228 g_pa_init_refcount = 1; // Set refcount to 1 for temporary initialization
1229 } else {
1230 // PortAudio is already initialized - increment refcount to prevent
1231 // termination while we're using it
1232 g_pa_init_refcount++;
1233 }
1234 static_mutex_unlock(&g_pa_refcount_mutex);
1235
1236 int num_devices = Pa_GetDeviceCount();
1237 if (num_devices < 0) {
1238 // Decrement refcount and terminate only if we initialized PortAudio ourselves
1239 static_mutex_lock(&g_pa_refcount_mutex);
1240 if (g_pa_init_refcount > 0) {
1241 g_pa_init_refcount--;
1242 if (!pa_was_initialized && g_pa_init_refcount == 0) {
1243 Pa_Terminate();
1244 }
1245 }
1246 static_mutex_unlock(&g_pa_refcount_mutex);
1247 return SET_ERRNO(ERROR_AUDIO, "Failed to get device count: %s", Pa_GetErrorText(num_devices));
1248 }
1249
1250 if (num_devices == 0) {
1251 // Decrement refcount and terminate only if we initialized PortAudio ourselves
1252 static_mutex_lock(&g_pa_refcount_mutex);
1253 if (g_pa_init_refcount > 0) {
1254 g_pa_init_refcount--;
1255 if (!pa_was_initialized && g_pa_init_refcount == 0) {
1256 Pa_Terminate();
1257 }
1258 }
1259 static_mutex_unlock(&g_pa_refcount_mutex);
1260 return ASCIICHAT_OK; // No devices found
1261 }
1262
1263 // Get default device indices
1264 PaDeviceIndex default_input = Pa_GetDefaultInputDevice();
1265 PaDeviceIndex default_output = Pa_GetDefaultOutputDevice();
1266
1267 // First pass: count matching devices
1268 unsigned int device_count = 0;
1269 for (int i = 0; i < num_devices; i++) {
1270 const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
1271 if (info) {
1272 bool matches = list_inputs ? (info->maxInputChannels > 0) : (info->maxOutputChannels > 0);
1273 if (matches) {
1274 device_count++;
1275 }
1276 }
1277 }
1278
1279 if (device_count == 0) {
1280 // Decrement refcount and terminate only if we initialized PortAudio ourselves
1281 static_mutex_lock(&g_pa_refcount_mutex);
1282 if (g_pa_init_refcount > 0) {
1283 g_pa_init_refcount--;
1284 if (!pa_was_initialized && g_pa_init_refcount == 0) {
1285 Pa_Terminate();
1286 }
1287 }
1288 static_mutex_unlock(&g_pa_refcount_mutex);
1289 return ASCIICHAT_OK; // No matching devices
1290 }
1291
1292 // Allocate device array
1293 audio_device_info_t *devices = SAFE_CALLOC(device_count, sizeof(audio_device_info_t), audio_device_info_t *);
1294 if (!devices) {
1295 // Decrement refcount and terminate only if we initialized PortAudio ourselves
1296 static_mutex_lock(&g_pa_refcount_mutex);
1297 if (g_pa_init_refcount > 0) {
1298 g_pa_init_refcount--;
1299 if (!pa_was_initialized && g_pa_init_refcount == 0) {
1300 Pa_Terminate();
1301 }
1302 }
1303 static_mutex_unlock(&g_pa_refcount_mutex);
1304 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate audio device info array");
1305 }
1306
1307 // Second pass: populate device info
1308 unsigned int idx = 0;
1309 for (int i = 0; i < num_devices && idx < device_count; i++) {
1310 const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
1311 if (!info)
1312 continue;
1313
1314 bool match = list_inputs ? (info->maxInputChannels > 0) : (info->maxOutputChannels > 0);
1315 if (!match)
1316 continue;
1317
1318 devices[idx].index = i;
1319 if (info->name) {
1320 SAFE_STRNCPY(devices[idx].name, info->name, AUDIO_DEVICE_NAME_MAX);
1321 } else {
1322 SAFE_STRNCPY(devices[idx].name, "<Unknown>", AUDIO_DEVICE_NAME_MAX);
1323 }
1324 devices[idx].max_input_channels = info->maxInputChannels;
1325 devices[idx].max_output_channels = info->maxOutputChannels;
1326 devices[idx].default_sample_rate = info->defaultSampleRate;
1327 devices[idx].is_default_input = (i == default_input);
1328 devices[idx].is_default_output = (i == default_output);
1329 idx++;
1330 }
1331
1332 // Decrement refcount and terminate only if we initialized PortAudio ourselves
1333 static_mutex_lock(&g_pa_refcount_mutex);
1334 if (g_pa_init_refcount > 0) {
1335 g_pa_init_refcount--;
1336 if (!pa_was_initialized && g_pa_init_refcount == 0) {
1337 Pa_Terminate();
1338 }
1339 }
1340 static_mutex_unlock(&g_pa_refcount_mutex);
1341
1342 *out_devices = devices;
1343 *out_count = idx;
1344 return ASCIICHAT_OK;
1345}
1346
1347asciichat_error_t audio_list_input_devices(audio_device_info_t **out_devices, unsigned int *out_count) {
1348 return audio_list_devices_internal(out_devices, out_count, true);
1349}
1350
1351asciichat_error_t audio_list_output_devices(audio_device_info_t **out_devices, unsigned int *out_count) {
1352 return audio_list_devices_internal(out_devices, out_count, false);
1353}
1354
1356 SAFE_FREE(devices);
1357}
1358
1359asciichat_error_t audio_dequantize_samples(const uint8_t *samples_ptr, uint32_t total_samples, float *out_samples) {
1360 if (!samples_ptr || !out_samples || total_samples == 0) {
1361 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters for audio dequantization");
1362 }
1363
1364 for (uint32_t i = 0; i < total_samples; i++) {
1365 uint32_t network_sample;
1366 // Use memcpy to safely handle potential misalignment from packet header
1367 memcpy(&network_sample, samples_ptr + i * sizeof(uint32_t), sizeof(uint32_t));
1368 int32_t scaled = (int32_t)NET_TO_HOST_U32(network_sample);
1369 out_samples[i] = (float)scaled / 2147483647.0f;
1370 }
1371
1372 return ASCIICHAT_OK;
1373}
1374
// Body of the real-time priority request (appears to be
// audio_set_realtime_priority(); the signature line is absent from this
// excerpt).
//
// Visible behavior: logs a success message when `result` equals ASCIICHAT_OK
// and returns `result` unchanged either way.
//
// NOTE(review): the statement that declares and assigns `result` is absent
// from this excerpt; per the comment below it presumably comes from a platform
// layer call - confirm against the original file.
1376 // Delegate to platform abstraction layer
1378 if (result == ASCIICHAT_OK) {
1379 log_info("✓ Audio thread real-time priority set successfully");
1380 }
1381 return result;
1382}
1383
1384/* ============================================================================
1385 * Audio Batch Packet Parsing
1386 * ============================================================================
1387 */
1388
1389asciichat_error_t audio_parse_batch_header(const void *data, size_t len, audio_batch_info_t *out_batch) {
1390 if (!data) {
1391 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch header data pointer is NULL");
1392 }
1393
1394 if (!out_batch) {
1395 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch info output pointer is NULL");
1396 }
1397
1398 if (len < sizeof(audio_batch_packet_t)) {
1399 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch header too small (len=%zu, expected=%zu)", len,
1400 sizeof(audio_batch_packet_t));
1401 }
1402
1403 const audio_batch_packet_t *batch_header = (const audio_batch_packet_t *)data;
1404
1405 // Unpack network byte order values to host byte order
1406 out_batch->batch_count = ntohl(batch_header->batch_count);
1407 out_batch->total_samples = ntohl(batch_header->total_samples);
1408 out_batch->sample_rate = ntohl(batch_header->sample_rate);
1409 out_batch->channels = ntohl(batch_header->channels);
1410
1411 return ASCIICHAT_OK;
1412}
1413
// Body of the batch parameter sanity check (appears to be
// audio_validate_batch_params(); the signature line is absent from this
// excerpt).
//
// Visible checks, each failing with ERROR_INVALID_PARAM via SET_ERRNO:
//  - batch must not be NULL
//  - batch_count must be in 1..256
//  - channels must be in 1..8
//  - a sample-rate check (its condition is not visible, see note below)
//  - total_samples must be in 1..1000000
// Returns ASCIICHAT_OK when every check passes.
1415 if (!batch) {
1416 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch info pointer is NULL");
1417 }
1418
1419 // Validate batch_count
1420 if (batch->batch_count == 0) {
1421 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch count cannot be zero");
1422 }
1423
1424 // Check for reasonable max (256 frames per batch is very generous)
1425 if (batch->batch_count > 256) {
1426 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch count too large (batch_count=%u, max=256)", batch->batch_count);
1427 }
1428
1429 // Validate channels (1=mono, 2=stereo, max 8 for multi-channel)
1430 if (batch->channels == 0 || batch->channels > 8) {
1431 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid channel count (channels=%u, valid=1-8)", batch->channels);
1432 }
1433
1434 // Validate sample rate
// NOTE(review): the `if` line that opens this branch is absent from this
// excerpt; presumably it tests batch->sample_rate against the supported-rate
// list - confirm against the original file.
1436 return SET_ERRNO(ERROR_INVALID_PARAM, "Unsupported sample rate (sample_rate=%u)", batch->sample_rate);
1437 }
1438
1439 // Check for reasonable sample counts
1440 if (batch->total_samples == 0) {
1441 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch has zero samples");
1442 }
1443
1444 // Each batch typically has samples_per_frame worth of samples
1445 // For 48kHz at 20ms per frame: 48000 * 0.02 = 960 samples per frame
1446 // With max 256 frames, that's up to ~245k samples per batch
// The 1,000,000 cap is a loose upper bound, well above the ~245k estimate above.
1447 if (batch->total_samples > 1000000) {
1448 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch sample count suspiciously large (total_samples=%u)",
1449 batch->total_samples);
1450 }
1451
1452 return ASCIICHAT_OK;
1453}
1454
/**
 * Check if a sample rate is a standard/supported rate.
 *
 * @param sample_rate Sample rate in Hz.
 * @return true when sample_rate exactly matches one of the rates in the table
 *         below, false otherwise.
 */
bool audio_is_supported_sample_rate(uint32_t sample_rate) {
  // List of commonly supported audio sample rates
  static const uint32_t supported_rates[] = {
      8000,   // Telephone quality
      16000,  // Wideband telephony
      24000,  // High quality speech
      32000,  // Good for video
      44100,  // CD quality (less common in VoIP)
      48000,  // Standard professional
      96000,  // High-end professional
      192000, // Ultra-high-end mastering
  };

  const size_t rate_count = sizeof(supported_rates) / sizeof(supported_rates[0]);
  for (size_t i = 0; i < rate_count; i++) {
    if (sample_rate == supported_rates[i]) {
      return true;
    }
  }

  return false;
}
⚠️‼️ Error and/or exit() when things go bad.
🗃️ Lock-Free Unified Memory Buffer Pool with Lazy Allocation
void client_audio_pipeline_process_duplex(client_audio_pipeline_t *pipeline, const float *render_samples, int render_count, const float *capture_samples, int capture_count, float *processed_output)
Process AEC3 inline in full-duplex callback.
Unified client-side audio processing pipeline.
🔄 Network byte order conversion helpers
#define NET_TO_HOST_U32(val)
Definition endian.h:86
size_t audio_ring_buffer_read(audio_ring_buffer_t *rb, float *data, size_t samples)
Read audio samples from ring buffer.
void audio_free_device_list(audio_device_info_t *devices)
Free device list allocated by audio_list_input_devices/audio_list_output_devices.
asciichat_error_t audio_ring_buffer_write(audio_ring_buffer_t *rb, const float *data, int samples)
Write audio samples to ring buffer.
#define AUDIO_SAMPLE_RATE
Audio sample rate (48kHz professional quality, Opus-compatible)
void audio_ring_buffer_destroy(audio_ring_buffer_t *rb)
Destroy an audio ring buffer.
asciichat_error_t audio_init(audio_context_t *ctx)
Initialize audio context and PortAudio.
asciichat_error_t audio_set_realtime_priority(void)
Request real-time priority for current thread.
audio_ring_buffer_t * audio_ring_buffer_create(void)
Create a new audio ring buffer (for playback with jitter buffering)
bool audio_is_supported_sample_rate(uint32_t sample_rate)
Check if a sample rate is a standard/supported rate.
#define AUDIO_FRAMES_PER_BUFFER
Audio frames per buffer (480 = 10ms at 48kHz, matches WebRTC AEC3 frame size)
size_t audio_ring_buffer_available_read(audio_ring_buffer_t *rb)
Get number of samples available for reading.
asciichat_error_t audio_start_duplex(audio_context_t *ctx)
Start full-duplex audio (simultaneous capture and playback)
asciichat_error_t audio_parse_batch_header(const void *data, size_t len, audio_batch_info_t *out_batch)
Parse an audio batch packet header from raw packet data.
void audio_destroy(audio_context_t *ctx)
Destroy audio context and clean up resources.
asciichat_error_t audio_stop_duplex(audio_context_t *ctx)
Stop full-duplex audio.
asciichat_error_t audio_write_samples(audio_context_t *ctx, const float *buffer, int num_samples)
Write audio samples to playback buffer.
#define AUDIO_CHANNELS
Number of audio channels (1 = mono)
void audio_set_pipeline(audio_context_t *ctx, void *pipeline)
Set audio pipeline for echo cancellation.
audio_ring_buffer_t * audio_ring_buffer_create_for_capture(void)
Create a new audio ring buffer for capture (without jitter buffering)
size_t audio_ring_buffer_peek(audio_ring_buffer_t *rb, float *data, size_t samples)
Peek at available samples without consuming them (for AEC3 render signal)
asciichat_error_t audio_validate_batch_params(const audio_batch_info_t *batch)
Validate audio batch parameters for sanity.
asciichat_error_t audio_list_input_devices(audio_device_info_t **out_devices, unsigned int *out_count)
List available audio input devices (microphones)
asciichat_error_t audio_dequantize_samples(const uint8_t *samples_ptr, uint32_t total_samples, float *out_samples)
Dequantize network audio samples from int32 to float.
asciichat_error_t audio_list_output_devices(audio_device_info_t **out_devices, unsigned int *out_count)
List available audio output devices (speakers)
void audio_ring_buffer_clear(audio_ring_buffer_t *rb)
Clear all audio samples from ring buffer.
asciichat_error_t audio_read_samples(audio_context_t *ctx, float *buffer, int num_samples)
Read captured audio samples from capture buffer.
void resample_linear(const float *src, size_t src_samples, float *dst, size_t dst_samples, double src_rate, double dst_rate)
Resample audio using linear interpolation.
size_t audio_ring_buffer_available_write(audio_ring_buffer_t *rb)
Get number of sample slots available for writing.
void buffer_pool_free(buffer_pool_t *pool, void *data, size_t size)
Free a buffer back to the pool (lock-free)
void * buffer_pool_alloc(buffer_pool_t *pool, size_t size)
Allocate a buffer from the pool (lock-free fast path)
unsigned int uint32_t
Definition common.h:58
#define SAFE_STRNCPY(dst, src, size)
Definition common.h:358
#define SAFE_FREE(ptr)
Definition common.h:320
#define SAFE_MEMSET(dest, dest_size, ch, count)
Definition common.h:389
#define SAFE_CALLOC(count, size, cast)
Definition common.h:218
unsigned char uint8_t
Definition common.h:56
#define SAFE_MEMCPY(dest, dest_size, src, count)
Definition common.h:388
#define SET_ERRNO(code, context_msg,...)
Set error code with custom context message and log it.
asciichat_error_t
Error and exit codes - unified status values (0-255)
Definition error_codes.h:46
@ ERROR_INVALID_STATE
@ ERROR_AUDIO
Definition error_codes.h:64
@ ERROR_MEMORY
Definition error_codes.h:53
@ ASCIICHAT_OK
Definition error_codes.h:48
@ ERROR_INVALID_PARAM
@ ERROR_BUFFER
Definition error_codes.h:96
@ ERROR_THREAD
Definition error_codes.h:95
#define LOG_RATE_FAST
Log rate limit: 1 second (1,000,000 microseconds)
Definition log_rates.h:26
#define log_warn(...)
Log a WARN message.
#define log_info_every(interval_us, fmt,...)
Rate-limited INFO logging.
#define log_debug_every(interval_us, fmt,...)
Rate-limited DEBUG logging.
#define log_info(...)
Log an INFO message.
#define log_debug(...)
Log a DEBUG message.
#define log_warn_every(interval_us, fmt,...)
Rate-limited WARN logging.
uint32_t channels
Number of audio channels (1=mono, 2=stereo)
Definition packet.h:804
uint32_t batch_count
Number of audio chunks in this batch (usually AUDIO_BATCH_COUNT = 32)
Definition packet.h:798
uint32_t sample_rate
Sample rate in Hz (e.g., 44100, 48000)
Definition packet.h:802
uint32_t total_samples
Total audio samples across all chunks (typically 8192)
Definition packet.h:800
#define GET_OPTION(field)
Safely get a specific option field (lock-free read)
Definition options.h:644
int mutex_init(mutex_t *mutex)
Initialize a mutex.
asciichat_error_t asciichat_thread_set_realtime_priority(void)
Set the current thread to real-time priority.
#define mutex_lock(mutex)
Lock a mutex (with debug tracking in debug builds)
Definition mutex.h:140
#define STATIC_MUTEX_INIT
Definition init.h:107
int platform_open(const char *pathname, int flags,...)
Safe file open (open replacement)
#define mutex_unlock(mutex)
Unlock a mutex (with debug tracking in debug builds)
Definition mutex.h:175
int mutex_destroy(mutex_t *mutex)
Destroy a mutex.
struct audio_ring_buffer audio_ring_buffer_t
Audio ring buffer for real-time audio streaming.
#define AUDIO_JITTER_HIGH_WATER_MARK
High water mark - drop OLD samples when buffer exceeds this.
Definition ringbuffer.h:172
#define AUDIO_JITTER_BUFFER_THRESHOLD
Jitter buffer threshold - samples needed before starting playback.
Definition ringbuffer.h:148
#define AUDIO_RING_BUFFER_SIZE
Audio ring buffer size in samples (192000 samples = 4 seconds @ 48kHz)
Definition ringbuffer.h:135
#define AUDIO_JITTER_LOW_WATER_MARK
Low water mark - warn when available drops below this.
Definition ringbuffer.h:159
#define AUDIO_JITTER_TARGET_LEVEL
Target buffer level after dropping old samples.
Definition ringbuffer.h:182
#define AUDIO_CROSSFADE_SAMPLES
Crossfade duration in samples for smooth underrun recovery.
Definition ringbuffer.h:189
Platform initialization and static synchronization helpers.
🔊 Audio Capture and Playback Interface for ascii-chat
#define AUDIO_DEVICE_NAME_MAX
Maximum length of audio device name.
📝 Logging API with multiple log levels and terminal output control
🔢 Mathematical Utility Functions
⚙️ Command-line options parsing and configuration management for ascii-chat
Packet protocol implementation with encryption and compression support.
Parsed audio batch packet header information.
uint32_t batch_count
Number of audio frames in this batch.
uint32_t channels
Number of channels (1=mono, 2=stereo)
uint32_t sample_rate
Sample rate in Hz (e.g., 48000)
uint32_t total_samples
Total number of samples across all frames.
Audio batch packet structure (Packet Type 28)
Definition packet.h:796
Audio context for full-duplex capture and playback.
bool initialized
True if context has been initialized.
double input_device_rate
Native sample rate of input device.
audio_ring_buffer_t * playback_buffer
Ring buffer for decoded audio from network.
PaStream * input_stream
Separate input stream (when full-duplex unavailable)
bool separate_streams
True if using separate input/output streams.
double sample_rate
Actual sample rate of streams (48kHz)
audio_ring_buffer_t * capture_buffer
Ring buffer for processed capture (after AEC3) for encoder thread.
void * audio_pipeline
Client audio pipeline for AEC3 echo cancellation (opaque pointer)
bool running
True if duplex stream is active.
audio_ring_buffer_t * render_buffer
Ring buffer for render reference (separate streams mode)
double output_device_rate
Native sample rate of output device.
mutex_t state_mutex
Mutex protecting context state.
_Atomic bool shutting_down
True when shutdown started - callback outputs silence.
PaStream * duplex_stream
PortAudio full-duplex stream (simultaneous input+output)
PaStream * output_stream
Separate output stream (when full-duplex unavailable)
Audio device information structure.
int max_output_channels
Maximum output channels (0 if input only)
int max_input_channels
Maximum input channels (0 if output only)
int index
PortAudio device index.
double default_sample_rate
Default sample rate in Hz.
bool is_default_output
True if this is the default output device.
bool is_default_input
True if this is the default input device.
Audio ring buffer for real-time audio streaming.
Definition ringbuffer.h:208
atomic_uint read_index
Read index (consumer position) - LOCK-FREE with atomic operations.
Definition ringbuffer.h:214
float last_sample
Last sample value for smooth fade-out during underrun - NOT atomic (only written by reader)
Definition ringbuffer.h:222
atomic_bool jitter_buffer_filled
True after initial jitter buffer fill.
Definition ringbuffer.h:216
atomic_uint underrun_count
Count of underrun events for diagnostics.
Definition ringbuffer.h:224
atomic_int crossfade_samples_remaining
Samples remaining in crossfade (0 = no crossfade active)
Definition ringbuffer.h:218
mutex_t mutex
Mutex for SLOW PATH only (clear/destroy operations, not regular read/write)
Definition ringbuffer.h:228
bool jitter_buffer_enabled
Whether jitter buffering is enabled (false for capture, true for playback)
Definition ringbuffer.h:226
atomic_uint write_index
Write index (producer position) - LOCK-FREE with atomic operations.
Definition ringbuffer.h:212
float data[192000]
Audio sample data buffer.
Definition ringbuffer.h:210
atomic_bool crossfade_fade_in
True if we're fading in (recovering from underrun)
Definition ringbuffer.h:220
Client audio pipeline state.
Static mutex structure for global mutexes requiring static initialization.
Definition init.h:40