ascii-chat 0.8.38
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
lib/audio/audio.c
Go to the documentation of this file.
1
8#include <ascii-chat/audio/audio.h>
9#include <ascii-chat/audio/client_audio_pipeline.h>
10#include <ascii-chat/util/endian.h>
11#include <ascii-chat/common.h>
12#include <ascii-chat/util/endian.h>
13#include <ascii-chat/util/time.h> // For START_TIMER/STOP_TIMER macros
14#include <ascii-chat/asciichat_errno.h> // For asciichat_errno system
15#include <ascii-chat/buffer_pool.h>
16#include <ascii-chat/options/options.h>
17#include <ascii-chat/platform/init.h> // For static_mutex_t
18#include <ascii-chat/platform/abstraction.h> // For platform_sleep_us
19#include <ascii-chat/network/packet.h> // For audio_batch_packet_t
20#include <ascii-chat/log/logging.h> // For log_* macros
21#include <ascii-chat/media/source.h> // For media_source_read_audio()
22#include <stdlib.h>
23#include <string.h>
24#include <math.h>
25#include <stdatomic.h>
26
27#ifdef _WIN32
28#include <malloc.h> // For _alloca on Windows
29#define alloca _alloca
30#else
31#include <unistd.h> // For dup, dup2, close, STDERR_FILENO
32#include <fcntl.h> // For O_WRONLY
33#endif
34
35// PortAudio initialization reference counter
36// Tracks how many audio contexts are using PortAudio to avoid conflicts
37static unsigned int g_pa_init_refcount = 0;
38static static_mutex_t g_pa_refcount_mutex = STATIC_MUTEX_INIT;
39
40// Track how many times Pa_Initialize and Pa_Terminate are called
41static int g_pa_init_count = 0;
42static int g_pa_terminate_count = 0;
43
53static asciichat_error_t audio_ensure_portaudio_initialized(void) {
54 static_mutex_lock(&g_pa_refcount_mutex);
55
56 // If already initialized, just increment refcount
57 if (g_pa_init_refcount > 0) {
58 g_pa_init_refcount++;
59 static_mutex_unlock(&g_pa_refcount_mutex);
60 return ASCIICHAT_OK;
61 }
62
63 // First initialization - call Pa_Initialize() exactly once
64 // Suppress PortAudio backend probe errors (ALSA/JACK/OSS warnings)
65 // These are harmless - PortAudio tries multiple backends until one works
66 fflush(stderr);
67 fflush(stdout);
68 platform_stderr_redirect_handle_t stdio_handle = platform_stdout_stderr_redirect_to_null();
69
70 g_pa_init_count++;
71 PaError err = Pa_Initialize();
72
73 // Restore stdout and stderr before checking errors
74 platform_stdout_stderr_restore(stdio_handle);
75
76 if (err != paNoError) {
77 static_mutex_unlock(&g_pa_refcount_mutex);
78 return SET_ERRNO(ERROR_AUDIO, "Failed to initialize PortAudio: %s", Pa_GetErrorText(err));
79 }
80
81 g_pa_init_refcount = 1;
82 static_mutex_unlock(&g_pa_refcount_mutex);
83
84 return ASCIICHAT_OK;
85}
86
95static void audio_release_portaudio(void) {
96 static_mutex_lock(&g_pa_refcount_mutex);
97
98 if (g_pa_init_refcount > 0) {
99 g_pa_init_refcount--;
100 log_debug("PortAudio refcount decremented to %u", g_pa_init_refcount);
101 } else {
102 log_warn("audio_release_portaudio() called but refcount is already 0");
103 }
104
105 static_mutex_unlock(&g_pa_refcount_mutex);
106}
107
115 static_mutex_lock(&g_pa_refcount_mutex);
116
117 if (g_pa_init_refcount == 0 && g_pa_init_count > 0 && g_pa_terminate_count == 0) {
118 log_debug("[PORTAUDIO_TERM] Calling Pa_Terminate() to release PortAudio");
119
120 PaError err = Pa_Terminate();
121 g_pa_terminate_count++;
122
123 log_debug("[PORTAUDIO_TERM] Pa_Terminate() returned: %s", Pa_GetErrorText(err));
124 }
125
126 static_mutex_unlock(&g_pa_refcount_mutex);
127}
128
129// Worker thread batch size (in frames, not samples)
130// Reduced from 480 (10ms) to 128 (2.7ms) for lower latency and less jitter
131// Smaller batches mean more frequent processing, reducing audio gaps
132#define WORKER_BATCH_FRAMES 128
133#define WORKER_BATCH_SAMPLES (WORKER_BATCH_FRAMES * AUDIO_CHANNELS)
134#define WORKER_TIMEOUT_MS 1 // Wake up every 1ms to keep up with 48kHz playback (was 3ms)
135
161static void *audio_worker_thread(void *arg) {
162 audio_context_t *ctx = (audio_context_t *)arg;
163 log_debug("Audio worker thread started (batch size: %d frames = %d samples)", WORKER_BATCH_FRAMES,
165
166 // Check for AEC3 bypass (static, checked once)
167 static int bypass_aec3_worker = -1;
168 if (bypass_aec3_worker == -1) {
169 const char *env = platform_getenv("BYPASS_AEC3");
170 bypass_aec3_worker = (env && (strcmp(env, "1") == 0 || strcmp(env, "true") == 0)) ? 1 : 0;
171 if (bypass_aec3_worker) {
172 log_warn("Worker thread: AEC3 BYPASSED via BYPASS_AEC3=1 (worker will skip AEC3 processing)");
173 }
174 }
175
176 // Timing instrumentation for debugging
177 static uint64_t loop_count = 0;
178 static uint64_t timeout_count = 0;
179 static uint64_t signal_count = 0;
180 static uint64_t process_count = 0;
181
182 // Detailed timing stats
183 static double total_wait_ns = 0;
184 static double total_capture_ns = 0;
185 static double total_playback_ns = 0;
186 static double max_wait_ns = 0;
187 static double max_capture_ns = 0;
188 static double max_playback_ns = 0;
189
190 while (true) {
191 loop_count++;
192 START_TIMER("worker_loop_iteration");
193
194 // For output-only mode, don't wait for signal - just write continuously
195 // For duplex/input modes, wait for signal from callback
196 bool is_output_only = ctx->output_stream && !ctx->input_stream && !ctx->duplex_stream;
197
198 int wait_result = 1; // Default: timeout (will trigger processing for output-only)
199 if (!is_output_only) {
200 // Wait for signal from callback or timeout
201 mutex_lock(&ctx->worker_mutex);
202 START_TIMER("worker_cond_wait");
203 wait_result = cond_timedwait(&ctx->worker_cond, &ctx->worker_mutex, WORKER_TIMEOUT_MS * NS_PER_MS_INT);
204 double wait_time_ns = STOP_TIMER("worker_cond_wait");
205 mutex_unlock(&ctx->worker_mutex);
206
207 total_wait_ns += wait_time_ns;
208 if (wait_time_ns > max_wait_ns)
209 max_wait_ns = wait_time_ns;
210 } else {
211 // Output-only mode: no wait needed, callback handles audio delivery
212 // Just sleep briefly to avoid busy-looping
213 Pa_Sleep(5);
214 wait_result = 1;
215 }
216
217 // Check shutdown flag
218 if (atomic_load(&ctx->worker_should_stop)) {
219 log_debug("Worker thread received shutdown signal");
220 break;
221 }
222
223 // Count wake-ups
224 if (wait_result == 0) {
225 signal_count++;
226 } else {
227 timeout_count++;
228 }
229
230 // Skip processing if we timed out and no data available
231 size_t capture_available = audio_ring_buffer_available_read(ctx->raw_capture_rb);
232 size_t render_available = audio_ring_buffer_available_read(ctx->raw_render_rb);
233 size_t playback_available = audio_ring_buffer_available_read(ctx->playback_buffer);
234
235 // Log worker loop state every 100 iterations with detailed timing
236 if (loop_count % 100 == 0) {
237 char avg_wait_str[32], max_wait_str[32];
238 char avg_capture_str[32], max_capture_str[32];
239 char avg_playback_str[32], max_playback_str[32];
240 format_duration_ns(total_wait_ns / loop_count, avg_wait_str, sizeof(avg_wait_str));
241 format_duration_ns(max_wait_ns, max_wait_str, sizeof(max_wait_str));
242 format_duration_ns(total_capture_ns / (process_count > 0 ? process_count : 1), avg_capture_str,
243 sizeof(avg_capture_str));
244 format_duration_ns(max_capture_ns, max_capture_str, sizeof(max_capture_str));
245 format_duration_ns(total_playback_ns / (process_count > 0 ? process_count : 1), avg_playback_str,
246 sizeof(avg_playback_str));
247 format_duration_ns(max_playback_ns, max_playback_str, sizeof(max_playback_str));
248
249 log_info("Worker stats: loops=%lu, signals=%lu, timeouts=%lu, processed=%lu", loop_count, signal_count,
250 timeout_count, process_count);
251 log_info("Worker timing: wait avg=%s max=%s, capture avg=%s max=%s, playback avg=%s max=%s", avg_wait_str,
252 max_wait_str, avg_capture_str, max_capture_str, avg_playback_str, max_playback_str);
253 log_info("Worker buffers: capture=%zu, render=%zu, playback=%zu (need >= %d to process)", capture_available,
254 render_available, playback_available, WORKER_BATCH_SAMPLES);
255 }
256
257 if (wait_result != 0 && capture_available == 0 && playback_available == 0) {
258 // Timeout with no data - continue waiting
259 STOP_TIMER("worker_loop_iteration"); // Must stop before loop repeats
260 continue;
261 }
262
263 process_count++;
264
265 // STEP 1: Process capture path (mic → AEC3 → encoder)
266 // Process capture samples if available (don't wait for full batch - reduces latency)
267 // Minimum: 64 samples (1.3ms @ 48kHz) to avoid excessive overhead
268 const size_t MIN_PROCESS_SAMPLES = 64;
269 if (capture_available >= MIN_PROCESS_SAMPLES) {
270 START_TIMER("worker_capture_processing");
271
272 // Read up to WORKER_BATCH_SAMPLES, but process whatever is available
273 size_t samples_to_process = (capture_available > WORKER_BATCH_SAMPLES) ? WORKER_BATCH_SAMPLES : capture_available;
274 // Read raw capture samples from callbacks (variable size batch)
275 size_t capture_read = audio_ring_buffer_read(ctx->raw_capture_rb, ctx->worker_capture_batch, samples_to_process);
276
277 if (capture_read > 0) {
278 // For AEC3, we need render samples too. If not available, skip AEC3 but still process capture.
279 if (!bypass_aec3_worker && ctx->audio_pipeline && render_available >= capture_read) {
280 // Read render samples for AEC3 (match capture size)
281 size_t render_read = audio_ring_buffer_read(ctx->raw_render_rb, ctx->worker_render_batch, capture_read);
282
283 if (render_read > 0) {
284 // Measure AEC3 processing time
285 uint64_t aec3_start_ns = time_get_ns();
286
287 // AEC3 processing - should be fast enough for real-time on Pi 5
288 // Output is written back to worker_capture_batch (in-place processing)
289 client_audio_pipeline_process_duplex(ctx->audio_pipeline, ctx->worker_render_batch, (int)render_read,
290 ctx->worker_capture_batch, (int)capture_read,
291 ctx->worker_capture_batch); // Process in-place
292
293 long aec3_ns = (long)time_elapsed_ns(aec3_start_ns, time_get_ns());
294
295 // Log AEC3 timing periodically
296 static int aec3_count = 0;
297 static long aec3_total_ns = 0;
298 static long aec3_max_ns = 0;
299 aec3_count++;
300 aec3_total_ns += aec3_ns;
301 if (aec3_ns > aec3_max_ns)
302 aec3_max_ns = aec3_ns;
303
304 if (aec3_count % 100 == 0) {
305 long avg_ns = aec3_total_ns / aec3_count;
306 log_info("AEC3 performance: avg=%.2fms, max=%.2fms, latest=%.2fms (samples=%zu, %d calls)",
307 avg_ns / NS_PER_MS, aec3_max_ns / NS_PER_MS, aec3_ns / NS_PER_MS, capture_read, aec3_count);
308 }
309 }
310 }
311
312 // TODO: Add optional resampling here if input device rate != 48kHz
313 // For now, assume 48kHz (most common for professional audio)
314
315 // Apply microphone sensitivity (volume control)
316 float mic_sensitivity = GET_OPTION(microphone_sensitivity);
317 if (mic_sensitivity != 1.0f) {
318 // Clamp to valid range [0.0, 1.0]
319 if (mic_sensitivity < 0.0f)
320 mic_sensitivity = 0.0f;
321 if (mic_sensitivity > 1.0f)
322 mic_sensitivity = 1.0f;
323
324 for (size_t i = 0; i < capture_read; i++) {
325 ctx->worker_capture_batch[i] *= mic_sensitivity;
326 }
327 }
328
329 // Write processed capture to encoder buffer
330 audio_ring_buffer_write(ctx->capture_buffer, ctx->worker_capture_batch, (int)capture_read);
331
332 log_debug_every(NS_PER_MS_INT, "Worker processed %zu capture samples (AEC3 %s)", capture_read,
333 bypass_aec3_worker
334 ? "BYPASSED"
335 : (render_available >= WORKER_BATCH_SAMPLES ? "applied" : "skipped-no-render"));
336 }
337
338 double capture_time_ns = STOP_TIMER("worker_capture_processing");
339 total_capture_ns += capture_time_ns;
340 if (capture_time_ns > max_capture_ns)
341 max_capture_ns = capture_time_ns;
342 }
343
344 // STEP 2: Output/Playback handling
345 // With output_callback registered, PortAudio handles calling it automatically
346 // No need for worker thread to manually write data via Pa_WriteStream
347 (void)playback_available; // Suppress unused variable warning if not used in this build
348
349 // Log overall loop iteration time
350 double loop_time_ns = STOP_TIMER("worker_loop_iteration");
351 static double total_loop_ns = 0;
352 static double max_loop_ns = 0;
353 total_loop_ns += loop_time_ns;
354 if (loop_time_ns > max_loop_ns)
355 max_loop_ns = loop_time_ns;
356
357 if (loop_count % 100 == 0) {
358 char avg_loop_str[32], max_loop_str[32];
359 format_duration_ns(total_loop_ns / loop_count, avg_loop_str, sizeof(avg_loop_str));
360 format_duration_ns(max_loop_ns, max_loop_str, sizeof(max_loop_str));
361 log_info("Worker loop timing: avg=%s max=%s", avg_loop_str, max_loop_str);
362 }
363 }
364
365 log_debug("Audio worker thread exiting");
366 return NULL;
367}
368
383static int duplex_callback(const void *inputBuffer, void *outputBuffer, unsigned long framesPerBuffer,
384 const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags,
385 void *userData) {
386 (void)timeInfo;
387
388 static uint64_t duplex_invoke_count = 0;
389 duplex_invoke_count++;
390 if (duplex_invoke_count == 1) {
391 log_warn("!!! DUPLEX_CALLBACK INVOKED FOR FIRST TIME !!!");
392 }
393
394 START_TIMER("duplex_callback");
395
396 static uint64_t total_callbacks = 0;
397 total_callbacks++;
398 if (total_callbacks == 1) {
399 log_warn("FIRST CALLBACK RECEIVED! total=%llu frames=%lu", (unsigned long long)total_callbacks, framesPerBuffer);
400 }
401
402 audio_context_t *ctx = (audio_context_t *)userData;
403 if (!ctx) {
404 SET_ERRNO(ERROR_INVALID_PARAM, "duplex_callback: ctx is NULL");
405 return paAbort;
406 }
407
408 log_info_every(100 * NS_PER_MS_INT, "CB_START: ctx=%p output=%p inputBuffer=%p", (void *)ctx, (void *)outputBuffer,
409 inputBuffer);
410
411 const float *input = (const float *)inputBuffer;
412 float *output = (float *)outputBuffer;
413 size_t num_samples = framesPerBuffer * AUDIO_CHANNELS;
414
415 static uint64_t audio_callback_debug_count = 0;
416 audio_callback_debug_count++;
417 if (audio_callback_debug_count <= 10 || audio_callback_debug_count % 100 == 0) {
418 log_info("AUDIO_CALLBACK #%lu: frames=%lu samples=%zu media_source=%p", audio_callback_debug_count, framesPerBuffer,
419 num_samples, (void *)ctx->media_source);
420 }
421
422 // Silence on shutdown
423 if (atomic_load(&ctx->shutting_down)) {
424 if (output) {
425 SAFE_MEMSET(output, num_samples * sizeof(float), 0, num_samples * sizeof(float));
426 }
427 STOP_TIMER("duplex_callback");
428 return paContinue;
429 }
430
431 // Log status flags (rate-limited to avoid spam)
432 if (statusFlags != 0) {
433 if (statusFlags & paOutputUnderflow) {
434 log_warn_every(LOG_RATE_FAST, "PortAudio output underflow");
435 }
436 if (statusFlags & paInputOverflow) {
437 log_warn_every(LOG_RATE_FAST, "PortAudio input overflow");
438 }
439 }
440
441 // Static counters for playback tracking (used in logging below)
442 static uint64_t total_samples_read_local = 0;
443 static uint64_t underrun_count_local = 0;
444
445 // STEP 1: Read playback from media source (mirror mode) or network buffer
446 if (output) {
447 size_t samples_read = 0;
448
449 // For mirror mode with media file: read audio directly from media source
450 // This bypasses buffering and provides audio at the exact sample rate PortAudio needs
451 if (ctx->media_source) {
452 samples_read = media_source_read_audio((void *)ctx->media_source, output, num_samples);
453
454 static uint64_t cb_count = 0;
455 cb_count++;
456 if (cb_count <= 5 || cb_count % 500 == 0) {
457 log_info("Callback #%lu: media_source path, read %zu samples", cb_count, samples_read);
458 }
459 } else if (ctx->playback_buffer) {
460 // Network mode: read from playback buffer with jitter buffering logic
461 samples_read = audio_ring_buffer_read(ctx->playback_buffer, output, num_samples);
462
463 static uint64_t playback_count = 0;
464 playback_count++;
465 if (playback_count <= 5 || playback_count % 500 == 0) {
466 log_info("Callback #%lu: playback_buffer path, read %zu samples", playback_count, samples_read);
467 }
468 } else {
469 static uint64_t null_count = 0;
470 if (++null_count == 1) {
471 log_warn("Callback: BOTH media_source AND playback_buffer are NULL!");
472 }
473 }
474
475 total_samples_read_local += samples_read;
476
477 if (samples_read < num_samples) {
478 // Fill remaining with silence if underrun
479 SAFE_MEMSET(output + samples_read, (num_samples - samples_read) * sizeof(float), 0,
480 (num_samples - samples_read) * sizeof(float));
481 if (ctx->media_source) {
482 log_debug_every(5 * NS_PER_MS_INT, "Media playback: got %zu/%zu samples", samples_read, num_samples);
483 } else {
484 log_debug_every(NS_PER_MS_INT, "Network playback underrun: got %zu/%zu samples", samples_read, num_samples);
485 underrun_count_local++;
486 }
487 }
488
489 // Apply speaker volume control
490 if (samples_read > 0) {
491 float speaker_volume = GET_OPTION(speakers_volume);
492 // Clamp to valid range [0.0, 1.0]
493 if (speaker_volume < 0.0f) {
494 speaker_volume = 0.0f;
495 } else if (speaker_volume > 1.0f) {
496 speaker_volume = 1.0f;
497 }
498 // Apply volume scaling if not at 100%
499 if (speaker_volume != 1.0f) {
500 log_debug_every(48000, "Applying audio volume %.1f%% to %zu samples", speaker_volume * 100.0, samples_read);
501 for (size_t i = 0; i < samples_read; i++) {
502 output[i] *= speaker_volume;
503 }
504 } else {
505 log_debug_every(48000, "Audio at 100%% volume, no scaling needed");
506 }
507 }
508 }
509
510 // STEP 2: Copy raw mic samples → worker for AEC3 processing (~0.5ms)
511 // Skip microphone capture in playback-only mode (mirror with file/URL audio)
512 // When files (--file) or URLs (--url) are being played, microphone input is completely disabled
513 // to prevent feedback loops and interference with playback audio
514 if (!ctx->playback_only && input && ctx->raw_capture_rb) {
515 audio_ring_buffer_write(ctx->raw_capture_rb, input, (int)num_samples);
516 } else if (ctx->playback_only && input) {
517 // Explicitly discard microphone input when in playback-only mode
518 // This ensures complete isolation between microphone and media playback
519 (void)input; // Suppress unused parameter warning
520 }
521
522 // STEP 3: Copy raw speaker samples → worker for AEC3 reference (~0.5ms)
523 // This is CRITICAL for AEC3 - worker needs exact render signal at same time as capture
524 // In playback-only mode, we still write the render reference for consistency
525 if (output && ctx->raw_render_rb) {
526 audio_ring_buffer_write(ctx->raw_render_rb, output, (int)num_samples);
527 }
528
529 // STEP 4: Signal worker thread (non-blocking, ~0.1ms)
530 // Worker wakes up, processes batch, writes back to processed buffers
531 cond_signal(&ctx->worker_cond);
532
533 // Log callback timing and playback stats periodically
534 double callback_time_ns = STOP_TIMER("duplex_callback");
535 static double total_callback_ns = 0;
536 static double max_callback_ns = 0;
537 static uint64_t callback_count = 0;
538
539 callback_count++;
540 total_callback_ns += callback_time_ns;
541 if (callback_time_ns > max_callback_ns)
542 max_callback_ns = callback_time_ns;
543
544 if (callback_count % 500 == 0) { // Log every ~10 seconds @ 48 FPS
545 char avg_str[32], max_str[32];
546 format_duration_ns(total_callback_ns / callback_count, avg_str, sizeof(avg_str));
547 format_duration_ns(max_callback_ns, max_str, sizeof(max_str));
548 log_info("Duplex callback timing: count=%lu, avg=%s, max=%s (budget: 2ms)", callback_count, avg_str, max_str);
549 log_info("Playback stats: total_samples_read=%lu, underruns=%lu, read_success_rate=%.1f%%",
550 total_samples_read_local, underrun_count_local,
551 100.0 * (double)(callback_count - underrun_count_local) / (double)callback_count);
552
553 // DEBUG: Log first few output samples to verify they're not zero
554 if (output && num_samples >= 4) {
555 log_info("Output sample check: first4=[%.4f, %.4f, %.4f, %.4f] (verifying audio is not silent)", output[0],
556 output[1], output[2], output[3]);
557 }
558 }
559
560 return paContinue;
561}
562
/**
 * @brief Resample a float buffer with linear interpolation.
 *
 * Output sample i is taken at source position i * (src_rate / dst_rate) and
 * interpolated between the two neighbouring source samples. Positions at or
 * beyond the last source sample hold that last sample.
 *
 * @param src         Source samples (may be NULL only if the output should be silence).
 * @param src_samples Number of samples in @p src.
 * @param dst         Destination buffer; nothing is done if NULL.
 * @param dst_samples Number of samples to produce in @p dst.
 * @param src_rate    Source sample rate; must be positive.
 * @param dst_rate    Destination sample rate; must be positive.
 *
 * If the source is empty/NULL, or the rates are not positive numbers with a
 * finite ratio, @p dst is filled with silence instead of performing an
 * undefined double-to-size_t conversion.
 */
void resample_linear(const float *src, size_t src_samples, float *dst, size_t dst_samples, double src_rate,
                     double dst_rate) {
  if (!dst || dst_samples == 0) {
    return;
  }

  // The comparisons are false for NaN, so NaN rates are rejected here too.
  const bool rates_ok = (src_rate > 0.0) && (dst_rate > 0.0) && isfinite(src_rate / dst_rate);
  if (!src || src_samples == 0 || !rates_ok) {
    for (size_t i = 0; i < dst_samples; i++) {
      dst[i] = 0.0f;
    }
    return;
  }

  const double ratio = src_rate / dst_rate;
  const size_t last = src_samples - 1;

  for (size_t i = 0; i < dst_samples; i++) {
    const double src_pos = (double)i * ratio;

    // Past the end of the source: hold the final sample. Checking before the
    // cast also keeps src_pos within size_t range.
    if (src_pos >= (double)last) {
      dst[i] = src[last];
      continue;
    }

    const size_t idx0 = (size_t)src_pos;
    const double frac = src_pos - (double)idx0;

    // Linear interpolation between idx0 and its successor (idx0 < last here).
    dst[i] = (float)((1.0 - frac) * src[idx0] + frac * src[idx0 + 1]);
  }
}
599
611static int output_callback(const void *inputBuffer, void *outputBuffer, unsigned long framesPerBuffer,
612 const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags,
613 void *userData) {
614 (void)inputBuffer;
615 (void)timeInfo;
616
617 static uint64_t output_cb_invoke_count = 0;
618 output_cb_invoke_count++;
619 if (output_cb_invoke_count == 1) {
620 log_warn("!!! OUTPUT_CALLBACK INVOKED FOR FIRST TIME !!!");
621 }
622
623 audio_context_t *ctx = (audio_context_t *)userData;
624 float *output = (float *)outputBuffer;
625 size_t num_samples = framesPerBuffer * AUDIO_CHANNELS;
626
627 static uint64_t output_cb_count = 0;
628 output_cb_count++;
629 if (output_cb_count == 1) {
630 log_warn("FIRST OUTPUT_CALLBACK! frames=%lu ctx->media_source=%p", framesPerBuffer, (void *)ctx->media_source);
631 }
632
633 // Silence on shutdown
634 if (atomic_load(&ctx->shutting_down)) {
635 if (output) {
636 SAFE_MEMSET(output, num_samples * sizeof(float), 0, num_samples * sizeof(float));
637 }
638 return paContinue;
639 }
640
641 if (statusFlags & paOutputUnderflow) {
642 log_warn_every(LOG_RATE_FAST, "PortAudio output underflow (separate stream)");
643 }
644
645 // STEP 1: Read audio source
646 size_t samples_read = 0;
647 if (output) {
648 if (ctx->media_source) {
649 // Mirror mode: read audio directly from media source
650 samples_read = media_source_read_audio((void *)ctx->media_source, output, num_samples);
651 if (output_cb_count <= 3) {
652 log_warn("OUTPUT_CB: media_source path, read %zu samples", samples_read);
653 }
654 } else if (ctx->processed_playback_rb) {
655 // Network mode: read from processed playback buffer (worker output)
656 samples_read = audio_ring_buffer_read(ctx->processed_playback_rb, output, num_samples);
657 if (output_cb_count <= 3) {
658 log_warn("OUTPUT_CB: processed_playback_rb path, read %zu samples", samples_read);
659 }
660 } else if (ctx->playback_buffer) {
661 // Fallback: read from playback buffer if available
662 samples_read = audio_ring_buffer_read(ctx->playback_buffer, output, num_samples);
663 if (output_cb_count <= 3) {
664 log_warn("OUTPUT_CB: playback_buffer path, read %zu samples", samples_read);
665 }
666 } else {
667 if (output_cb_count <= 3) {
668 log_warn("OUTPUT_CB: NO BUFFERS! media_source=%p processed_rb=%p playback_buf=%p", (void *)ctx->media_source,
669 (void *)ctx->processed_playback_rb, (void *)ctx->playback_buffer);
670 }
671 }
672
673 // Apply speaker volume control
674 if (samples_read > 0) {
675 float speaker_volume = GET_OPTION(speakers_volume);
676 // Clamp to valid range [0.0, 1.0]
677 if (speaker_volume < 0.0f) {
678 speaker_volume = 0.0f;
679 } else if (speaker_volume > 1.0f) {
680 speaker_volume = 1.0f;
681 }
682 // Apply volume scaling if not at 100%
683 if (speaker_volume != 1.0f) {
684 log_debug_every(48000, "OUTPUT_CALLBACK: Applying volume %.0f%% to %zu samples", speaker_volume * 100.0,
685 samples_read);
686 for (size_t i = 0; i < samples_read; i++) {
687 output[i] *= speaker_volume;
688 }
689 }
690 }
691
692 // Fill remaining with silence if underrun
693 if (samples_read < num_samples) {
694 SAFE_MEMSET(output + samples_read, (num_samples - samples_read) * sizeof(float), 0,
695 (num_samples - samples_read) * sizeof(float));
696 }
697
698 // STEP 2: Copy to render buffer for input callback (AEC3 reference)
699 if (ctx->render_buffer && samples_read > 0) {
700 audio_ring_buffer_write(ctx->render_buffer, output, (int)samples_read);
701 }
702 }
703
704 // STEP 3: Signal worker
705 cond_signal(&ctx->worker_cond);
706
707 return paContinue;
708}
709
723static int input_callback(const void *inputBuffer, void *outputBuffer, unsigned long framesPerBuffer,
724 const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags, void *userData) {
725 (void)outputBuffer;
726 (void)timeInfo;
727
728 static uint64_t input_invoke_count = 0;
729 input_invoke_count++;
730 if (input_invoke_count == 1) {
731 log_warn("!!! INPUT_CALLBACK INVOKED FOR FIRST TIME !!!");
732 }
733
734 audio_context_t *ctx = (audio_context_t *)userData;
735 const float *input = (const float *)inputBuffer;
736 size_t num_samples = framesPerBuffer * AUDIO_CHANNELS;
737
738 // Track callback frequency
739 static uint64_t callback_count = 0;
740 static uint64_t last_log_time_ns = 0;
741 callback_count++;
742
743 uint64_t now_ns = time_get_ns();
744
745 if (last_log_time_ns == 0) {
746 last_log_time_ns = now_ns;
747 } else {
748 long elapsed_ms = (long)time_ns_to_ms(time_elapsed_ns(last_log_time_ns, now_ns));
749 if (elapsed_ms >= 1000) {
750 log_info("Input callback: %lu calls/sec, %lu frames/call, %zu samples/call", callback_count, framesPerBuffer,
751 num_samples);
752 callback_count = 0;
753 last_log_time_ns = now_ns;
754 }
755 }
756
757 // Silence on shutdown
758 if (atomic_load(&ctx->shutting_down)) {
759 return paContinue;
760 }
761
762 if (statusFlags & paInputOverflow) {
763 log_warn_every(LOG_RATE_FAST, "PortAudio input overflow (separate stream)");
764 }
765
766 // STEP 1: Copy raw mic samples → worker for AEC3 processing
767 // Skip microphone capture in playback-only mode (mirror)
768 if (!ctx->playback_only && input && ctx->raw_capture_rb) {
769 audio_ring_buffer_write(ctx->raw_capture_rb, input, (int)num_samples);
770 }
771
772 // STEP 2: Read render reference from render_buffer and copy to worker
773 // (render_buffer is written by output_callback, read here for synchronization)
774 if (ctx->render_buffer && ctx->raw_render_rb) {
775 // Worker will handle the AEC3 processing using this render reference
776 size_t render_available = audio_ring_buffer_available_read(ctx->render_buffer);
777 if (render_available >= num_samples) {
778 // Read exactly what we need
779 float render_temp[AUDIO_BUFFER_SIZE]; // Stack allocation OK - fixed small size
780 size_t render_read = audio_ring_buffer_read(ctx->render_buffer, render_temp, num_samples);
781 if (render_read > 0) {
782 audio_ring_buffer_write(ctx->raw_render_rb, render_temp, (int)render_read);
783 }
784 }
785 }
786
787 // STEP 3: Signal worker thread
788 cond_signal(&ctx->worker_cond);
789
790 return paContinue;
791}
792
793// Forward declaration for internal helper function
794static audio_ring_buffer_t *audio_ring_buffer_create_internal(bool jitter_buffer_enabled);
795
796static audio_ring_buffer_t *audio_ring_buffer_create_internal(bool jitter_buffer_enabled) {
797 size_t rb_size = sizeof(audio_ring_buffer_t);
798 audio_ring_buffer_t *rb = (audio_ring_buffer_t *)buffer_pool_alloc(NULL, rb_size);
799
800 if (!rb) {
801 SET_ERRNO(ERROR_MEMORY, "Failed to allocate audio ring buffer from buffer pool");
802 return NULL;
803 }
804
805 SAFE_MEMSET(rb->data, sizeof(rb->data), 0, sizeof(rb->data));
806 rb->write_index = 0;
807 rb->read_index = 0;
808 // For capture buffers (jitter_buffer_enabled=false), mark as already filled to bypass jitter logic
809 // For playback buffers (jitter_buffer_enabled=true), start unfilled to wait for threshold
810 rb->jitter_buffer_filled = !jitter_buffer_enabled;
811 rb->crossfade_samples_remaining = 0;
812 rb->crossfade_fade_in = false;
813 rb->last_sample = 0.0f;
814 rb->underrun_count = 0;
815 rb->jitter_buffer_enabled = jitter_buffer_enabled;
816
817 if (mutex_init(&rb->mutex) != 0) {
818 SET_ERRNO(ERROR_THREAD, "Failed to initialize audio ring buffer mutex");
819 buffer_pool_free(NULL, rb, sizeof(audio_ring_buffer_t));
820 return NULL;
821 }
822
823 return rb;
824}
825
826audio_ring_buffer_t *audio_ring_buffer_create(void) {
827 return audio_ring_buffer_create_internal(true); // Default: enable jitter buffering for playback
828}
829
830audio_ring_buffer_t *audio_ring_buffer_create_for_capture(void) {
831 return audio_ring_buffer_create_internal(false); // Disable jitter buffering for capture
832}
833
834void audio_ring_buffer_destroy(audio_ring_buffer_t *rb) {
835 if (!rb)
836 return;
837
838 mutex_destroy(&rb->mutex);
839 buffer_pool_free(NULL, rb, sizeof(audio_ring_buffer_t));
840}
841
842void audio_ring_buffer_clear(audio_ring_buffer_t *rb) {
843 if (!rb)
844 return;
845
846 mutex_lock(&rb->mutex);
847 // Reset buffer to empty state (no audio to play = silence at shutdown)
848 rb->write_index = 0;
849 rb->read_index = 0;
850 rb->last_sample = 0.0f;
851 // Clear the actual data to zeros to prevent any stale audio
852 SAFE_MEMSET(rb->data, sizeof(rb->data), 0, sizeof(rb->data));
853 mutex_unlock(&rb->mutex);
854}
855
856asciichat_error_t audio_ring_buffer_write(audio_ring_buffer_t *rb, const float *data, int samples) {
857 if (!rb || !data || samples <= 0)
858 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: rb=%p, data=%p, samples=%d", rb, data, samples);
859
860 // Validate samples doesn't exceed our buffer size
861 if (samples > AUDIO_RING_BUFFER_SIZE) {
862 return SET_ERRNO(ERROR_BUFFER, "Attempted to write %d samples, but buffer size is only %d", samples,
863 AUDIO_RING_BUFFER_SIZE);
864 }
865
866 // LOCK-FREE: Load indices with proper memory ordering
867 // - Load our own write_index with relaxed (no sync needed with ourselves)
868 // - Load reader's read_index with acquire (see reader's updates to free space)
869 unsigned int write_idx = atomic_load_explicit(&rb->write_index, memory_order_relaxed);
870 unsigned int read_idx = atomic_load_explicit(&rb->read_index, memory_order_acquire);
871
872 // Calculate current buffer level (how many samples are buffered)
873 int buffer_level;
874 if (write_idx >= read_idx) {
875 buffer_level = (int)(write_idx - read_idx);
876 } else {
877 buffer_level = AUDIO_RING_BUFFER_SIZE - (int)(read_idx - write_idx);
878 }
879 // Reserve 1 slot to distinguish between full and empty states
880 // (when buffer is full, write_idx will be just before read_idx, not equal to it)
881 int available = AUDIO_RING_BUFFER_SIZE - 1 - buffer_level;
882
883 // HIGH WATER MARK: Drop INCOMING samples to prevent latency accumulation
884 // Writer must not modify read_index (race condition with reader).
885 // Instead, we drop incoming samples to keep buffer bounded.
886 // This sacrifices newest data to prevent unbounded latency growth.
887 if (buffer_level > AUDIO_JITTER_HIGH_WATER_MARK) {
888 // Buffer is already too full - drop incoming samples to maintain target level
889 int target_writes = AUDIO_JITTER_TARGET_LEVEL - buffer_level;
890 if (target_writes < 0) {
891 target_writes = 0; // Buffer is way over - drop everything
892 }
893
894 if (samples > target_writes) {
895 int dropped = samples - target_writes;
896 log_warn_every(LOG_RATE_FAST,
897 "Audio buffer high water mark exceeded (%d > %d): dropping %d INCOMING samples "
898 "(keeping newest %d to maintain target %d)",
899 buffer_level, AUDIO_JITTER_HIGH_WATER_MARK, dropped, target_writes, AUDIO_JITTER_TARGET_LEVEL);
900 samples = target_writes; // Only write what fits within target level
901 }
902 }
903
904 // Now write the new samples - should always have enough space after above
905 int samples_to_write = samples;
906 if (samples > available) {
907 // This should rarely happen after the high water mark logic above
908 int samples_dropped = samples - available;
909 samples_to_write = available;
910 log_warn_every(LOG_RATE_FAST, "Audio buffer overflow: dropping %d of %d incoming samples (buffer_used=%d/%d)",
911 samples_dropped, samples, AUDIO_RING_BUFFER_SIZE - available, AUDIO_RING_BUFFER_SIZE);
912 }
913
914 // Write only the samples that fit (preserves existing data integrity)
915 if (samples_to_write > 0) {
916 int remaining = AUDIO_RING_BUFFER_SIZE - (int)write_idx;
917
918 if (samples_to_write <= remaining) {
919 // Can copy in one chunk
920 SAFE_MEMCPY(&rb->data[write_idx], samples_to_write * sizeof(float), data, samples_to_write * sizeof(float));
921 } else {
922 // Need to wrap around - copy in two chunks
923 SAFE_MEMCPY(&rb->data[write_idx], remaining * sizeof(float), data, remaining * sizeof(float));
924 SAFE_MEMCPY(&rb->data[0], (samples_to_write - remaining) * sizeof(float), &data[remaining],
925 (samples_to_write - remaining) * sizeof(float));
926 }
927
928 // LOCK-FREE: Store new write_index with release ordering
929 // This ensures all data writes above are visible before the index update
930 unsigned int new_write_idx = (write_idx + (unsigned int)samples_to_write) % AUDIO_RING_BUFFER_SIZE;
931 atomic_store_explicit(&rb->write_index, new_write_idx, memory_order_release);
932 }
933
934 // Note: jitter buffer fill check is now done in read function for better control
935
936 return ASCIICHAT_OK; // Success
937}
938
939size_t audio_ring_buffer_read(audio_ring_buffer_t *rb, float *data, size_t samples) {
940 if (!rb || !data || samples <= 0) {
941 SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: rb=%p, data=%p, samples=%d", rb, data, samples);
942 return 0; // Return 0 samples read on error
943 }
944
945 // LOCK-FREE: Load indices with proper memory ordering
946 // - Load writer's write_index with acquire (see writer's data updates)
947 // - Load our own read_index with relaxed (no sync needed with ourselves)
948 unsigned int write_idx = atomic_load_explicit(&rb->write_index, memory_order_acquire);
949 unsigned int read_idx = atomic_load_explicit(&rb->read_index, memory_order_relaxed);
950
951 // Calculate available samples
952 size_t available;
953 if (write_idx >= read_idx) {
954 available = write_idx - read_idx;
955 } else {
956 available = AUDIO_RING_BUFFER_SIZE - read_idx + write_idx;
957 }
958
959 // LOCK-FREE: Load jitter buffer state with acquire ordering
960 bool jitter_filled = atomic_load_explicit(&rb->jitter_buffer_filled, memory_order_acquire);
961 int crossfade_remaining = atomic_load_explicit(&rb->crossfade_samples_remaining, memory_order_acquire);
962 bool fade_in = atomic_load_explicit(&rb->crossfade_fade_in, memory_order_acquire);
963
964 // Jitter buffer: don't read until initial fill threshold is reached
965 // (only for playback buffers - capture buffers have jitter_buffer_enabled = false)
966 if (!jitter_filled && rb->jitter_buffer_enabled) {
967 // Check if we've accumulated enough samples to start playback
968 if (available >= AUDIO_JITTER_BUFFER_THRESHOLD) {
969 atomic_store_explicit(&rb->jitter_buffer_filled, true, memory_order_release);
970 atomic_store_explicit(&rb->crossfade_samples_remaining, AUDIO_CROSSFADE_SAMPLES, memory_order_release);
971 atomic_store_explicit(&rb->crossfade_fade_in, true, memory_order_release);
972 log_info("Jitter buffer filled (%zu samples), starting playback with fade-in", available);
973 // Reload state for processing below
974 jitter_filled = true;
975 crossfade_remaining = AUDIO_CROSSFADE_SAMPLES;
976 fade_in = true;
977 } else {
978 // Log buffer fill progress every second
979 log_debug_every(NS_PER_MS_INT, "Jitter buffer filling: %zu/%d samples (%.1f%%)", available,
980 AUDIO_JITTER_BUFFER_THRESHOLD, (100.0f * available) / AUDIO_JITTER_BUFFER_THRESHOLD);
981 return 0; // Return 0 samples - caller will pad with silence
982 }
983 }
984
985 // Periodic buffer health logging (every 5 seconds when healthy)
986 unsigned int underruns = atomic_load_explicit(&rb->underrun_count, memory_order_relaxed);
987 log_dev_every(5 * NS_PER_MS_INT, "Buffer health: %zu/%d samples (%.1f%%), underruns=%u", available,
988 AUDIO_RING_BUFFER_SIZE, (100.0f * available) / AUDIO_RING_BUFFER_SIZE, underruns);
989
990 // Low buffer handling: DON'T pause playback - continue reading what's available
991 // and fill the rest with silence. Pausing causes a feedback loop where:
992 // 1. Underrun -> pause reading -> buffer overflows from incoming samples
993 // 2. Threshold reached -> resume reading -> drains too fast -> underrun again
994 //
995 // Instead: always consume samples to prevent overflow, use silence for missing data
996 if (rb->jitter_buffer_enabled && available < AUDIO_JITTER_LOW_WATER_MARK) {
997 unsigned int underrun_count = atomic_fetch_add_explicit(&rb->underrun_count, 1, memory_order_relaxed) + 1;
998 log_warn_every(LOG_RATE_FAST,
999 "Audio buffer low #%u: only %zu samples available (low water mark: %d), padding with silence",
1000 underrun_count, available, AUDIO_JITTER_LOW_WATER_MARK);
1001 // Don't set jitter_buffer_filled = false - keep reading to prevent overflow
1002 }
1003
1004 size_t to_read = (samples > available) ? available : samples;
1005
1006 // Optimize: copy in chunks instead of one sample at a time
1007 size_t remaining = AUDIO_RING_BUFFER_SIZE - read_idx;
1008
1009 if (to_read <= remaining) {
1010 // Can copy in one chunk
1011 SAFE_MEMCPY(data, to_read * sizeof(float), &rb->data[read_idx], to_read * sizeof(float));
1012 } else {
1013 // Need to wrap around - copy in two chunks
1014 SAFE_MEMCPY(data, remaining * sizeof(float), &rb->data[read_idx], remaining * sizeof(float));
1015 SAFE_MEMCPY(&data[remaining], (to_read - remaining) * sizeof(float), &rb->data[0],
1016 (to_read - remaining) * sizeof(float));
1017 }
1018
1019 // LOCK-FREE: Store new read_index with release ordering
1020 // This ensures all data reads above complete before the index update
1021 unsigned int new_read_idx = (read_idx + (unsigned int)to_read) % AUDIO_RING_BUFFER_SIZE;
1022 atomic_store_explicit(&rb->read_index, new_read_idx, memory_order_release);
1023
1024 // Apply fade-in if recovering from underrun
1025 if (fade_in && crossfade_remaining > 0) {
1026 int fade_start = AUDIO_CROSSFADE_SAMPLES - crossfade_remaining;
1027 size_t fade_samples = (to_read < (size_t)crossfade_remaining) ? to_read : (size_t)crossfade_remaining;
1028
1029 for (size_t i = 0; i < fade_samples; i++) {
1030 float fade_factor = (float)(fade_start + (int)i + 1) / (float)AUDIO_CROSSFADE_SAMPLES;
1031 data[i] *= fade_factor;
1032 }
1033
1034 int new_crossfade_remaining = crossfade_remaining - (int)fade_samples;
1035 atomic_store_explicit(&rb->crossfade_samples_remaining, new_crossfade_remaining, memory_order_release);
1036 if (new_crossfade_remaining <= 0) {
1037 atomic_store_explicit(&rb->crossfade_fade_in, false, memory_order_release);
1038 log_debug("Audio fade-in complete");
1039 }
1040 }
1041
1042 // Save last sample for potential fade-out
1043 // Note: only update if we actually read some data
1044 // This is NOT atomic - only the reader thread writes this
1045 if (to_read > 0) {
1046 rb->last_sample = data[to_read - 1];
1047 }
1048
1049 // Return ACTUAL number of samples read, not padded count
1050 // The caller (mixer) expects truthful return values to detect underruns
1051 // and handle silence padding externally. Internal padding creates double-padding bugs.
1052 //
1053 // This function was incorrectly returning `samples` even when
1054 // it only read `to_read` samples. This broke the mixer's underrun detection.
1055 return to_read;
1056}
1057
1069size_t audio_ring_buffer_peek(audio_ring_buffer_t *rb, float *data, size_t samples) {
1070 if (!rb || !data || samples <= 0) {
1071 return 0;
1072 }
1073
1074 // LOCK-FREE: Load indices with proper memory ordering
1075 unsigned int write_idx = atomic_load_explicit(&rb->write_index, memory_order_acquire);
1076 unsigned int read_idx = atomic_load_explicit(&rb->read_index, memory_order_relaxed);
1077
1078 // Calculate available samples
1079 size_t available;
1080 if (write_idx >= read_idx) {
1081 available = write_idx - read_idx;
1082 } else {
1083 available = AUDIO_RING_BUFFER_SIZE - read_idx + write_idx;
1084 }
1085
1086 size_t to_peek = (samples > available) ? available : samples;
1087
1088 if (to_peek == 0) {
1089 return 0;
1090 }
1091
1092 // Copy samples in chunks (handle wraparound)
1093 size_t first_chunk = (read_idx + to_peek <= AUDIO_RING_BUFFER_SIZE) ? to_peek : (AUDIO_RING_BUFFER_SIZE - read_idx);
1094
1095 SAFE_MEMCPY(data, first_chunk * sizeof(float), rb->data + read_idx, first_chunk * sizeof(float));
1096
1097 if (first_chunk < to_peek) {
1098 // Wraparound: copy second chunk from beginning of buffer
1099 size_t second_chunk = to_peek - first_chunk;
1100 SAFE_MEMCPY(data + first_chunk, second_chunk * sizeof(float), rb->data, second_chunk * sizeof(float));
1101 }
1102
1103 return to_peek;
1104}
1105
1106size_t audio_ring_buffer_available_read(audio_ring_buffer_t *rb) {
1107 if (!rb)
1108 return 0;
1109
1110 // LOCK-FREE: Load indices with proper memory ordering
1111 // Use acquire for write_index to see writer's updates
1112 // Use relaxed for read_index (our own index)
1113 unsigned int write_idx = atomic_load_explicit(&rb->write_index, memory_order_acquire);
1114 unsigned int read_idx = atomic_load_explicit(&rb->read_index, memory_order_relaxed);
1115
1116 if (write_idx >= read_idx) {
1117 return write_idx - read_idx;
1118 }
1119
1120 return AUDIO_RING_BUFFER_SIZE - read_idx + write_idx;
1121}
1122
1123size_t audio_ring_buffer_available_write(audio_ring_buffer_t *rb) {
1124 if (!rb) {
1125 SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: rb is NULL");
1126 return 0;
1127 }
1128
1129 return AUDIO_RING_BUFFER_SIZE - audio_ring_buffer_available_read(rb) - 1;
1130}
1131
1132asciichat_error_t audio_init(audio_context_t *ctx) {
1133 log_debug("audio_init: starting, ctx=%p", (void *)ctx);
1134 if (!ctx) {
1135 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: ctx is NULL");
1136 }
1137
1138 SAFE_MEMSET(ctx, sizeof(audio_context_t), 0, sizeof(audio_context_t));
1139
1140 if (mutex_init(&ctx->state_mutex) != 0) {
1141 return SET_ERRNO(ERROR_THREAD, "Failed to initialize audio context mutex");
1142 }
1143
1144 // NOTE: PortAudio initialization deferred to audio_start_duplex() where streams are actually opened
1145 // This avoids Pa_Initialize() overhead for contexts that might not start duplex
1146 // and prevents premature ALSA device allocation and memory leaks
1147
1148 // Create capture buffer WITHOUT jitter buffering (PortAudio writes directly from microphone)
1149 ctx->capture_buffer = audio_ring_buffer_create_for_capture();
1150 if (!ctx->capture_buffer) {
1151 mutex_destroy(&ctx->state_mutex);
1152 return SET_ERRNO(ERROR_MEMORY, "Failed to create capture buffer");
1153 }
1154
1155 ctx->playback_buffer = audio_ring_buffer_create();
1156 if (!ctx->playback_buffer) {
1157 audio_ring_buffer_destroy(ctx->capture_buffer);
1158 mutex_destroy(&ctx->state_mutex);
1159 return SET_ERRNO(ERROR_MEMORY, "Failed to create playback buffer");
1160 }
1161
1162 // Create new ring buffers for worker thread architecture
1163 ctx->raw_capture_rb = audio_ring_buffer_create_for_capture();
1164 if (!ctx->raw_capture_rb) {
1165 audio_ring_buffer_destroy(ctx->playback_buffer);
1166 audio_ring_buffer_destroy(ctx->capture_buffer);
1167 mutex_destroy(&ctx->state_mutex);
1168 return SET_ERRNO(ERROR_MEMORY, "Failed to create raw capture buffer");
1169 }
1170
1171 ctx->raw_render_rb = audio_ring_buffer_create_for_capture();
1172 if (!ctx->raw_render_rb) {
1173 audio_ring_buffer_destroy(ctx->raw_capture_rb);
1174 audio_ring_buffer_destroy(ctx->playback_buffer);
1175 audio_ring_buffer_destroy(ctx->capture_buffer);
1176 mutex_destroy(&ctx->state_mutex);
1177 return SET_ERRNO(ERROR_MEMORY, "Failed to create raw render buffer");
1178 }
1179
1180 ctx->processed_playback_rb = audio_ring_buffer_create();
1181 if (!ctx->processed_playback_rb) {
1182 audio_ring_buffer_destroy(ctx->raw_render_rb);
1183 audio_ring_buffer_destroy(ctx->raw_capture_rb);
1184 audio_ring_buffer_destroy(ctx->playback_buffer);
1185 audio_ring_buffer_destroy(ctx->capture_buffer);
1186 mutex_destroy(&ctx->state_mutex);
1187 return SET_ERRNO(ERROR_MEMORY, "Failed to create processed playback buffer");
1188 }
1189
1190 // Initialize worker thread infrastructure
1191 if (mutex_init(&ctx->worker_mutex) != 0) {
1192 audio_ring_buffer_destroy(ctx->processed_playback_rb);
1193 audio_ring_buffer_destroy(ctx->raw_render_rb);
1194 audio_ring_buffer_destroy(ctx->raw_capture_rb);
1195 audio_ring_buffer_destroy(ctx->playback_buffer);
1196 audio_ring_buffer_destroy(ctx->capture_buffer);
1197 audio_release_portaudio();
1198 mutex_destroy(&ctx->state_mutex);
1199 return SET_ERRNO(ERROR_THREAD, "Failed to initialize worker mutex");
1200 }
1201
1202 if (cond_init(&ctx->worker_cond) != 0) {
1203 mutex_destroy(&ctx->worker_mutex);
1204 audio_ring_buffer_destroy(ctx->processed_playback_rb);
1205 audio_ring_buffer_destroy(ctx->raw_render_rb);
1206 audio_ring_buffer_destroy(ctx->raw_capture_rb);
1207 audio_ring_buffer_destroy(ctx->playback_buffer);
1208 audio_ring_buffer_destroy(ctx->capture_buffer);
1209 audio_release_portaudio();
1210 mutex_destroy(&ctx->state_mutex);
1211 return SET_ERRNO(ERROR_THREAD, "Failed to initialize worker condition variable");
1212 }
1213
1214 // Allocate pre-allocated worker buffers (avoid malloc in worker loop)
1215 ctx->worker_capture_batch = SAFE_MALLOC(WORKER_BATCH_SAMPLES * sizeof(float), float *);
1216 if (!ctx->worker_capture_batch) {
1217 cond_destroy(&ctx->worker_cond);
1218 mutex_destroy(&ctx->worker_mutex);
1219 audio_ring_buffer_destroy(ctx->processed_playback_rb);
1220 audio_ring_buffer_destroy(ctx->raw_render_rb);
1221 audio_ring_buffer_destroy(ctx->raw_capture_rb);
1222 audio_ring_buffer_destroy(ctx->playback_buffer);
1223 audio_ring_buffer_destroy(ctx->capture_buffer);
1224 audio_release_portaudio();
1225 mutex_destroy(&ctx->state_mutex);
1226 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate worker capture batch buffer");
1227 }
1228
1229 ctx->worker_render_batch = SAFE_MALLOC(WORKER_BATCH_SAMPLES * sizeof(float), float *);
1230 if (!ctx->worker_render_batch) {
1231 SAFE_FREE(ctx->worker_capture_batch);
1232 cond_destroy(&ctx->worker_cond);
1233 mutex_destroy(&ctx->worker_mutex);
1234 audio_ring_buffer_destroy(ctx->processed_playback_rb);
1235 audio_ring_buffer_destroy(ctx->raw_render_rb);
1236 audio_ring_buffer_destroy(ctx->raw_capture_rb);
1237 audio_ring_buffer_destroy(ctx->playback_buffer);
1238 audio_ring_buffer_destroy(ctx->capture_buffer);
1239 audio_release_portaudio();
1240 mutex_destroy(&ctx->state_mutex);
1241 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate worker render batch buffer");
1242 }
1243
1244 ctx->worker_playback_batch = SAFE_MALLOC(WORKER_BATCH_SAMPLES * sizeof(float), float *);
1245 if (!ctx->worker_playback_batch) {
1246 SAFE_FREE(ctx->worker_render_batch);
1247 SAFE_FREE(ctx->worker_capture_batch);
1248 cond_destroy(&ctx->worker_cond);
1249 mutex_destroy(&ctx->worker_mutex);
1250 audio_ring_buffer_destroy(ctx->processed_playback_rb);
1251 audio_ring_buffer_destroy(ctx->raw_render_rb);
1252 audio_ring_buffer_destroy(ctx->raw_capture_rb);
1253 audio_ring_buffer_destroy(ctx->playback_buffer);
1254 audio_ring_buffer_destroy(ctx->capture_buffer);
1255 audio_release_portaudio();
1256 mutex_destroy(&ctx->state_mutex);
1257 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate worker playback batch buffer");
1258 }
1259
1260 // Initialize worker thread state (thread will be started in audio_start_duplex)
1261 ctx->worker_running = false;
1262 atomic_store(&ctx->worker_should_stop, false);
1263
1264 ctx->initialized = true;
1265 atomic_store(&ctx->shutting_down, false);
1266 log_info("Audio system initialized successfully (worker thread architecture enabled)");
1267 return ASCIICHAT_OK;
1268}
1269
1270void audio_destroy(audio_context_t *ctx) {
1271 if (!ctx) {
1272 return;
1273 }
1274
1275 // Always release PortAudio refcount if it was incremented
1276 // audio_init() calls Pa_Initialize() very early, and if it fails partway through,
1277 // ctx->initialized will be false. But we MUST still call audio_release_portaudio()
1278 // to properly decrement the refcount and allow Pa_Terminate() to be called.
1279 if (ctx->initialized) {
1280
1281 // Stop duplex stream if running (this also stops the worker thread)
1282 if (ctx->running) {
1283 audio_stop_duplex(ctx);
1284 }
1285
1286 // Ensure worker thread is stopped even if streams weren't running
1287 if (ctx->worker_running) {
1288 log_debug("Stopping worker thread during audio_destroy");
1289 atomic_store(&ctx->worker_should_stop, true);
1290 cond_signal(&ctx->worker_cond); // Wake up worker if waiting
1291 asciichat_thread_join(&ctx->worker_thread, NULL);
1292 ctx->worker_running = false;
1293 }
1294
1295 mutex_lock(&ctx->state_mutex);
1296
1297 // Destroy all ring buffers (old + new)
1298 audio_ring_buffer_destroy(ctx->capture_buffer);
1299 audio_ring_buffer_destroy(ctx->playback_buffer);
1300 audio_ring_buffer_destroy(ctx->raw_capture_rb);
1301 audio_ring_buffer_destroy(ctx->raw_render_rb);
1302 audio_ring_buffer_destroy(ctx->processed_playback_rb);
1303 audio_ring_buffer_destroy(ctx->render_buffer); // May be NULL, that's OK
1304
1305 // Free pre-allocated worker buffers
1306 SAFE_FREE(ctx->worker_capture_batch);
1307 SAFE_FREE(ctx->worker_render_batch);
1308 SAFE_FREE(ctx->worker_playback_batch);
1309
1310 // Destroy worker synchronization primitives
1311 cond_destroy(&ctx->worker_cond);
1312 mutex_destroy(&ctx->worker_mutex);
1313
1314 ctx->initialized = false;
1315
1316 mutex_unlock(&ctx->state_mutex);
1317 mutex_destroy(&ctx->state_mutex);
1318
1319 log_debug("Audio system cleanup complete (all resources released)");
1320 } else {
1321 }
1322
1323 // MUST happen for both initialized and non-initialized contexts
1324 // If audio_init() called Pa_Initialize() but failed partway, refcount must be decremented
1325 audio_release_portaudio();
1326}
1327
1328void audio_set_pipeline(audio_context_t *ctx, void *pipeline) {
1329 if (!ctx)
1330 return;
1331 ctx->audio_pipeline = pipeline;
1332}
1333
1334void audio_flush_playback_buffers(audio_context_t *ctx) {
1335 if (!ctx || !ctx->initialized) {
1336 return;
1337 }
1338
1339 if (ctx->playback_buffer) {
1340 audio_ring_buffer_clear(ctx->playback_buffer);
1341 }
1342 if (ctx->processed_playback_rb) {
1343 audio_ring_buffer_clear(ctx->processed_playback_rb);
1344 }
1345 if (ctx->render_buffer) {
1346 audio_ring_buffer_clear(ctx->render_buffer);
1347 }
1348 if (ctx->raw_render_rb) {
1349 audio_ring_buffer_clear(ctx->raw_render_rb);
1350 }
1351}
1352
1353asciichat_error_t audio_start_duplex(audio_context_t *ctx) {
1354 if (!ctx || !ctx->initialized) {
1355 return SET_ERRNO(ERROR_INVALID_STATE, "Audio context not initialized");
1356 }
1357
1358 // Initialize PortAudio here, when we actually need to open streams
1359 // This defers Pa_Initialize() until necessary, avoiding premature ALSA allocation
1360 asciichat_error_t pa_result = audio_ensure_portaudio_initialized();
1361 if (pa_result != ASCIICHAT_OK) {
1362 return pa_result;
1363 }
1364
1365 mutex_lock(&ctx->state_mutex);
1366
1367 // Already running?
1368 if (ctx->duplex_stream || ctx->input_stream || ctx->output_stream) {
1369 mutex_unlock(&ctx->state_mutex);
1370 return ASCIICHAT_OK;
1371 }
1372
1373 // Setup input parameters (skip if playback-only mode)
1374 PaStreamParameters inputParams = {0};
1375 const PaDeviceInfo *inputInfo = NULL;
1376 bool has_input = false;
1377
1378 if (!ctx->playback_only) {
1379 if (GET_OPTION(microphone_index) >= 0) {
1380 inputParams.device = GET_OPTION(microphone_index);
1381 } else {
1382 inputParams.device = Pa_GetDefaultInputDevice();
1383 }
1384
1385 if (inputParams.device == paNoDevice) {
1386 mutex_unlock(&ctx->state_mutex);
1387 return SET_ERRNO(ERROR_AUDIO, "No input device available");
1388 }
1389
1390 inputInfo = Pa_GetDeviceInfo(inputParams.device);
1391 if (!inputInfo) {
1392 mutex_unlock(&ctx->state_mutex);
1393 return SET_ERRNO(ERROR_AUDIO, "Input device info not found");
1394 }
1395
1396 has_input = true;
1397 inputParams.channelCount = AUDIO_CHANNELS;
1398 inputParams.sampleFormat = paFloat32;
1399 inputParams.suggestedLatency = inputInfo->defaultLowInputLatency;
1400 inputParams.hostApiSpecificStreamInfo = NULL;
1401 }
1402
1403 // Setup output parameters
1404 PaStreamParameters outputParams;
1405 const PaDeviceInfo *outputInfo = NULL;
1406 bool has_output = false;
1407
1408 if (GET_OPTION(speakers_index) >= 0) {
1409 outputParams.device = GET_OPTION(speakers_index);
1410 } else {
1411 outputParams.device = Pa_GetDefaultOutputDevice();
1412 }
1413
1414 if (outputParams.device != paNoDevice) {
1415 outputInfo = Pa_GetDeviceInfo(outputParams.device);
1416 if (outputInfo) {
1417 has_output = true;
1418 outputParams.channelCount = AUDIO_CHANNELS;
1419 outputParams.sampleFormat = paFloat32;
1420 outputParams.suggestedLatency = outputInfo->defaultLowOutputLatency;
1421 outputParams.hostApiSpecificStreamInfo = NULL;
1422 } else {
1423 log_warn("Output device info not found for device %d", outputParams.device);
1424 }
1425 }
1426
1427 // Store device rates for diagnostics (only access if device info was retrieved)
1428 ctx->input_device_rate = (has_input && inputInfo) ? inputInfo->defaultSampleRate : 0;
1429 ctx->output_device_rate = (has_output && outputInfo) ? outputInfo->defaultSampleRate : 0;
1430
1431 log_debug("Opening audio:");
1432 if (has_input) {
1433 log_info(" Input: %s (%.0f Hz)", inputInfo->name, inputInfo->defaultSampleRate);
1434 } else if (ctx->playback_only) {
1435 log_debug(" Input: (playback-only mode - no microphone)");
1436 } else {
1437 log_debug(" Input: (none)");
1438 }
1439 if (has_output) {
1440 log_info(" Output: %s (%.0f Hz)", outputInfo->name, outputInfo->defaultSampleRate);
1441 } else {
1442 log_debug(" Output: None (input-only mode - will send audio to server)");
1443 }
1444
1445 // Check if sample rates differ - ALSA full-duplex doesn't handle this well
1446 // If no input or no output, always use separate streams
1447 bool rates_differ = has_input && has_output && (inputInfo->defaultSampleRate != outputInfo->defaultSampleRate);
1448 bool try_separate = rates_differ || !has_input || !has_output;
1449 PaError err = paNoError;
1450
1451 if (!try_separate) {
1452 // Try full-duplex first (preferred - perfect AEC3 timing)
1453 err = Pa_OpenStream(&ctx->duplex_stream, &inputParams, &outputParams, AUDIO_SAMPLE_RATE, AUDIO_FRAMES_PER_BUFFER,
1454 paClipOff, duplex_callback, ctx);
1455
1456 if (err == paNoError) {
1457 err = Pa_StartStream(ctx->duplex_stream);
1458 if (err != paNoError) {
1459 Pa_CloseStream(ctx->duplex_stream);
1460 ctx->duplex_stream = NULL;
1461 log_warn("Full-duplex stream failed to start: %s", Pa_GetErrorText(err));
1462 try_separate = true;
1463 }
1464 } else {
1465 log_warn("Full-duplex stream failed to open: %s", Pa_GetErrorText(err));
1466 try_separate = true;
1467 }
1468 }
1469
1470 if (try_separate) {
1471 // Fall back to separate streams (needed when sample rates differ or input-only/playback-only mode)
1472 if (has_output && has_input) {
1473 log_info("Using separate input/output streams (sample rates differ: %.0f vs %.0f Hz)",
1474 inputInfo->defaultSampleRate, outputInfo->defaultSampleRate);
1475 log_info(" Will resample: buffer at %.0f Hz → output at %.0f Hz", (double)AUDIO_SAMPLE_RATE,
1476 outputInfo->defaultSampleRate);
1477 } else if (has_output) {
1478 log_debug("Using output-only mode (playback-only for mirror/media)");
1479 } else if (has_input) {
1480 log_info("Using input-only mode (no output device available)");
1481 }
1482
1483 // Store the internal sample rate (buffer rate)
1484 ctx->sample_rate = AUDIO_SAMPLE_RATE;
1485
1486 // Create render buffer for AEC3 reference synchronization
1487 ctx->render_buffer = audio_ring_buffer_create_for_capture();
1488 if (!ctx->render_buffer) {
1489 mutex_unlock(&ctx->state_mutex);
1490 return SET_ERRNO(ERROR_MEMORY, "Failed to create render buffer");
1491 }
1492
1493 // Open output stream only if output device exists
1494 bool output_ok = false;
1495 double actual_output_rate = 0;
1496 if (has_output) {
1497 // Try to use AUDIO_SAMPLE_RATE (48kHz) first for best quality and duplex compatibility
1498 // Fall back to native rate if 48kHz not supported
1499 double preferred_rate = AUDIO_SAMPLE_RATE;
1500 double native_rate = outputInfo->defaultSampleRate;
1501
1502 log_debug("Attempting output at %.0f Hz (preferred) vs %.0f Hz (native)", preferred_rate, native_rate);
1503
1504 // Always use output_callback for output streams (both output-only and duplex modes)
1505 // PortAudio will invoke the callback whenever it needs audio data
1506 // The callback reads from media_source (mirror mode) or playback buffers (network mode)
1507 PaStreamCallback *callback = output_callback;
1508
1509 // Try preferred rate first
1510 err = Pa_OpenStream(&ctx->output_stream, NULL, &outputParams, preferred_rate, AUDIO_FRAMES_PER_BUFFER, paClipOff,
1511 callback, ctx);
1512
1513 if (err == paNoError) {
1514 actual_output_rate = preferred_rate;
1515 output_ok = true;
1516 log_info("✓ Output opened at preferred rate: %.0f Hz (matches input - optimal!)", preferred_rate);
1517 } else {
1518 log_warn("Failed to open output at %.0f Hz: %s, trying native rate %.0f Hz", preferred_rate,
1519 Pa_GetErrorText(err), native_rate);
1520
1521 // If first Pa_OpenStream call left a partial stream, clean it up before retrying
1522 if (ctx->output_stream) {
1523 log_debug("Closing partially-opened output stream from failed preferred rate");
1524 Pa_CloseStream(ctx->output_stream);
1525 ctx->output_stream = NULL;
1526 }
1527
1528 // Fall back to native rate (still using blocking mode for output-only)
1529 err = Pa_OpenStream(&ctx->output_stream, NULL, &outputParams, native_rate, AUDIO_FRAMES_PER_BUFFER, paClipOff,
1530 callback, ctx);
1531
1532 if (err == paNoError) {
1533 actual_output_rate = native_rate;
1534 output_ok = true;
1535 log_info("✓ Output opened at native rate: %.0f Hz (will need resampling)", native_rate);
1536 } else {
1537 log_warn("Failed to open output stream at native rate: %s", Pa_GetErrorText(err));
1538 // Clean up if fallback also failed
1539 if (ctx->output_stream) {
1540 log_debug("Closing partially-opened output stream from failed native rate");
1541 Pa_CloseStream(ctx->output_stream);
1542 ctx->output_stream = NULL;
1543 }
1544 }
1545 }
1546
1547 // Store actual output rate for resampling
1548 if (output_ok) {
1549 ctx->output_device_rate = actual_output_rate;
1550 if (actual_output_rate != AUDIO_SAMPLE_RATE) {
1551 log_warn("⚠️ Output rate mismatch: %.0f Hz output vs %.0f Hz input - resampling will be used",
1552 actual_output_rate, (double)AUDIO_SAMPLE_RATE);
1553 }
1554 }
1555 }
1556
1557 // Open input stream only if we have input (skip for playback-only mode)
1558 bool input_ok = !has_input; // If no input, mark as OK (skip)
1559 if (has_input) {
1560 // Use pipeline sample rate (AUDIO_SAMPLE_RATE)
1561 // In input-only mode, we don't need to match output device rate
1562 double input_stream_rate = AUDIO_SAMPLE_RATE;
1563 err = Pa_OpenStream(&ctx->input_stream, &inputParams, NULL, input_stream_rate, AUDIO_FRAMES_PER_BUFFER, paClipOff,
1564 input_callback, ctx);
1565 input_ok = (err == paNoError);
1566
1567 // If input failed, try device 0 as fallback (HDMI on BeaglePlay)
1568 if (!input_ok) {
1569 log_debug("Input failed - trying device 0 as fallback");
1570
1571 // Clean up partial stream from first attempt before retrying
1572 if (ctx->input_stream) {
1573 log_debug("Closing partially-opened input stream from failed primary device");
1574 Pa_CloseStream(ctx->input_stream);
1575 ctx->input_stream = NULL;
1576 }
1577
1578 PaStreamParameters fallback_input_params = inputParams;
1579 fallback_input_params.device = 0;
1580 const PaDeviceInfo *device_0_info = Pa_GetDeviceInfo(0);
1581 if (device_0_info && device_0_info->maxInputChannels > 0) {
1582 err = Pa_OpenStream(&ctx->input_stream, &fallback_input_params, NULL, input_stream_rate,
1583 AUDIO_FRAMES_PER_BUFFER, paClipOff, input_callback, ctx);
1584 if (err == paNoError) {
1585 log_info("Input stream opened on device 0 (fallback from default)");
1586 input_ok = true;
1587 } else {
1588 log_warn("Fallback also failed on device 0: %s", Pa_GetErrorText(err));
1589 // Clean up if fallback also failed
1590 if (ctx->input_stream) {
1591 log_debug("Closing partially-opened input stream from failed fallback device");
1592 Pa_CloseStream(ctx->input_stream);
1593 ctx->input_stream = NULL;
1594 }
1595 }
1596 }
1597 }
1598
1599 if (!input_ok) {
1600 log_warn("Failed to open input stream: %s", Pa_GetErrorText(err));
1601 }
1602 }
1603
1604 // Check if we got at least one stream working
1605 if (!input_ok && !output_ok) {
1606 // Neither stream works - fail completely
1607 audio_ring_buffer_destroy(ctx->render_buffer);
1608 ctx->render_buffer = NULL;
1609 mutex_unlock(&ctx->state_mutex);
1610 return SET_ERRNO(ERROR_AUDIO, "Failed to open both input and output streams");
1611 }
1612
1613 // If output failed but input works, we can still send audio to server
1614 if (!output_ok && input_ok) {
1615 log_info("Output stream unavailable - continuing with input-only (can send audio to server)");
1616 ctx->output_stream = NULL;
1617 }
1618 // If input failed but output works, we can still receive audio from server
1619 if (!input_ok && output_ok) {
1620 log_info("Input stream unavailable - continuing with output-only (can receive audio from server)");
1621 ctx->input_stream = NULL;
1622 }
1623
1624 // Start output stream if it's open
1625 if (ctx->output_stream) {
1626 err = Pa_StartStream(ctx->output_stream);
1627 if (err != paNoError) {
1628 if (ctx->input_stream)
1629 Pa_CloseStream(ctx->input_stream);
1630 Pa_CloseStream(ctx->output_stream);
1631 ctx->input_stream = NULL;
1632 ctx->output_stream = NULL;
1633 audio_ring_buffer_destroy(ctx->render_buffer);
1634 ctx->render_buffer = NULL;
1635 mutex_unlock(&ctx->state_mutex);
1636 return SET_ERRNO(ERROR_AUDIO, "Failed to start output stream: %s", Pa_GetErrorText(err));
1637 }
1638 }
1639
1640 // Start input stream if it's open
1641 if (ctx->input_stream) {
1642 err = Pa_StartStream(ctx->input_stream);
1643 if (err != paNoError) {
1644 if (ctx->output_stream)
1645 Pa_StopStream(ctx->output_stream);
1646 if (ctx->input_stream)
1647 Pa_CloseStream(ctx->input_stream);
1648 if (ctx->output_stream)
1649 Pa_CloseStream(ctx->output_stream);
1650 ctx->input_stream = NULL;
1651 ctx->output_stream = NULL;
1652 audio_ring_buffer_destroy(ctx->render_buffer);
1653 ctx->render_buffer = NULL;
1654 mutex_unlock(&ctx->state_mutex);
1655 return SET_ERRNO(ERROR_AUDIO, "Failed to start input stream: %s", Pa_GetErrorText(err));
1656 }
1657 }
1658
1659 ctx->separate_streams = true;
1660 log_debug("Separate streams started successfully");
1661 } else {
1662 ctx->separate_streams = false;
1663 log_info("Full-duplex stream started (single callback, perfect AEC3 timing)");
1664 }
1665
1667
1668 // Start worker thread for heavy audio processing
1669 if (!ctx->worker_running) {
1670 atomic_store(&ctx->worker_should_stop, false);
1671 if (asciichat_thread_create(&ctx->worker_thread, audio_worker_thread, ctx) != 0) {
1672 // Failed to create worker thread - stop streams and cleanup
1673 if (ctx->duplex_stream) {
1674 Pa_StopStream(ctx->duplex_stream);
1675 Pa_CloseStream(ctx->duplex_stream);
1676 ctx->duplex_stream = NULL;
1677 }
1678 if (ctx->input_stream) {
1679 Pa_StopStream(ctx->input_stream);
1680 Pa_CloseStream(ctx->input_stream);
1681 ctx->input_stream = NULL;
1682 }
1683 if (ctx->output_stream) {
1684 Pa_StopStream(ctx->output_stream);
1685 Pa_CloseStream(ctx->output_stream);
1686 ctx->output_stream = NULL;
1687 }
1688 audio_ring_buffer_destroy(ctx->render_buffer);
1689 ctx->render_buffer = NULL;
1690 mutex_unlock(&ctx->state_mutex);
1691 return SET_ERRNO(ERROR_THREAD, "Failed to create worker thread");
1692 }
1693 ctx->worker_running = true;
1694 log_debug("Worker thread started successfully");
1695 }
1696
1697 ctx->running = true;
1698 ctx->sample_rate = AUDIO_SAMPLE_RATE;
1699 mutex_unlock(&ctx->state_mutex);
1700 return ASCIICHAT_OK;
1701}
1702
1703asciichat_error_t audio_stop_duplex(audio_context_t *ctx) {
1704 if (!ctx || !ctx->initialized) {
1705 return SET_ERRNO(ERROR_INVALID_STATE, "Audio context not initialized");
1706 }
1707
1708 atomic_store(&ctx->shutting_down, true);
1709
1710 // Stop worker thread before stopping streams
1711 if (ctx->worker_running) {
1712 log_debug("Stopping worker thread");
1713 atomic_store(&ctx->worker_should_stop, true);
1714 cond_signal(&ctx->worker_cond); // Wake up worker if waiting
1715 asciichat_thread_join(&ctx->worker_thread, NULL);
1716 ctx->worker_running = false;
1717 log_debug("Worker thread stopped successfully");
1718 }
1719
1720 if (ctx->playback_buffer) {
1721 audio_ring_buffer_clear(ctx->playback_buffer);
1722 }
1723
1724 Pa_Sleep(50); // Let callbacks drain
1725
1726 mutex_lock(&ctx->state_mutex);
1727
1728 if (ctx->duplex_stream) {
1729 log_debug("Stopping duplex stream");
1730 PaError err = Pa_StopStream(ctx->duplex_stream);
1731 if (err != paNoError) {
1732 log_warn("Pa_StopStream failed: %s", Pa_GetErrorText(err));
1733 }
1734 log_debug("Closing duplex stream");
1735 err = Pa_CloseStream(ctx->duplex_stream);
1736 if (err != paNoError) {
1737 log_warn("Pa_CloseStream failed: %s", Pa_GetErrorText(err));
1738 } else {
1739 log_debug("Duplex stream closed successfully");
1740 }
1741 ctx->duplex_stream = NULL;
1742 }
1743
1744 // Stop separate streams if used
1745 if (ctx->input_stream) {
1746 log_debug("Stopping input stream");
1747 PaError err = Pa_StopStream(ctx->input_stream);
1748 if (err != paNoError) {
1749 log_warn("Pa_StopStream input failed: %s", Pa_GetErrorText(err));
1750 }
1751 log_debug("Closing input stream");
1752 err = Pa_CloseStream(ctx->input_stream);
1753 if (err != paNoError) {
1754 log_warn("Pa_CloseStream input failed: %s", Pa_GetErrorText(err));
1755 } else {
1756 log_debug("Input stream closed successfully");
1757 }
1758 ctx->input_stream = NULL;
1759 }
1760
1761 if (ctx->output_stream) {
1762 log_debug("Stopping output stream");
1763 PaError err = Pa_StopStream(ctx->output_stream);
1764 if (err != paNoError) {
1765 log_warn("Pa_StopStream output failed: %s", Pa_GetErrorText(err));
1766 }
1767 log_debug("Closing output stream");
1768 err = Pa_CloseStream(ctx->output_stream);
1769 if (err != paNoError) {
1770 log_warn("Pa_CloseStream output failed: %s", Pa_GetErrorText(err));
1771 } else {
1772 log_debug("Output stream closed successfully");
1773 }
1774 ctx->output_stream = NULL;
1775 }
1776
1777 // Cleanup render buffer
1778 if (ctx->render_buffer) {
1779 audio_ring_buffer_destroy(ctx->render_buffer);
1780 ctx->render_buffer = NULL;
1781 }
1782
1783 ctx->running = false;
1784 ctx->separate_streams = false;
1785 mutex_unlock(&ctx->state_mutex);
1786
1787 log_debug("Audio stopped");
1788 return ASCIICHAT_OK;
1789}
1790
1791asciichat_error_t audio_read_samples(audio_context_t *ctx, float *buffer, int num_samples) {
1792 if (!ctx || !ctx->initialized || !buffer || num_samples <= 0) {
1793 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: ctx=%p, buffer=%p, num_samples=%d", ctx, buffer,
1794 num_samples);
1795 }
1796
1797 // audio_ring_buffer_read now returns number of samples read, not error code
1798 int samples_read = audio_ring_buffer_read(ctx->capture_buffer, buffer, num_samples);
1799 return (samples_read >= 0) ? ASCIICHAT_OK : ERROR_AUDIO;
1800}
1801
1802asciichat_error_t audio_write_samples(audio_context_t *ctx, const float *buffer, int num_samples) {
1803 if (!ctx || !ctx->initialized || !buffer || num_samples <= 0) {
1804 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters: ctx=%p, buffer=%p, num_samples=%d", ctx, buffer,
1805 num_samples);
1806 }
1807
1808 // Don't accept new audio data during shutdown - this prevents garbage/beeps
1809 if (atomic_load(&ctx->shutting_down)) {
1810 return ASCIICHAT_OK; // Silently discard
1811 }
1812
1813 asciichat_error_t result = audio_ring_buffer_write(ctx->playback_buffer, buffer, num_samples);
1814
1815 return result;
1816}
1817
1818// Internal helper to list audio devices (input or output)
1819static asciichat_error_t audio_list_devices_internal(audio_device_info_t **out_devices, unsigned int *out_count,
1820 bool list_inputs) {
1821 if (!out_devices || !out_count) {
1822 return SET_ERRNO(ERROR_INVALID_PARAM, "audio_list_devices: invalid parameters");
1823 }
1824
1825 *out_devices = NULL;
1826 *out_count = 0;
1827
1828 // Ensure PortAudio is initialized (centralized initialization)
1829 asciichat_error_t pa_result = audio_ensure_portaudio_initialized();
1830 if (pa_result != ASCIICHAT_OK) {
1831 return pa_result;
1832 }
1833
1834 int num_devices = Pa_GetDeviceCount();
1835 if (num_devices < 0) {
1836 audio_release_portaudio();
1837 return SET_ERRNO(ERROR_AUDIO, "Failed to get device count: %s", Pa_GetErrorText(num_devices));
1838 }
1839
1840 if (num_devices == 0) {
1841 audio_release_portaudio();
1842 return ASCIICHAT_OK; // No devices found
1843 }
1844
1845 // Get default device indices
1846 PaDeviceIndex default_input = Pa_GetDefaultInputDevice();
1847 PaDeviceIndex default_output = Pa_GetDefaultOutputDevice();
1848
1849 // First pass: count matching devices
1850 unsigned int device_count = 0;
1851 for (int i = 0; i < num_devices; i++) {
1852 const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
1853 if (info) {
1854 bool matches = list_inputs ? (info->maxInputChannels > 0) : (info->maxOutputChannels > 0);
1855 if (matches) {
1856 device_count++;
1857 }
1858 }
1859 }
1860
1861 if (device_count == 0) {
1862 audio_release_portaudio();
1863 return ASCIICHAT_OK; // No matching devices
1864 }
1865
1866 // Allocate device array
1867 audio_device_info_t *devices = SAFE_CALLOC(device_count, sizeof(audio_device_info_t), audio_device_info_t *);
1868 if (!devices) {
1869 audio_release_portaudio();
1870 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate audio device info array");
1871 }
1872
1873 // Second pass: populate device info
1874 unsigned int idx = 0;
1875 for (int i = 0; i < num_devices && idx < device_count; i++) {
1876 const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
1877 if (!info)
1878 continue;
1879
1880 bool match = list_inputs ? (info->maxInputChannels > 0) : (info->maxOutputChannels > 0);
1881 if (!match)
1882 continue;
1883
1884 devices[idx].index = i;
1885 if (info->name) {
1886 SAFE_STRNCPY(devices[idx].name, info->name, AUDIO_DEVICE_NAME_MAX);
1887 } else {
1888 SAFE_STRNCPY(devices[idx].name, "<Unknown>", AUDIO_DEVICE_NAME_MAX);
1889 }
1890 devices[idx].max_input_channels = info->maxInputChannels;
1891 devices[idx].max_output_channels = info->maxOutputChannels;
1892 devices[idx].default_sample_rate = info->defaultSampleRate;
1893 devices[idx].is_default_input = (i == default_input);
1894 devices[idx].is_default_output = (i == default_output);
1895 idx++;
1896 }
1897
1898 // Release PortAudio (centralized refcount management)
1899 audio_release_portaudio();
1900
1901 *out_devices = devices;
1902 *out_count = idx;
1903 return ASCIICHAT_OK;
1904}
1905
1906asciichat_error_t audio_list_input_devices(audio_device_info_t **out_devices, unsigned int *out_count) {
1907 return audio_list_devices_internal(out_devices, out_count, true);
1908}
1909
1910asciichat_error_t audio_list_output_devices(audio_device_info_t **out_devices, unsigned int *out_count) {
1911 return audio_list_devices_internal(out_devices, out_count, false);
1912}
1913
/**
 * @brief Release a device array by handing it to SAFE_FREE.
 *
 * @param devices Array to release; presumably one produced by
 *                audio_list_input_devices() or audio_list_output_devices()
 *                (confirm NULL handling against the SAFE_FREE definition).
 */
void audio_free_device_list(audio_device_info_t *devices) {
  SAFE_FREE(devices);
}
1917
1918asciichat_error_t audio_dequantize_samples(const uint8_t *samples_ptr, uint32_t total_samples, float *out_samples) {
1919 if (!samples_ptr || !out_samples || total_samples == 0) {
1920 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters for audio dequantization");
1921 }
1922
1923 for (uint32_t i = 0; i < total_samples; i++) {
1924 uint32_t network_sample;
1925 // Use memcpy to safely handle potential misalignment from packet header
1926 memcpy(&network_sample, samples_ptr + i * sizeof(uint32_t), sizeof(uint32_t));
1927 int32_t scaled = (int32_t)NET_TO_HOST_U32(network_sample);
1928 out_samples[i] = (float)scaled / 2147483647.0f;
1929 }
1930
1931 return ASCIICHAT_OK;
1932}
1933
1934asciichat_error_t audio_set_realtime_priority(void) {
1935 // Delegate to platform abstraction layer
1936 asciichat_error_t result = asciichat_thread_set_realtime_priority();
1937 if (result == ASCIICHAT_OK) {
1938 log_debug("✓ Audio thread real-time priority set successfully");
1939 }
1940 return result;
1941}
1942
1943/* ============================================================================
1944 * Audio Batch Packet Parsing
1945 * ============================================================================
1946 */
1947
1948asciichat_error_t audio_parse_batch_header(const void *data, size_t len, audio_batch_info_t *out_batch) {
1949 if (!data) {
1950 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch header data pointer is NULL");
1951 }
1952
1953 if (!out_batch) {
1954 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch info output pointer is NULL");
1955 }
1956
1957 if (len < sizeof(audio_batch_packet_t)) {
1958 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch header too small (len=%zu, expected=%zu)", len,
1959 sizeof(audio_batch_packet_t));
1960 }
1961
1962 const audio_batch_packet_t *batch_header = (const audio_batch_packet_t *)data;
1963
1964 // Unpack network byte order values to host byte order
1965 out_batch->batch_count = ntohl(batch_header->batch_count);
1966 out_batch->total_samples = ntohl(batch_header->total_samples);
1967 out_batch->sample_rate = ntohl(batch_header->sample_rate);
1968 out_batch->channels = ntohl(batch_header->channels);
1969
1970 return ASCIICHAT_OK;
1971}
1972
1973asciichat_error_t audio_validate_batch_params(const audio_batch_info_t *batch) {
1974 if (!batch) {
1975 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch info pointer is NULL");
1976 }
1977
1978 // Validate batch_count
1979 if (batch->batch_count == 0) {
1980 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch count cannot be zero");
1981 }
1982
1983 // Check for reasonable max (256 frames per batch is very generous)
1984 if (batch->batch_count > 256) {
1985 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch count too large (batch_count=%u, max=256)", batch->batch_count);
1986 }
1987
1988 // Validate channels (1=mono, 2=stereo, max 8 for multi-channel)
1989 if (batch->channels == 0 || batch->channels > 8) {
1990 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid channel count (channels=%u, valid=1-8)", batch->channels);
1991 }
1992
1993 // Validate sample rate
1995 return SET_ERRNO(ERROR_INVALID_PARAM, "Unsupported sample rate (sample_rate=%u)", batch->sample_rate);
1996 }
1997
1998 // Check for reasonable sample counts
1999 if (batch->total_samples == 0) {
2000 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch has zero samples");
2001 }
2002
2003 // Each batch typically has samples_per_frame worth of samples
2004 // For 48kHz at 20ms per frame: 48000 * 0.02 = 960 samples per frame
2005 // With max 256 frames, that's up to ~245k samples per batch
2006 if (batch->total_samples > 1000000) {
2007 return SET_ERRNO(ERROR_INVALID_PARAM, "Audio batch sample count suspiciously large (total_samples=%u)",
2008 batch->total_samples);
2009 }
2010
2011 return ASCIICHAT_OK;
2012}
2013
/**
 * @brief Check a sample rate against the fixed list of accepted rates.
 *
 * @param sample_rate Sample rate in Hz.
 * @return true for 8000, 16000, 24000, 32000, 44100, 48000, 96000 or 192000;
 *         false for any other value.
 */
bool audio_is_supported_sample_rate(uint32_t sample_rate) {
  switch (sample_rate) {
  case 8000:   // Telephone quality
  case 16000:  // Wideband telephony
  case 24000:  // High quality speech
  case 32000:  // Good for video
  case 44100:  // CD quality (less common in VoIP)
  case 48000:  // Standard professional
  case 96000:  // High-end professional
  case 192000: // Ultra-high-end mastering
    return true;
  default:
    return false;
  }
}
2036
2037bool audio_should_enable_microphone(audio_source_t source, bool has_media_audio) {
2038 switch (source) {
2039 case AUDIO_SOURCE_AUTO:
2040 return !has_media_audio;
2041
2042 case AUDIO_SOURCE_MIC:
2043 return true;
2044
2045 case AUDIO_SOURCE_MEDIA:
2046 return false;
2047
2048 case AUDIO_SOURCE_BOTH:
2049 return true;
2050
2051 default:
2052 return !has_media_audio;
2053 }
2054}
void buffer_pool_free(buffer_pool_t *pool, void *data, size_t size)
void * buffer_pool_alloc(buffer_pool_t *pool, size_t size)
Definition buffer_pool.c:99
void client_audio_pipeline_process_duplex(client_audio_pipeline_t *pipeline, const float *render_samples, int render_count, const float *capture_samples, int capture_count, float *processed_output)
size_t audio_ring_buffer_read(audio_ring_buffer_t *rb, float *data, size_t samples)
#define WORKER_BATCH_FRAMES
void audio_free_device_list(audio_device_info_t *devices)
asciichat_error_t audio_ring_buffer_write(audio_ring_buffer_t *rb, const float *data, int samples)
void audio_ring_buffer_destroy(audio_ring_buffer_t *rb)
asciichat_error_t audio_init(audio_context_t *ctx)
asciichat_error_t audio_set_realtime_priority(void)
audio_ring_buffer_t * audio_ring_buffer_create(void)
bool audio_is_supported_sample_rate(uint32_t sample_rate)
size_t audio_ring_buffer_available_read(audio_ring_buffer_t *rb)
asciichat_error_t audio_start_duplex(audio_context_t *ctx)
asciichat_error_t audio_parse_batch_header(const void *data, size_t len, audio_batch_info_t *out_batch)
void audio_destroy(audio_context_t *ctx)
asciichat_error_t audio_stop_duplex(audio_context_t *ctx)
asciichat_error_t audio_write_samples(audio_context_t *ctx, const float *buffer, int num_samples)
void audio_terminate_portaudio_final(void)
Terminate PortAudio and free all device resources.
#define WORKER_BATCH_SAMPLES
void audio_flush_playback_buffers(audio_context_t *ctx)
void audio_set_pipeline(audio_context_t *ctx, void *pipeline)
audio_ring_buffer_t * audio_ring_buffer_create_for_capture(void)
#define WORKER_TIMEOUT_MS
size_t audio_ring_buffer_peek(audio_ring_buffer_t *rb, float *data, size_t samples)
Peek at available samples without consuming them (for AEC3 render signal)
asciichat_error_t audio_validate_batch_params(const audio_batch_info_t *batch)
asciichat_error_t audio_list_input_devices(audio_device_info_t **out_devices, unsigned int *out_count)
asciichat_error_t audio_dequantize_samples(const uint8_t *samples_ptr, uint32_t total_samples, float *out_samples)
bool audio_should_enable_microphone(audio_source_t source, bool has_media_audio)
asciichat_error_t audio_list_output_devices(audio_device_info_t **out_devices, unsigned int *out_count)
void audio_ring_buffer_clear(audio_ring_buffer_t *rb)
asciichat_error_t audio_read_samples(audio_context_t *ctx, float *buffer, int num_samples)
void resample_linear(const float *src, size_t src_samples, float *dst, size_t dst_samples, double src_rate, double dst_rate)
size_t audio_ring_buffer_available_write(audio_ring_buffer_t *rb)
struct PaStreamCallbackTimeInfo PaStreamCallbackTimeInfo
Definition portaudio.h:16
#define paNoError
Definition portaudio.h:19
int PaError
Definition portaudio.h:14
unsigned long PaStreamCallbackFlags
Definition portaudio.h:15
size_t media_source_read_audio(media_source_t *source, float *buffer, size_t num_samples)
Definition source.c:526
int mutex_init(mutex_t *mutex)
Definition threading.c:16
int asciichat_thread_create(asciichat_thread_t *thread, void *(*start_routine)(void *), void *arg)
Definition threading.c:42
int asciichat_thread_join(asciichat_thread_t *thread, void **retval)
Definition threading.c:46
int mutex_destroy(mutex_t *mutex)
Definition threading.c:21
uint64_t time_get_ns(void)
Definition util/time.c:48
int format_duration_ns(double nanoseconds, char *buffer, size_t buffer_size)
Definition util/time.c:275
uint64_t time_elapsed_ns(uint64_t start_ns, uint64_t end_ns)
Definition util/time.c:90
const char * platform_getenv(const char *name)
Definition wasm/system.c:13