115 SAFE_MEMSET(&g_sent_stats,
sizeof(g_sent_stats), 0,
sizeof(g_sent_stats));
116 SAFE_MEMSET(&g_received_stats,
sizeof(g_received_stats), 0,
sizeof(g_received_stats));
119 SAFE_MEMSET(g_received_gap_intervals_ms,
sizeof(g_received_gap_intervals_ms), 0,
sizeof(g_received_gap_intervals_ms));
120 g_received_gap_count = 0;
121 g_received_silence_start_sample = 0;
122 g_received_last_silence_end_sample = 0;
123 SAFE_MEMSET(g_received_packet_times_ns,
sizeof(g_received_packet_times_ns), 0,
sizeof(g_received_packet_times_ns));
124 g_received_packet_times_count = 0;
125 SAFE_MEMSET(g_received_packet_sizes,
sizeof(g_received_packet_sizes), 0,
sizeof(g_received_packet_sizes));
126 g_received_total_audio_samples = 0;
129 SAFE_MEMSET(g_echo_buffer,
sizeof(g_echo_buffer), 0,
sizeof(g_echo_buffer));
130 g_echo_buffer_pos = 0;
131 g_echo_correlation_sample_count = 0;
133 g_echo_correlation_strength[i] = 0;
134 g_echo_match_count[i] = 0;
136 g_detected_echo_delay_ms = 0;
139 SAFE_MEMSET(g_received_beep_window,
sizeof(g_received_beep_window), 0,
sizeof(g_received_beep_window));
140 g_received_beep_window_idx = 0;
141 g_received_beep_events = 0;
142 g_received_tonal_samples = 0;
143 g_in_beep_burst =
false;
144 g_beep_burst_samples = 0;
146 int64_t now_us = (int64_t)time_ns_to_us(
time_get_ns());
148 g_sent_stats.timestamp_start_ns = now_us;
149 g_received_stats.timestamp_start_ns = now_us;
151 g_sent_last_sample = 0.0f;
152 g_received_last_sample = 0.0f;
153 g_sent_last_packet_time_us = now_us;
154 g_received_last_packet_time_us = now_us;
161 log_info(
"Dumping sent audio to /tmp/sent_audio.wav");
163 if (g_received_wav) {
164 log_info(
"Dumping received audio to /tmp/received_audio.wav");
168 g_analysis_enabled =
true;
169 log_info(
"Audio analysis enabled");
273 if (!g_analysis_enabled)
276 g_received_stats.total_samples++;
279 float abs_sample = fabsf(sample);
280 if (abs_sample > g_received_stats.peak_level) {
281 g_received_stats.peak_level = abs_sample;
285 if (abs_sample > 1.0f) {
286 g_received_stats.clipping_count++;
287 g_received_clipping_samples++;
291 float amp_change = fabsf(sample - g_received_last_sample);
292 if (amp_change > 0.3f) {
293 g_received_sharp_transitions++;
295 g_received_transition_samples++;
298 g_received_mean += sample;
304 static float s_received_prev_sample_for_zero_crossing = 0.0f;
305 if ((s_received_prev_sample_for_zero_crossing > 0 && sample < 0) ||
306 (s_received_prev_sample_for_zero_crossing < 0 && sample > 0)) {
307 g_received_zero_crossings++;
309 s_received_prev_sample_for_zero_crossing = sample;
312 if (abs_sample < 0.001f) {
313 g_received_stats.silent_samples++;
314 g_received_silence_burst++;
315 g_received_below_noise_floor++;
318 if (g_received_silence_burst == 1) {
319 g_received_silence_start_sample = g_received_stats.total_samples;
323 if (g_received_silence_burst > 0) {
325 if (g_received_last_silence_end_sample > 0) {
326 uint64_t samples_between = g_received_silence_start_sample - g_received_last_silence_end_sample;
327 uint32_t ms_between = (uint32_t)(samples_between * 1000 / 48000);
331 g_received_gap_intervals_ms[g_received_gap_count++] = ms_between;
335 g_received_last_silence_end_sample = g_received_stats.total_samples;
338 if (g_received_silence_burst > g_received_max_silence_burst) {
339 g_received_max_silence_burst = g_received_silence_burst;
342 g_received_silence_burst = 0;
346 if (abs_sample < 0.05f) {
347 g_received_low_energy_samples++;
351 float delta = fabsf(sample - g_received_last_sample);
353 g_received_stats.jitter_count++;
355 g_received_last_sample = sample;
358 g_received_rms_accumulator += sample * sample;
359 g_received_rms_sample_count++;
363 if (g_echo_correlation_sample_count < 500000) {
366 uint32_t delay_samples = (g_echo_delays_ms[delay_idx] * 48000) / 1000;
370 if (g_echo_buffer_pos >= delay_samples) {
371 sent_pos = g_echo_buffer_pos - delay_samples;
376 float sent_sample = g_echo_buffer[sent_pos];
379 float diff = fabsf(sample - sent_sample);
380 if (diff < 0.1f && fabsf(sent_sample) > 0.01f) {
381 g_echo_match_count[delay_idx]++;
382 g_echo_correlation_strength[delay_idx] += (0.1f - diff);
385 g_echo_correlation_sample_count++;
390 g_received_beep_window[g_received_beep_window_idx] = sample;
391 g_received_beep_window_idx = (g_received_beep_window_idx + 1) %
BEEP_WINDOW_SIZE;
394 if (g_received_beep_window_idx == 0 && g_received_stats.total_samples >
BEEP_WINDOW_SIZE) {
396 int zero_crossings = 0;
397 float min_amp = 1.0f, max_amp = 0.0f;
398 float sum_amp = 0.0f;
399 float prev = g_received_beep_window[0];
402 float curr = g_received_beep_window[i];
403 float abs_curr = fabsf(curr);
406 if (abs_curr > max_amp)
408 if (abs_curr < min_amp)
413 if ((prev > 0 && curr < 0) || (prev < 0 && curr > 0)) {
420 float amp_range = max_amp - min_amp;
426 bool is_tonal = (zero_crossings >= 5 && zero_crossings <= 100) &&
428 (amp_range < avg_amp * 1.5f);
433 if (!g_in_beep_burst) {
435 g_in_beep_burst =
true;
441 if (g_in_beep_burst) {
445 if (g_beep_burst_samples > 0 && g_beep_burst_samples < 24000) {
446 g_received_beep_events++;
447 g_received_stats.beep_events = g_received_beep_events;
449 g_in_beep_burst =
false;
450 g_beep_burst_samples = 0;
454 g_received_stats.tonal_samples = g_received_tonal_samples;
458 if (g_received_wav) {
514 if (!g_analysis_enabled) {
518 int64_t now_us = (int64_t)time_ns_to_us(
time_get_ns());
520 g_sent_stats.timestamp_end_ns = now_us;
521 g_received_stats.timestamp_end_ns = now_us;
523 int64_t sent_duration_ms = (g_sent_stats.timestamp_end_ns - g_sent_stats.timestamp_start_ns) / NS_PER_MS_INT;
524 int64_t recv_duration_ms = (g_received_stats.timestamp_end_ns - g_received_stats.timestamp_start_ns) / NS_PER_MS_INT;
527 float sent_rms = 0.0f;
528 float recv_rms = 0.0f;
529 if (g_sent_rms_sample_count > 0) {
530 sent_rms = sqrtf(g_sent_rms_accumulator / g_sent_rms_sample_count);
532 if (g_received_rms_sample_count > 0) {
533 recv_rms = sqrtf(g_received_rms_accumulator / g_received_rms_sample_count);
536 log_plain(
"================================================================================");
537 log_plain(
" AUDIO ANALYSIS REPORT ");
538 log_plain(
"================================================================================");
539 log_plain(
"SENT AUDIO (Microphone Capture):");
540 log_plain(
" Duration: %lld ms", (
long long)sent_duration_ms);
541 log_plain(
" Total Samples: %llu", (
unsigned long long)g_sent_stats.total_samples);
542 log_plain(
" Peak Level: %.4f (should be < 1.0)", g_sent_stats.peak_level);
543 log_plain(
" RMS Level: %.4f (audio energy/loudness)", sent_rms);
544 log_plain(
" Clipping Events: %llu samples (%.2f%%)", (
unsigned long long)g_sent_stats.clipping_count,
545 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_stats.clipping_count / g_sent_stats.total_samples) : 0);
546 log_plain(
" Silent Samples: %llu samples (%.2f%%)", (
unsigned long long)g_sent_stats.silent_samples,
547 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_stats.silent_samples / g_sent_stats.total_samples) : 0);
548 if (g_sent_max_silence_burst > 0) {
549 log_plain(
" Max Silence Burst: %llu samples", (
unsigned long long)g_sent_max_silence_burst);
551 log_plain(
" Packets Sent: %u", g_sent_stats.packets_count);
552 log_plain(
" Status: %s", g_sent_stats.clipping_count > 0 ?
"CLIPPING DETECTED!" :
"OK");
554 log_plain(
"RECEIVED AUDIO (Playback):");
555 log_plain(
" Duration: %lld ms", (
long long)recv_duration_ms);
556 log_plain(
" Total Samples: %llu", (
unsigned long long)g_received_stats.total_samples);
557 log_plain(
" Peak Level: %.4f", g_received_stats.peak_level);
558 log_plain(
" RMS Level: %.4f (audio energy/loudness)", recv_rms);
559 log_plain(
" Clipping Events: %llu samples (%.2f%%)", (
unsigned long long)g_received_stats.clipping_count,
560 g_received_stats.total_samples > 0
561 ? (100.0 * g_received_stats.clipping_count / g_received_stats.total_samples)
563 log_plain(
" Silent Samples: %llu samples (%.2f%%)", (
unsigned long long)g_received_stats.silent_samples,
564 g_received_stats.total_samples > 0
565 ? (100.0 * g_received_stats.silent_samples / g_received_stats.total_samples)
567 if (g_received_max_silence_burst > 0) {
568 log_plain(
" Max Silence Burst: %llu samples", (
unsigned long long)g_received_max_silence_burst);
570 double low_energy_pct =
571 g_received_stats.total_samples > 0 ? (100.0 * g_received_low_energy_samples / g_received_stats.total_samples) : 0;
572 log_plain(
" Very Quiet Samples: %llu samples (%.1f%%) [amplitude < 0.05]",
573 (
unsigned long long)g_received_low_energy_samples, low_energy_pct);
574 log_plain(
" Packets Received: %u", g_received_stats.packets_count);
575 log_plain(
" Status: %s", g_received_stats.total_samples == 0 ?
"NO AUDIO RECEIVED!" :
"Receiving");
577 log_plain(
"QUALITY METRICS (Scratchy/Distorted Audio Detection):");
579 log_plain(
" Jitter Events: %llu (rapid amplitude changes)", (
unsigned long long)g_sent_stats.jitter_count);
580 log_plain(
" Discontinuities: %llu (packet arrival gaps > 100ms)",
581 (
unsigned long long)g_sent_stats.discontinuity_count);
582 log_plain(
" Max Gap Between Packets: %u ms (expected ~20ms per frame)", g_sent_stats.max_gap_ns);
584 log_plain(
"RECEIVED:");
585 log_plain(
" Jitter Events: %llu (rapid amplitude changes)",
586 (
unsigned long long)g_received_stats.jitter_count);
587 log_plain(
" Discontinuities: %llu (packet arrival gaps > 100ms)",
588 (
unsigned long long)g_received_stats.discontinuity_count);
589 log_plain(
" Max Gap Between Packets: %u ms (expected ~20ms per frame)", g_received_stats.max_gap_ns);
592 if (g_received_beep_events > 0 || g_received_tonal_samples > 0) {
594 g_received_stats.total_samples > 0 ? (100.0 * g_received_tonal_samples / g_received_stats.total_samples) : 0;
595 log_plain(
"BEEP/TONE ARTIFACTS:");
596 log_plain(
" Beep Events: %llu (short tonal bursts < 500ms)",
597 (
unsigned long long)g_received_beep_events);
598 log_plain(
" Tonal Samples: %llu samples (%.1f%%) [consistent frequency content]",
599 (
unsigned long long)g_received_tonal_samples, tonal_pct);
601 if (g_received_beep_events > 10) {
602 log_plain(
" 🔴 BEEPING DETECTED: %llu short tonal bursts - likely codec artifacts or system sounds!",
603 (
unsigned long long)g_received_beep_events);
604 log_plain(
" Possible causes:");
605 log_plain(
" - Opus codec producing tonal artifacts during silence/transitions");
606 log_plain(
" - Buffer underruns creating synthetic tones");
607 log_plain(
" - AEC3 suppressor resonance");
608 log_plain(
" - System notification sounds bleeding through");
609 }
else if (g_received_beep_events > 3) {
610 log_plain(
" ⚠️ Some beep artifacts detected (%llu events)", (
unsigned long long)g_received_beep_events);
614 log_plain(
"DIAGNOSTICS:");
615 if (g_sent_stats.peak_level == 0) {
616 log_plain(
" No audio captured from microphone!");
618 if (g_received_stats.total_samples == 0) {
619 log_plain(
" No audio received from server!");
620 }
else if (g_received_stats.peak_level < 0.01f) {
621 log_plain(
" ⚠️ Received audio is very quiet (peak < 0.01)");
623 if (g_sent_stats.clipping_count > 0) {
624 log_plain(
" Microphone input is clipping - reduce microphone volume");
628 log_plain(
"ECHO DETECTION (Echo Cancellation Quality Check):");
629 if (g_echo_correlation_sample_count > 0 && g_sent_stats.total_samples > 0) {
630 uint64_t max_matches = 0;
631 int best_delay_idx = -1;
635 if (g_echo_match_count[i] > max_matches) {
636 max_matches = g_echo_match_count[i];
641 double echo_threshold_pct = 5.0;
643 if (best_delay_idx >= 0) {
644 double match_pct = (100.0 * g_echo_match_count[best_delay_idx]) / g_echo_correlation_sample_count;
645 log_plain(
" Echo correlation at different delays:");
647 double pct = (100.0 * g_echo_match_count[i]) / g_echo_correlation_sample_count;
648 const char *status = pct > echo_threshold_pct ?
"⚠️ ECHO DETECTED" :
"✓ OK";
649 log_plain(
" %3u ms delay: %.1f%% match rate %s", g_echo_delays_ms[i], pct, status);
652 if (match_pct > echo_threshold_pct) {
653 g_detected_echo_delay_ms = g_echo_delays_ms[best_delay_idx];
654 log_plain(
" 🔴 ECHO CANCELLATION NOT WORKING: Strong echo at %u ms delay!", g_detected_echo_delay_ms);
655 log_plain(
" Received audio contains %.1f%% samples matching sent audio from %u ms ago", match_pct,
656 g_detected_echo_delay_ms);
658 log_plain(
" ✓ Echo cancellation working: No significant echo detected");
662 log_plain(
" Insufficient data for echo detection (need both sent and received audio)");
666 if (g_aec3_metrics_available) {
667 log_plain(
"AEC3 METRICS (from WebRTC GetMetrics()):");
668 log_plain(
" Echo Return Loss (ERL): %.2f dB (how much echo is attenuated; >10 dB is good)",
669 g_aec3_echo_return_loss);
670 log_plain(
" Echo Return Loss Enhancement (ERLE): %.2f dB (residual echo suppression)",
671 g_aec3_echo_return_loss_enhancement);
672 log_plain(
" Estimated Echo Delay: %d ms", g_aec3_delay_ns);
674 if (g_aec3_echo_return_loss > 10.0) {
675 log_plain(
" ✓ Good echo attenuation (ERL > 10 dB)");
676 }
else if (g_aec3_echo_return_loss > 3.0) {
677 log_plain(
" ⚠️ Moderate echo attenuation (3-10 dB)");
679 log_plain(
" 🔴 Poor echo attenuation (ERL < 3 dB)");
684 if (recv_rms < 0.005f) {
685 log_plain(
" ⚠️ CRITICAL: Received audio RMS is extremely low (%.6f) - barely audible!", recv_rms);
686 }
else if (recv_rms < 0.02f) {
687 log_plain(
" ⚠️ WARNING: Received audio RMS is low (%.6f) - may sound quiet or muddy", recv_rms);
691 double received_silence_pct = g_received_stats.total_samples > 0
692 ? (100.0 * g_received_stats.silent_samples / g_received_stats.total_samples)
695 if (received_silence_pct > 30.0) {
696 log_plain(
" ⚠️ SCRATCHY AUDIO DETECTED: Too much silence in received audio!");
697 log_plain(
" - Silence: %.1f%% of received samples (should be < 10%%)", received_silence_pct);
698 log_plain(
" - Max silence burst: %llu samples", (
unsigned long long)g_received_max_silence_burst);
699 log_plain(
" - This creates jittery/choppy playback between audio bursts");
700 }
else if (received_silence_pct > 15.0) {
701 log_plain(
" ⚠️ WARNING: Moderate silence detected (%.1f%%)", received_silence_pct);
705 double sent_sharp_pct =
706 g_sent_transition_samples > 0 ? (100.0 * g_sent_sharp_transitions / g_sent_transition_samples) : 0;
707 double recv_sharp_pct =
708 g_received_transition_samples > 0 ? (100.0 * g_received_sharp_transitions / g_received_transition_samples) : 0;
712 double sent_zero_cross_pct =
713 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_zero_crossings / g_sent_stats.total_samples) : 0;
714 double recv_zero_cross_pct =
715 g_received_stats.total_samples > 0 ? (100.0 * g_received_zero_crossings / g_received_stats.total_samples) : 0;
717 log_plain(
"WAVEFORM ANALYSIS (Is it clean music or corrupted/static?):");
718 log_plain(
"SENT AUDIO:");
719 log_plain(
" Zero crossings: %.2f%% of samples (music: 1-5%%, noise: 15-50%%)", sent_zero_cross_pct);
720 log_plain(
" Sharp transitions (clicks/pops): %.2f%% of samples", sent_sharp_pct);
721 log_plain(
" Clipping samples: %llu (%.3f%%)", (
unsigned long long)g_sent_clipping_samples,
722 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_clipping_samples / g_sent_stats.total_samples) : 0);
724 log_plain(
"RECEIVED AUDIO:");
725 log_plain(
" Zero crossings: %.2f%% of samples (music: 1-5%%, noise: 15-50%%)", recv_zero_cross_pct);
726 log_plain(
" Sharp transitions (clicks/pops): %.2f%% of samples", recv_sharp_pct);
727 log_plain(
" Clipping samples: %llu (%.3f%%)", (
unsigned long long)g_received_clipping_samples,
728 g_received_stats.total_samples > 0 ? (100.0 * g_received_clipping_samples / g_received_stats.total_samples)
730 log_plain(
" Zero crossing increase: %.2f%% higher than sent (indicates corruption)",
731 recv_zero_cross_pct - sent_zero_cross_pct);
734 log_plain(
"SOUND QUALITY VERDICT:");
735 if (recv_zero_cross_pct > 10.0) {
736 log_plain(
" ⚠️ SOUNDS LIKE STATIC/DISTORTED: Excessive zero crossings (%.2f%%) = high frequency noise",
737 recv_zero_cross_pct);
738 log_plain(
" Increase from sent: %.2f%% (waveform corruption detected)",
739 recv_zero_cross_pct - sent_zero_cross_pct);
740 log_plain(
" Likely causes: Opus codec artifacts, jitter buffer issues, or packet delivery gaps");
741 }
else if (recv_zero_cross_pct - sent_zero_cross_pct > 3.0) {
742 log_plain(
" ⚠️ SOUNDS CORRUPTED: Zero crossing rate increased by %.2f%% (should be ±0.5%%)",
743 recv_zero_cross_pct - sent_zero_cross_pct);
744 log_plain(
" Indicates waveform distortion from network/processing artifacts");
745 }
else if (recv_sharp_pct > 2.0) {
746 log_plain(
" ⚠️ SOUNDS LIKE STATIC: High click/pop rate (%.2f%%) indicates audio artifacts", recv_sharp_pct);
747 log_plain(
" Likely causes: Packet loss, jitter buffer issues, or frame discontinuities");
748 }
else if (g_received_clipping_samples > (g_received_stats.total_samples / 1000)) {
749 log_plain(
" ⚠️ SOUNDS DISTORTED: Significant clipping detected (%.3f%%)",
750 100.0 * g_received_clipping_samples / g_received_stats.total_samples);
751 log_plain(
" Likely causes: AGC too aggressive, gain too high, or codec compression artifacts");
752 }
else if (low_energy_pct > 50.0 && recv_rms < 0.05f) {
753 log_plain(
" ⚠️ SOUNDS MUDDY/QUIET: Over 50%% very quiet samples + low RMS");
754 log_plain(
" Audio may sound unclear or like background noise rather than music");
755 }
else if (received_silence_pct > 10.0) {
756 log_plain(
" ⚠️ SOUNDS SCRATCHY: Excessive silence (%.1f%%) causes dropouts", received_silence_pct);
757 }
else if (recv_rms > 0.08f && recv_zero_cross_pct < 6.0 && recv_sharp_pct < 1.0 &&
758 g_received_clipping_samples == 0) {
759 log_plain(
" ✓ SOUNDS LIKE MUSIC: Good RMS (%.4f), clean waveform (%.2f%% zero crossings), minimal artifacts",
760 recv_rms, recv_zero_cross_pct);
761 log_plain(
" Audio quality acceptable for communication");
763 log_plain(
" ? BORDERLINE: Check specific metrics above");
767 if (low_energy_pct > 50.0) {
768 log_plain(
" ⚠️ WARNING: Over 50%% of received samples are very quiet (< 0.05 amplitude)");
769 log_plain(
" - This makes audio sound muddy, unclear, or hard to understand");
770 log_plain(
" - Caused by: Mixing other clients' audio with your own at wrong levels");
774 if (g_received_packet_times_count >= 5) {
776 uint32_t inter_arrival_count = 0;
777 uint32_t min_interval_ms = 0xFFFFFFFF;
778 uint32_t max_interval_ms = 0;
779 uint64_t sum_intervals_ms = 0;
780 uint32_t intervals_around_50ms = 0;
783 for (uint32_t i = 1; i < g_received_packet_times_count; i++) {
784 uint64_t prev_ns = g_received_packet_times_ns[i - 1];
785 uint64_t curr_ns = g_received_packet_times_ns[i];
788 uint32_t gap_ms = (uint32_t)time_ns_to_ms(gap_ns);
790 inter_arrival_times_ms[inter_arrival_count++] = gap_ms;
791 if (gap_ms < min_interval_ms)
792 min_interval_ms = gap_ms;
793 if (gap_ms > max_interval_ms)
794 max_interval_ms = gap_ms;
795 sum_intervals_ms += gap_ms;
798 if (gap_ms >= 35 && gap_ms <= 70) {
799 intervals_around_50ms++;
803 uint32_t avg_interval_ms = (uint32_t)(sum_intervals_ms / inter_arrival_count);
804 uint32_t interval_consistency = (intervals_around_50ms * 100) / inter_arrival_count;
809 double avg_samples_per_packet =
810 g_received_stats.total_samples > 0 ? (double)g_received_stats.total_samples / inter_arrival_count : 0;
811 double frames_per_packet = avg_samples_per_packet / 960.0;
812 double ms_audio_per_packet = frames_per_packet * 20.0;
815 if (intervals_around_50ms >= (inter_arrival_count * 2 / 3)) {
817 log_plain(
" 🔴 PERIODIC STUTTERING DETECTED: Server sends packets every ~%u ms (should be ~20ms)!",
819 log_plain(
" - Packet inter-arrival: %u-%u ms (avg: %u ms)", min_interval_ms, max_interval_ms, avg_interval_ms);
820 log_plain(
" - %u/%u packets (~%u%%) are ~50ms apart (CLEAR STUTTERING PATTERN)", intervals_around_50ms,
821 inter_arrival_count, interval_consistency);
823 log_plain(
" - PACKET ANALYSIS:");
824 log_plain(
" - Total audio samples: %llu over %u packets", (
unsigned long long)g_received_stats.total_samples,
825 inter_arrival_count);
826 log_plain(
" - Avg samples per packet: %.0f (= %.2f Opus frames = %.1f ms)", avg_samples_per_packet,
827 frames_per_packet, ms_audio_per_packet);
829 if (frames_per_packet < 1.5) {
830 log_plain(
" - ❌ PROBLEM: Each packet contains < 1.5 frames (should be 2-3 frames!)");
831 log_plain(
" - With only %.1f frames per packet arriving every %u ms, there are gaps between chunks",
832 frames_per_packet, avg_interval_ms);
833 log_plain(
" - Audio plays for ~%.0f ms, then %u ms gap, then plays again", ms_audio_per_packet,
834 avg_interval_ms - (uint32_t)ms_audio_per_packet);
835 }
else if (frames_per_packet > 2.5) {
836 log_plain(
" - ✓ Packets contain %.1f frames (~%.0f ms audio each)", frames_per_packet,
837 ms_audio_per_packet);
838 log_plain(
" - Should play smoothly if jitter buffer is large enough");
839 log_plain(
" - If still stuttering, issue is jitter buffer depth or timing precision");
841 log_plain(
" - Packets contain %.1f frames (~%.0f ms)", frames_per_packet, ms_audio_per_packet);
842 log_plain(
" - Borderline: buffer needs to hold %.0f ms to bridge %.u ms gap", ms_audio_per_packet,
843 avg_interval_ms - (uint32_t)ms_audio_per_packet);
845 }
else if (avg_interval_ms > 30) {
846 log_plain(
" ⚠️ AUDIO DELIVERY INCONSISTENCY: Server packets arrive every ~%u ms (expected ~20ms)",
848 log_plain(
" - Interval range: %u-%u ms", min_interval_ms, max_interval_ms);
849 log_plain(
" - This causes dropouts and buffering issues");
854 if (g_received_stats.max_gap_ns > 40) {
855 log_plain(
" ⚠️ DISTORTION DETECTED: Packet delivery gaps too large!");
856 log_plain(
" - Max gap: %u ms (should be ~20ms for smooth audio)", g_received_stats.max_gap_ns);
857 if (g_received_stats.max_gap_ns > 80) {
858 log_plain(
" - SEVERE: Gaps > 80ms cause severe distortion and dropouts");
859 }
else if (g_received_stats.max_gap_ns > 50) {
860 log_plain(
" - Gaps > 50ms cause noticeable distortion");
863 if (g_received_stats.discontinuity_count > 0) {
864 log_plain(
" Packet delivery discontinuities: %llu gaps > 100ms detected",
865 (
unsigned long long)g_received_stats.discontinuity_count);
867 if (g_received_stats.jitter_count > (g_received_stats.total_samples / 100)) {
868 log_plain(
" High jitter detected: > 1%% of samples have rapid amplitude changes");
869 log_plain(
" - May indicate buffer underruns from sparse packet delivery");
872 log_plain(
"================================================================================");