ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
analysis.h File Reference

Audio Analysis and Debugging Interface. More...

Go to the source code of this file.

Data Structures

struct  audio_analysis_stats_t
 Audio analysis statistics for sent or received audio. More...
 

Functions

int audio_analysis_init (void)
 Initialize audio analysis.
 
void audio_analysis_track_sent_sample (float sample)
 Track sent audio sample.
 
void audio_analysis_track_sent_packet (size_t size)
 Track sent packet.
 
void audio_analysis_track_received_sample (float sample)
 Track received audio sample.
 
void audio_analysis_track_received_packet (size_t size)
 Track received packet.
 
const audio_analysis_stats_t * audio_analysis_get_sent_stats (void)
 Get sent audio statistics.
 
const audio_analysis_stats_t * audio_analysis_get_received_stats (void)
 Get received audio statistics.
 
void audio_analysis_print_report (void)
 Print audio analysis report.
 
void audio_analysis_set_aec3_metrics (double echo_return_loss, double echo_return_loss_enhancement, int delay_ms)
 Set AEC3 echo cancellation metrics.
 
void audio_analysis_cleanup (void)
 Cleanup audio analysis.
 

Detailed Description

Audio Analysis and Debugging Interface.

Provides audio quality analysis for troubleshooting audio issues. Tracks sent and received audio characteristics for debugging.

Author
Zachary Fogg me@zfo.gg
Date
2025

Definition in file analysis.h.

Function Documentation

◆ audio_analysis_cleanup()

void audio_analysis_cleanup ( void  )

Cleanup audio analysis.

Definition at line 882 of file analysis.c.

882 {
883 g_analysis_enabled = false;
884
885 // Close WAV files if they were open
886 if (g_sent_wav) {
887 wav_writer_close(g_sent_wav);
888 g_sent_wav = NULL;
889 log_info("Closed sent audio WAV file");
890 }
891 if (g_received_wav) {
892 wav_writer_close(g_received_wav);
893 g_received_wav = NULL;
894 log_info("Closed received audio WAV file");
895 }
896}
#define log_info(...)
Log an INFO message.
void wav_writer_close(wav_writer_t *writer)
Close WAV file and finalize header.
Definition wav_writer.c:99

References log_info and wav_writer_close().

◆ audio_analysis_get_received_stats()

const audio_analysis_stats_t * audio_analysis_get_received_stats ( void  )

Get received audio statistics.

Returns
Pointer to analysis stats (do not free)

Definition at line 504 of file analysis.c.

504 {
505 return &g_received_stats;
506}

◆ audio_analysis_get_sent_stats()

const audio_analysis_stats_t * audio_analysis_get_sent_stats ( void  )

Get sent audio statistics.

Returns
Pointer to analysis stats (do not free)

Definition at line 500 of file analysis.c.

500 {
501 return &g_sent_stats;
502}

◆ audio_analysis_init()

int audio_analysis_init ( void  )

Initialize audio analysis.

Returns
0 on success, negative on error

Definition at line 113 of file analysis.c.

113 {
114 SAFE_MEMSET(&g_sent_stats, sizeof(g_sent_stats), 0, sizeof(g_sent_stats));
115 SAFE_MEMSET(&g_received_stats, sizeof(g_received_stats), 0, sizeof(g_received_stats));
116
117 // Reset stuttering/gap tracking
118 SAFE_MEMSET(g_received_gap_intervals_ms, sizeof(g_received_gap_intervals_ms), 0, sizeof(g_received_gap_intervals_ms));
119 g_received_gap_count = 0;
120 g_received_silence_start_sample = 0;
121 g_received_last_silence_end_sample = 0;
122 SAFE_MEMSET(g_received_packet_times, sizeof(g_received_packet_times), 0, sizeof(g_received_packet_times));
123 g_received_packet_times_count = 0;
124 SAFE_MEMSET(g_received_packet_sizes, sizeof(g_received_packet_sizes), 0, sizeof(g_received_packet_sizes));
125 g_received_total_audio_samples = 0;
126
127 // Reset echo detection
128 SAFE_MEMSET(g_echo_buffer, sizeof(g_echo_buffer), 0, sizeof(g_echo_buffer));
129 g_echo_buffer_pos = 0;
130 g_echo_correlation_sample_count = 0;
131 for (int i = 0; i < ECHO_DELAY_COUNT; i++) {
132 g_echo_correlation_strength[i] = 0;
133 g_echo_match_count[i] = 0;
134 }
135 g_detected_echo_delay_ms = 0;
136
137 // Reset beep detection
138 SAFE_MEMSET(g_received_beep_window, sizeof(g_received_beep_window), 0, sizeof(g_received_beep_window));
139 g_received_beep_window_idx = 0;
140 g_received_beep_events = 0;
141 g_received_tonal_samples = 0;
142 g_in_beep_burst = false;
143 g_beep_burst_samples = 0;
144
145 struct timespec ts;
146 clock_gettime(CLOCK_MONOTONIC, &ts);
147 int64_t now_us = (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
148
149 g_sent_stats.timestamp_start_us = now_us;
150 g_received_stats.timestamp_start_us = now_us;
151
152 g_sent_last_sample = 0.0f;
153 g_received_last_sample = 0.0f;
154 g_sent_last_packet_time_us = now_us;
155 g_received_last_packet_time_us = now_us;
156
157 // Initialize WAV file dumping if enabled
158 if (wav_dump_enabled()) {
159 g_sent_wav = wav_writer_open("/tmp/sent_audio.wav", 48000, 1);
160 g_received_wav = wav_writer_open("/tmp/received_audio.wav", 48000, 1);
161 if (g_sent_wav) {
162 log_info("Dumping sent audio to /tmp/sent_audio.wav");
163 }
164 if (g_received_wav) {
165 log_info("Dumping received audio to /tmp/received_audio.wav");
166 }
167 }
168
169 g_analysis_enabled = true;
170 log_info("Audio analysis enabled");
171 return 0;
172}
#define ECHO_DELAY_COUNT
Definition analysis.c:88
#define SAFE_MEMSET(dest, dest_size, ch, count)
Definition common.h:389
int64_t timestamp_start_us
Definition analysis.h:35
bool wav_dump_enabled(void)
Check if audio dumping is enabled via environment.
Definition wav_writer.c:123
wav_writer_t * wav_writer_open(const char *filepath, int sample_rate, int channels)
Open WAV file for writing.
Definition wav_writer.c:39

References ECHO_DELAY_COUNT, log_info, SAFE_MEMSET, audio_analysis_stats_t::timestamp_start_us, wav_dump_enabled(), and wav_writer_open().

◆ audio_analysis_print_report()

void audio_analysis_print_report ( void  )

Print audio analysis report.

Definition at line 517 of file analysis.c.

517 {
518 if (!g_analysis_enabled) {
519 return;
520 }
521
522 struct timespec ts;
523 clock_gettime(CLOCK_MONOTONIC, &ts);
524 int64_t now_us = (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
525
526 g_sent_stats.timestamp_end_us = now_us;
527 g_received_stats.timestamp_end_us = now_us;
528
529 int64_t sent_duration_ms = (g_sent_stats.timestamp_end_us - g_sent_stats.timestamp_start_us) / 1000;
530 int64_t recv_duration_ms = (g_received_stats.timestamp_end_us - g_received_stats.timestamp_start_us) / 1000;
531
532 // Calculate RMS levels
533 float sent_rms = 0.0f;
534 float recv_rms = 0.0f;
535 if (g_sent_rms_sample_count > 0) {
536 sent_rms = sqrtf(g_sent_rms_accumulator / g_sent_rms_sample_count);
537 }
538 if (g_received_rms_sample_count > 0) {
539 recv_rms = sqrtf(g_received_rms_accumulator / g_received_rms_sample_count);
540 }
541
542 log_plain("================================================================================");
543 log_plain(" AUDIO ANALYSIS REPORT ");
544 log_plain("================================================================================");
545 log_plain("SENT AUDIO (Microphone Capture):");
546 log_plain(" Duration: %lld ms", (long long)sent_duration_ms);
547 log_plain(" Total Samples: %llu", (unsigned long long)g_sent_stats.total_samples);
548 log_plain(" Peak Level: %.4f (should be < 1.0)", g_sent_stats.peak_level);
549 log_plain(" RMS Level: %.4f (audio energy/loudness)", sent_rms);
550 log_plain(" Clipping Events: %llu samples (%.2f%%)", (unsigned long long)g_sent_stats.clipping_count,
551 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_stats.clipping_count / g_sent_stats.total_samples) : 0);
552 log_plain(" Silent Samples: %llu samples (%.2f%%)", (unsigned long long)g_sent_stats.silent_samples,
553 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_stats.silent_samples / g_sent_stats.total_samples) : 0);
554 if (g_sent_max_silence_burst > 0) {
555 log_plain(" Max Silence Burst: %llu samples", (unsigned long long)g_sent_max_silence_burst);
556 }
557 log_plain(" Packets Sent: %u", g_sent_stats.packets_count);
558 log_plain(" Status: %s", g_sent_stats.clipping_count > 0 ? "CLIPPING DETECTED!" : "OK");
559
560 log_plain("RECEIVED AUDIO (Playback):");
561 log_plain(" Duration: %lld ms", (long long)recv_duration_ms);
562 log_plain(" Total Samples: %llu", (unsigned long long)g_received_stats.total_samples);
563 log_plain(" Peak Level: %.4f", g_received_stats.peak_level);
564 log_plain(" RMS Level: %.4f (audio energy/loudness)", recv_rms);
565 log_plain(" Clipping Events: %llu samples (%.2f%%)", (unsigned long long)g_received_stats.clipping_count,
566 g_received_stats.total_samples > 0
567 ? (100.0 * g_received_stats.clipping_count / g_received_stats.total_samples)
568 : 0);
569 log_plain(" Silent Samples: %llu samples (%.2f%%)", (unsigned long long)g_received_stats.silent_samples,
570 g_received_stats.total_samples > 0
571 ? (100.0 * g_received_stats.silent_samples / g_received_stats.total_samples)
572 : 0);
573 if (g_received_max_silence_burst > 0) {
574 log_plain(" Max Silence Burst: %llu samples", (unsigned long long)g_received_max_silence_burst);
575 }
576 double low_energy_pct =
577 g_received_stats.total_samples > 0 ? (100.0 * g_received_low_energy_samples / g_received_stats.total_samples) : 0;
578 log_plain(" Very Quiet Samples: %llu samples (%.1f%%) [amplitude < 0.05]",
579 (unsigned long long)g_received_low_energy_samples, low_energy_pct);
580 log_plain(" Packets Received: %u", g_received_stats.packets_count);
581 log_plain(" Status: %s", g_received_stats.total_samples == 0 ? "NO AUDIO RECEIVED!" : "Receiving");
582
583 log_plain("QUALITY METRICS (Scratchy/Distorted Audio Detection):");
584 log_plain("SENT:");
585 log_plain(" Jitter Events: %llu (rapid amplitude changes)", (unsigned long long)g_sent_stats.jitter_count);
586 log_plain(" Discontinuities: %llu (packet arrival gaps > 100ms)",
587 (unsigned long long)g_sent_stats.discontinuity_count);
588 log_plain(" Max Gap Between Packets: %u ms (expected ~20ms per frame)", g_sent_stats.max_gap_ms);
589
590 log_plain("RECEIVED:");
591 log_plain(" Jitter Events: %llu (rapid amplitude changes)",
592 (unsigned long long)g_received_stats.jitter_count);
593 log_plain(" Discontinuities: %llu (packet arrival gaps > 100ms)",
594 (unsigned long long)g_received_stats.discontinuity_count);
595 log_plain(" Max Gap Between Packets: %u ms (expected ~20ms per frame)", g_received_stats.max_gap_ms);
596
597 // Beep/tone artifact detection
598 if (g_received_beep_events > 0 || g_received_tonal_samples > 0) {
599 double tonal_pct =
600 g_received_stats.total_samples > 0 ? (100.0 * g_received_tonal_samples / g_received_stats.total_samples) : 0;
601 log_plain("BEEP/TONE ARTIFACTS:");
602 log_plain(" Beep Events: %llu (short tonal bursts < 500ms)",
603 (unsigned long long)g_received_beep_events);
604 log_plain(" Tonal Samples: %llu samples (%.1f%%) [consistent frequency content]",
605 (unsigned long long)g_received_tonal_samples, tonal_pct);
606
607 if (g_received_beep_events > 10) {
608 log_plain(" 🔴 BEEPING DETECTED: %llu short tonal bursts - likely codec artifacts or system sounds!",
609 (unsigned long long)g_received_beep_events);
610 log_plain(" Possible causes:");
611 log_plain(" - Opus codec producing tonal artifacts during silence/transitions");
612 log_plain(" - Buffer underruns creating synthetic tones");
613 log_plain(" - AEC3 suppressor resonance");
614 log_plain(" - System notification sounds bleeding through");
615 } else if (g_received_beep_events > 3) {
616 log_plain(" ⚠️ Some beep artifacts detected (%llu events)", (unsigned long long)g_received_beep_events);
617 }
618 }
619
620 log_plain("DIAGNOSTICS:");
621 if (g_sent_stats.peak_level == 0) {
622 log_plain(" No audio captured from microphone!");
623 }
624 if (g_received_stats.total_samples == 0) {
625 log_plain(" No audio received from server!");
626 } else if (g_received_stats.peak_level < 0.01f) {
627 log_plain(" ⚠️ Received audio is very quiet (peak < 0.01)");
628 }
629 if (g_sent_stats.clipping_count > 0) {
630 log_plain(" Microphone input is clipping - reduce microphone volume");
631 }
632
633 // Echo detection diagnostics
634 log_plain("ECHO DETECTION (Echo Cancellation Quality Check):");
635 if (g_echo_correlation_sample_count > 0 && g_sent_stats.total_samples > 0) {
636 uint64_t max_matches = 0;
637 int best_delay_idx = -1;
638
639 // Find which delay has the most matches (if any)
640 for (int i = 0; i < ECHO_DELAY_COUNT; i++) {
641 if (g_echo_match_count[i] > max_matches) {
642 max_matches = g_echo_match_count[i];
643 best_delay_idx = i;
644 }
645 }
646
647 double echo_threshold_pct = 5.0; // If > 5% of samples match at a delay, it's echo
648
649 if (best_delay_idx >= 0) {
650 double match_pct = (100.0 * g_echo_match_count[best_delay_idx]) / g_echo_correlation_sample_count;
651 log_plain(" Echo correlation at different delays:");
652 for (int i = 0; i < ECHO_DELAY_COUNT; i++) {
653 double pct = (100.0 * g_echo_match_count[i]) / g_echo_correlation_sample_count;
654 const char *status = pct > echo_threshold_pct ? "⚠️ ECHO DETECTED" : "✓ OK";
655 log_plain(" %3u ms delay: %.1f%% match rate %s", g_echo_delays_ms[i], pct, status);
656 }
657
658 if (match_pct > echo_threshold_pct) {
659 g_detected_echo_delay_ms = g_echo_delays_ms[best_delay_idx];
660 log_plain(" 🔴 ECHO CANCELLATION NOT WORKING: Strong echo at %u ms delay!", g_detected_echo_delay_ms);
661 log_plain(" Received audio contains %.1f%% samples matching sent audio from %u ms ago", match_pct,
662 g_detected_echo_delay_ms);
663 } else {
664 log_plain(" ✓ Echo cancellation working: No significant echo detected");
665 }
666 }
667 } else {
668 log_plain(" Insufficient data for echo detection (need both sent and received audio)");
669 }
670
671 // AEC3 metrics from WebRTC (if available)
672 if (g_aec3_metrics_available) {
673 log_plain("AEC3 METRICS (from WebRTC GetMetrics()):");
674 log_plain(" Echo Return Loss (ERL): %.2f dB (how much echo is attenuated; >10 dB is good)",
675 g_aec3_echo_return_loss);
676 log_plain(" Echo Return Loss Enhancement (ERLE): %.2f dB (residual echo suppression)",
677 g_aec3_echo_return_loss_enhancement);
678 log_plain(" Estimated Echo Delay: %d ms", g_aec3_delay_ms);
679
680 if (g_aec3_echo_return_loss > 10.0) {
681 log_plain(" ✓ Good echo attenuation (ERL > 10 dB)");
682 } else if (g_aec3_echo_return_loss > 3.0) {
683 log_plain(" ⚠️ Moderate echo attenuation (3-10 dB)");
684 } else {
685 log_plain(" 🔴 Poor echo attenuation (ERL < 3 dB)");
686 }
687 }
688
689 // Audio quality diagnostics
690 if (recv_rms < 0.005f) {
691 log_plain(" ⚠️ CRITICAL: Received audio RMS is extremely low (%.6f) - barely audible!", recv_rms);
692 } else if (recv_rms < 0.02f) {
693 log_plain(" ⚠️ WARNING: Received audio RMS is low (%.6f) - may sound quiet or muddy", recv_rms);
694 }
695
696 // Silence analysis
697 double received_silence_pct = g_received_stats.total_samples > 0
698 ? (100.0 * g_received_stats.silent_samples / g_received_stats.total_samples)
699 : 0;
700
701 if (received_silence_pct > 30.0) {
702 log_plain(" ⚠️ SCRATCHY AUDIO DETECTED: Too much silence in received audio!");
703 log_plain(" - Silence: %.1f%% of received samples (should be < 10%%)", received_silence_pct);
704 log_plain(" - Max silence burst: %llu samples", (unsigned long long)g_received_max_silence_burst);
705 log_plain(" - This creates jittery/choppy playback between audio bursts");
706 } else if (received_silence_pct > 15.0) {
707 log_plain(" ⚠️ WARNING: Moderate silence detected (%.1f%%)", received_silence_pct);
708 }
709
710 // Sharp transition analysis (clicks/pops)
711 double sent_sharp_pct =
712 g_sent_transition_samples > 0 ? (100.0 * g_sent_sharp_transitions / g_sent_transition_samples) : 0;
713 double recv_sharp_pct =
714 g_received_transition_samples > 0 ? (100.0 * g_received_sharp_transitions / g_received_transition_samples) : 0;
715
716 // Zero crossing rate analysis (spectral content)
717 // Music: 1-5%, Speech: 5-15%, Static/Noise: 15-50%
718 double sent_zero_cross_pct =
719 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_zero_crossings / g_sent_stats.total_samples) : 0;
720 double recv_zero_cross_pct =
721 g_received_stats.total_samples > 0 ? (100.0 * g_received_zero_crossings / g_received_stats.total_samples) : 0;
722
723 log_plain("WAVEFORM ANALYSIS (Is it clean music or corrupted/static?):");
724 log_plain("SENT AUDIO:");
725 log_plain(" Zero crossings: %.2f%% of samples (music: 1-5%%, noise: 15-50%%)", sent_zero_cross_pct);
726 log_plain(" Sharp transitions (clicks/pops): %.2f%% of samples", sent_sharp_pct);
727 log_plain(" Clipping samples: %llu (%.3f%%)", (unsigned long long)g_sent_clipping_samples,
728 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_clipping_samples / g_sent_stats.total_samples) : 0);
729
730 log_plain("RECEIVED AUDIO:");
731 log_plain(" Zero crossings: %.2f%% of samples (music: 1-5%%, noise: 15-50%%)", recv_zero_cross_pct);
732 log_plain(" Sharp transitions (clicks/pops): %.2f%% of samples", recv_sharp_pct);
733 log_plain(" Clipping samples: %llu (%.3f%%)", (unsigned long long)g_received_clipping_samples,
734 g_received_stats.total_samples > 0 ? (100.0 * g_received_clipping_samples / g_received_stats.total_samples)
735 : 0);
736 log_plain(" Zero crossing increase: %.2f%% higher than sent (indicates corruption)",
737 recv_zero_cross_pct - sent_zero_cross_pct);
738
739 // Musicality verdict
740 log_plain("SOUND QUALITY VERDICT:");
741 if (recv_zero_cross_pct > 10.0) {
742 log_plain(" ⚠️ SOUNDS LIKE STATIC/DISTORTED: Excessive zero crossings (%.2f%%) = high frequency noise",
743 recv_zero_cross_pct);
744 log_plain(" Increase from sent: %.2f%% (waveform corruption detected)",
745 recv_zero_cross_pct - sent_zero_cross_pct);
746 log_plain(" Likely causes: Opus codec artifacts, jitter buffer issues, or packet delivery gaps");
747 } else if (recv_zero_cross_pct - sent_zero_cross_pct > 3.0) {
748 log_plain(" ⚠️ SOUNDS CORRUPTED: Zero crossing rate increased by %.2f%% (should be ±0.5%%)",
749 recv_zero_cross_pct - sent_zero_cross_pct);
750 log_plain(" Indicates waveform distortion from network/processing artifacts");
751 } else if (recv_sharp_pct > 2.0) {
752 log_plain(" ⚠️ SOUNDS LIKE STATIC: High click/pop rate (%.2f%%) indicates audio artifacts", recv_sharp_pct);
753 log_plain(" Likely causes: Packet loss, jitter buffer issues, or frame discontinuities");
754 } else if (g_received_clipping_samples > (g_received_stats.total_samples / 1000)) {
755 log_plain(" ⚠️ SOUNDS DISTORTED: Significant clipping detected (%.3f%%)",
756 100.0 * g_received_clipping_samples / g_received_stats.total_samples);
757 log_plain(" Likely causes: AGC too aggressive, gain too high, or codec compression artifacts");
758 } else if (low_energy_pct > 50.0 && recv_rms < 0.05f) {
759 log_plain(" ⚠️ SOUNDS MUDDY/QUIET: Over 50%% very quiet samples + low RMS");
760 log_plain(" Audio may sound unclear or like background noise rather than music");
761 } else if (received_silence_pct > 10.0) {
762 log_plain(" ⚠️ SOUNDS SCRATCHY: Excessive silence (%.1f%%) causes dropouts", received_silence_pct);
763 } else if (recv_rms > 0.08f && recv_zero_cross_pct < 6.0 && recv_sharp_pct < 1.0 &&
764 g_received_clipping_samples == 0) {
765 log_plain(" ✓ SOUNDS LIKE MUSIC: Good RMS (%.4f), clean waveform (%.2f%% zero crossings), minimal artifacts",
766 recv_rms, recv_zero_cross_pct);
767 log_plain(" Audio quality acceptable for communication");
768 } else {
769 log_plain(" ? BORDERLINE: Check specific metrics above");
770 }
771
772 // Low energy audio analysis
773 if (low_energy_pct > 50.0) {
774 log_plain(" ⚠️ WARNING: Over 50%% of received samples are very quiet (< 0.05 amplitude)");
775 log_plain(" - This makes audio sound muddy, unclear, or hard to understand");
776 log_plain(" - Caused by: Mixing other clients' audio with your own at wrong levels");
777 }
778
779 // Stuttering/periodic gap detection using packet inter-arrival times
780 if (g_received_packet_times_count >= 5) {
781 uint32_t inter_arrival_times_ms[MAX_PACKET_SAMPLES - 1];
782 uint32_t inter_arrival_count = 0;
783 uint32_t min_interval_ms = 0xFFFFFFFF;
784 uint32_t max_interval_ms = 0;
785 uint64_t sum_intervals_ms = 0;
786 uint32_t intervals_around_50ms = 0; // Count intervals ~40-60ms
787
788 // Calculate inter-packet arrival times
789 for (uint32_t i = 1; i < g_received_packet_times_count; i++) {
790 struct timespec *prev = &g_received_packet_times[i - 1];
791 struct timespec *curr = &g_received_packet_times[i];
792
793 int64_t prev_us = (int64_t)prev->tv_sec * 1000000 + prev->tv_nsec / 1000;
794 int64_t curr_us = (int64_t)curr->tv_sec * 1000000 + curr->tv_nsec / 1000;
795 uint32_t gap_ms = (uint32_t)((curr_us - prev_us) / 1000);
796
797 inter_arrival_times_ms[inter_arrival_count++] = gap_ms;
798 if (gap_ms < min_interval_ms)
799 min_interval_ms = gap_ms;
800 if (gap_ms > max_interval_ms)
801 max_interval_ms = gap_ms;
802 sum_intervals_ms += gap_ms;
803
804 // Check if interval is ~50ms (within 15ms tolerance for network jitter)
805 if (gap_ms >= 35 && gap_ms <= 70) {
806 intervals_around_50ms++;
807 }
808 }
809
810 uint32_t avg_interval_ms = (uint32_t)(sum_intervals_ms / inter_arrival_count);
811 uint32_t interval_consistency = (intervals_around_50ms * 100) / inter_arrival_count;
812
813 // Calculate how much audio is in each packet
814 // Total decoded samples / number of packets = average samples per packet
815 // At 48kHz, 960 samples = 1 Opus frame = 20ms
816 double avg_samples_per_packet =
817 g_received_stats.total_samples > 0 ? (double)g_received_stats.total_samples / inter_arrival_count : 0;
818 double frames_per_packet = avg_samples_per_packet / 960.0; // 960 samples = 1 frame @ 48kHz
819 double ms_audio_per_packet = frames_per_packet * 20.0; // 20ms per frame
820
821 // Detect if stuttering is periodic (consistent ~50ms intervals)
822 if (intervals_around_50ms >= (inter_arrival_count * 2 / 3)) {
823 // More than 66% of packets are ~50ms apart - clear periodic stuttering
824 log_plain(" 🔴 PERIODIC STUTTERING DETECTED: Server sends packets every ~%u ms (should be ~20ms)!",
825 avg_interval_ms);
826 log_plain(" - Packet inter-arrival: %u-%u ms (avg: %u ms)", min_interval_ms, max_interval_ms, avg_interval_ms);
827 log_plain(" - %u/%u packets (~%u%%) are ~50ms apart (CLEAR STUTTERING PATTERN)", intervals_around_50ms,
828 inter_arrival_count, interval_consistency);
829
830 log_plain(" - PACKET ANALYSIS:");
831 log_plain(" - Total audio samples: %llu over %u packets", (unsigned long long)g_received_stats.total_samples,
832 inter_arrival_count);
833 log_plain(" - Avg samples per packet: %.0f (= %.2f Opus frames = %.1f ms)", avg_samples_per_packet,
834 frames_per_packet, ms_audio_per_packet);
835
836 if (frames_per_packet < 1.5) {
837 log_plain(" - ❌ PROBLEM: Each packet contains < 1.5 frames (should be 2-3 frames!)");
838 log_plain(" - With only %.1f frames per packet arriving every %u ms, there are gaps between chunks",
839 frames_per_packet, avg_interval_ms);
840 log_plain(" - Audio plays for ~%.0f ms, then %u ms gap, then plays again", ms_audio_per_packet,
841 avg_interval_ms - (uint32_t)ms_audio_per_packet);
842 } else if (frames_per_packet > 2.5) {
843 log_plain(" - ✓ Packets contain %.1f frames (~%.0f ms audio each)", frames_per_packet,
844 ms_audio_per_packet);
845 log_plain(" - Should play smoothly if jitter buffer is large enough");
846 log_plain(" - If still stuttering, issue is jitter buffer depth or timing precision");
847 } else {
848 log_plain(" - Packets contain %.1f frames (~%.0f ms)", frames_per_packet, ms_audio_per_packet);
849 log_plain(" - Borderline: buffer needs to hold %.0f ms to bridge %.u ms gap", ms_audio_per_packet,
850 avg_interval_ms - (uint32_t)ms_audio_per_packet);
851 }
852 } else if (avg_interval_ms > 30) {
853 log_plain(" ⚠️ AUDIO DELIVERY INCONSISTENCY: Server packets arrive every ~%u ms (expected ~20ms)",
854 avg_interval_ms);
855 log_plain(" - Interval range: %u-%u ms", min_interval_ms, max_interval_ms);
856 log_plain(" - This causes dropouts and buffering issues");
857 }
858 }
859
860 // Packet delivery gaps
861 if (g_received_stats.max_gap_ms > 40) {
862 log_plain(" ⚠️ DISTORTION DETECTED: Packet delivery gaps too large!");
863 log_plain(" - Max gap: %u ms (should be ~20ms for smooth audio)", g_received_stats.max_gap_ms);
864 if (g_received_stats.max_gap_ms > 80) {
865 log_plain(" - SEVERE: Gaps > 80ms cause severe distortion and dropouts");
866 } else if (g_received_stats.max_gap_ms > 50) {
867 log_plain(" - Gaps > 50ms cause noticeable distortion");
868 }
869 }
870 if (g_received_stats.discontinuity_count > 0) {
871 log_plain(" Packet delivery discontinuities: %llu gaps > 100ms detected",
872 (unsigned long long)g_received_stats.discontinuity_count);
873 }
874 if (g_received_stats.jitter_count > (g_received_stats.total_samples / 100)) {
875 log_plain(" High jitter detected: > 1%% of samples have rapid amplitude changes");
876 log_plain(" - May indicate buffer underruns from sparse packet delivery");
877 }
878
879 log_plain("================================================================================");
880}
#define MAX_PACKET_SAMPLES
Definition analysis.c:75
unsigned int uint32_t
Definition common.h:58
unsigned long long uint64_t
Definition common.h:59
#define log_plain(...)
Plain logging - writes to both log file and stderr without timestamps or log levels.
uint64_t silent_samples
Definition analysis.h:31
uint64_t clipping_count
Definition analysis.h:30
uint64_t discontinuity_count
Definition analysis.h:39

References audio_analysis_stats_t::clipping_count, audio_analysis_stats_t::discontinuity_count, ECHO_DELAY_COUNT, audio_analysis_stats_t::jitter_count, log_plain, audio_analysis_stats_t::max_gap_ms, MAX_PACKET_SAMPLES, audio_analysis_stats_t::packets_count, audio_analysis_stats_t::peak_level, audio_analysis_stats_t::silent_samples, audio_analysis_stats_t::timestamp_end_us, audio_analysis_stats_t::timestamp_start_us, and audio_analysis_stats_t::total_samples.

◆ audio_analysis_set_aec3_metrics()

void audio_analysis_set_aec3_metrics ( double  echo_return_loss,
double  echo_return_loss_enhancement,
int  delay_ms 
)

Set AEC3 echo cancellation metrics.

Parameters
echo_return_loss: Echo return loss (dB) - how much echo is attenuated
echo_return_loss_enhancement: Additional echo suppression (dB)
delay_ms: Estimated echo delay in milliseconds

Definition at line 508 of file analysis.c.

508 {
509 // Store AEC3 metrics for reporting
510 // These come from WebRTC EchoControl::GetMetrics() call
511 g_aec3_echo_return_loss = echo_return_loss;
512 g_aec3_echo_return_loss_enhancement = echo_return_loss_enhancement;
513 g_aec3_delay_ms = delay_ms;
514 g_aec3_metrics_available = true;
515}

Referenced by client_audio_pipeline_process_duplex().

◆ audio_analysis_track_received_packet()

void audio_analysis_track_received_packet ( size_t  size)

Track received packet.

Parameters
size: Packet size in bytes

Definition at line 466 of file analysis.c.

466 {
467 (void)size; // Unused parameter - reserved for future per-packet analysis
468 if (!g_analysis_enabled)
469 return;
470
471 struct timespec ts;
472 clock_gettime(CLOCK_MONOTONIC, &ts);
473 int64_t now_us = (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
474
475 // Track packet timing for stuttering detection
476 if (g_received_packet_times_count < MAX_PACKET_SAMPLES) {
477 g_received_packet_times[g_received_packet_times_count++] = ts;
478 }
479
480 // Detect gaps between consecutive packets (discontinuity)
481 if (g_received_stats.packets_count > 0) {
482 int64_t gap_us = now_us - g_received_last_packet_time_us;
483 int32_t gap_ms = (int32_t)(gap_us / 1000);
484
485 // Expected: ~20ms per Opus frame, flag if gap > 100ms
486 if (gap_ms > 100) {
487 g_received_stats.discontinuity_count++;
488 }
489
490 // Track max gap
491 if (gap_ms > (int32_t)g_received_stats.max_gap_ms) {
492 g_received_stats.max_gap_ms = (uint32_t)gap_ms;
493 }
494 }
495
496 g_received_last_packet_time_us = now_us;
497 g_received_stats.packets_count++;
498}

References audio_analysis_stats_t::discontinuity_count, audio_analysis_stats_t::max_gap_ms, MAX_PACKET_SAMPLES, and audio_analysis_stats_t::packets_count.

◆ audio_analysis_track_received_sample()

void audio_analysis_track_received_sample ( float  sample)

Track received audio sample.

Parameters
sample: Audio sample value

Definition at line 275 of file analysis.c.

275 {
276 if (!g_analysis_enabled)
277 return;
278
279 g_received_stats.total_samples++;
280
281 // Track peak level
282 float abs_sample = fabsf(sample);
283 if (abs_sample > g_received_stats.peak_level) {
284 g_received_stats.peak_level = abs_sample;
285 }
286
287 // Track clipping (samples > 1.0) - indicates distortion
288 if (abs_sample > 1.0f) {
289 g_received_stats.clipping_count++;
290 g_received_clipping_samples++;
291 }
292
293 // Detect sharp transitions (sudden amplitude jumps > 0.3) - indicates clicks/pops/artifacts
294 float amp_change = fabsf(sample - g_received_last_sample);
295 if (amp_change > 0.3f) {
296 g_received_sharp_transitions++;
297 }
298 g_received_transition_samples++;
299
300 // Accumulate for mean calculation
301 g_received_mean += sample;
302
303 // Detect zero crossings (waveform crossing zero) - indicates spectral content
304 // Use file-scope static variable for prev sample tracking
305 // (This function is called from the protocol reception thread, separate from the
306 // audio capture thread, so using distinct static variables is safe)
307 static float s_received_prev_sample_for_zero_crossing = 0.0f;
308 if ((s_received_prev_sample_for_zero_crossing > 0 && sample < 0) ||
309 (s_received_prev_sample_for_zero_crossing < 0 && sample > 0)) {
310 g_received_zero_crossings++;
311 }
312 s_received_prev_sample_for_zero_crossing = sample;
313
314 // Track silence and low-energy audio
315 if (abs_sample < 0.001f) {
316 g_received_stats.silent_samples++;
317 g_received_silence_burst++;
318 g_received_below_noise_floor++;
319
320 // Track when silence started
321 if (g_received_silence_burst == 1) {
322 g_received_silence_start_sample = g_received_stats.total_samples;
323 }
324 } else {
325 // Silence ended - track gap interval and max burst length
326 if (g_received_silence_burst > 0) {
327 // Calculate time gap between end of last silence and start of this one
328 if (g_received_last_silence_end_sample > 0) {
329 uint64_t samples_between = g_received_silence_start_sample - g_received_last_silence_end_sample;
330 uint32_t ms_between = (uint32_t)(samples_between * 1000 / 48000); // Convert samples to ms at 48kHz
331
332 // Track the gap interval if we have room
333 if (g_received_gap_count < MAX_GAP_SAMPLES) {
334 g_received_gap_intervals_ms[g_received_gap_count++] = ms_between;
335 }
336 }
337
338 g_received_last_silence_end_sample = g_received_stats.total_samples;
339
340 // Track max burst length
341 if (g_received_silence_burst > g_received_max_silence_burst) {
342 g_received_max_silence_burst = g_received_silence_burst;
343 }
344 }
345 g_received_silence_burst = 0;
346 }
347
348 // Track very quiet audio (< 0.05 amplitude) which contributes to muddy/quiet perception
349 if (abs_sample < 0.05f) {
350 g_received_low_energy_samples++;
351 }
352
353 // Detect jitter: rapid amplitude changes > 0.5 between consecutive samples
354 float delta = fabsf(sample - g_received_last_sample);
355 if (delta > 0.5f) {
356 g_received_stats.jitter_count++;
357 }
358 g_received_last_sample = sample;
359
360 // Accumulate for RMS calculation
361 g_received_rms_accumulator += sample * sample;
362 g_received_rms_sample_count++;
363
364 // Echo detection: check if received sample matches sent sample from N ms ago
365 // This detects if echo cancellation is working (it shouldn't find matches)
366 if (g_echo_correlation_sample_count < 500000) { // Limit to first ~10 seconds
367 for (int delay_idx = 0; delay_idx < ECHO_DELAY_COUNT; delay_idx++) {
368 // Calculate sample delay: delay_ms * (sample_rate / 1000)
369 uint32_t delay_samples = (g_echo_delays_ms[delay_idx] * 48000) / 1000;
370
371 // Get sent sample from that delay ago (from circular buffer)
372 uint64_t sent_pos;
373 if (g_echo_buffer_pos >= delay_samples) {
374 sent_pos = g_echo_buffer_pos - delay_samples;
375 } else {
376 sent_pos = (g_echo_buffer_pos + ECHO_BUFFER_SIZE) - delay_samples;
377 }
378
379 float sent_sample = g_echo_buffer[sent_pos];
380
381 // Check if samples match (correlation threshold = 0.1)
382 float diff = fabsf(sample - sent_sample);
383 if (diff < 0.1f && fabsf(sent_sample) > 0.01f) { // Only count if sent is not silence
384 g_echo_match_count[delay_idx]++;
385 g_echo_correlation_strength[delay_idx] += (0.1f - diff); // Accumulate strength
386 }
387 }
388 g_echo_correlation_sample_count++;
389 }
390
391 // Beep/tone artifact detection
392 // Store sample in sliding window for frequency analysis
393 g_received_beep_window[g_received_beep_window_idx] = sample;
394 g_received_beep_window_idx = (g_received_beep_window_idx + 1) % BEEP_WINDOW_SIZE;
395
396 // Analyze window every 10ms (480 samples at 48kHz)
397 if (g_received_beep_window_idx == 0 && g_received_stats.total_samples > BEEP_WINDOW_SIZE) {
398 // Calculate zero-crossing rate in this window
399 int zero_crossings = 0;
400 float min_amp = 1.0f, max_amp = 0.0f;
401 float sum_amp = 0.0f;
402 float prev = g_received_beep_window[0];
403
404 for (int i = 1; i < BEEP_WINDOW_SIZE; i++) {
405 float curr = g_received_beep_window[i];
406 float abs_curr = fabsf(curr);
407
408 // Track amplitude range
409 if (abs_curr > max_amp)
410 max_amp = abs_curr;
411 if (abs_curr < min_amp)
412 min_amp = abs_curr;
413 sum_amp += abs_curr;
414
415 // Count zero crossings
416 if ((prev > 0 && curr < 0) || (prev < 0 && curr > 0)) {
417 zero_crossings++;
418 }
419 prev = curr;
420 }
421
422 float avg_amp = sum_amp / BEEP_WINDOW_SIZE;
423 float amp_range = max_amp - min_amp;
424
425 // A beep/tone has:
426 // 1. High zero-crossing rate (>20 per 10ms = >2000Hz equivalent, or 5-20 = 500-2000Hz)
427 // 2. Consistent amplitude (range/avg < 0.5 means sine-wave like)
428 // 3. Non-trivial amplitude (avg > 0.02)
429 bool is_tonal = (zero_crossings >= 5 && zero_crossings <= 100) && // 500Hz-10kHz range
430 (avg_amp > 0.02f) && // Not silence
431 (amp_range < avg_amp * 1.5f); // Relatively consistent amplitude
432
433 if (is_tonal) {
434 g_received_tonal_samples += BEEP_WINDOW_SIZE;
435
436 if (!g_in_beep_burst) {
437 // Starting a new beep burst
438 g_in_beep_burst = true;
439 g_beep_burst_samples = BEEP_WINDOW_SIZE;
440 } else {
441 g_beep_burst_samples += BEEP_WINDOW_SIZE;
442 }
443 } else {
444 if (g_in_beep_burst) {
445 // Beep burst ended
446 // Only count as beep event if it was short (< 500ms = 24000 samples)
447 // Long tonal sounds are likely music, not artifacts
448 if (g_beep_burst_samples > 0 && g_beep_burst_samples < 24000) {
449 g_received_beep_events++;
450 g_received_stats.beep_events = g_received_beep_events;
451 }
452 g_in_beep_burst = false;
453 g_beep_burst_samples = 0;
454 }
455 }
456
457 g_received_stats.tonal_samples = g_received_tonal_samples;
458 }
459
460 // Write to WAV file if enabled
461 if (g_received_wav) {
462 wav_writer_write(g_received_wav, &sample, 1);
463 }
464}
#define MAX_GAP_SAMPLES
Definition analysis.c:68
#define ECHO_BUFFER_SIZE
Definition analysis.c:82
#define BEEP_WINDOW_SIZE
Definition analysis.c:105
int wav_writer_write(wav_writer_t *writer, const float *samples, int num_samples)
Write audio samples to WAV file.
Definition wav_writer.c:85

References audio_analysis_stats_t::beep_events, BEEP_WINDOW_SIZE, audio_analysis_stats_t::clipping_count, ECHO_BUFFER_SIZE, ECHO_DELAY_COUNT, audio_analysis_stats_t::jitter_count, MAX_GAP_SAMPLES, audio_analysis_stats_t::peak_level, audio_analysis_stats_t::silent_samples, audio_analysis_stats_t::tonal_samples, audio_analysis_stats_t::total_samples, and wav_writer_write().

Referenced by audio_process_received_samples().

◆ audio_analysis_track_sent_packet()

void audio_analysis_track_sent_packet ( size_t  size)

Track sent packet.

Parameters
size: Packet size in bytes

Definition at line 246 of file analysis.c.

246 { // Records the send time of one packet and updates the sent-side gap statistics
247 (void)size; // Deliberately unused (silences the unused-parameter warning); reserved for future per-packet analysis
248 if (!g_analysis_enabled)
249 return;
250
251 struct timespec ts;
252 clock_gettime(CLOCK_MONOTONIC, &ts);
253 int64_t now_us = (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; // seconds + nanoseconds folded into microseconds
254
255 // Measure the gap since the previous packet (discontinuity detection); skipped while packets_count is still 0
256 if (g_sent_stats.packets_count > 0) {
257 int64_t gap_us = now_us - g_sent_last_packet_time_us;
258 int32_t gap_ms = (int32_t)(gap_us / 1000); // whole milliseconds (integer division truncates)
259
260 // Expected: ~20ms per Opus frame; a gap above 100ms is counted as a discontinuity
261 if (gap_ms > 100) {
262 g_sent_stats.discontinuity_count++;
263 }
264
265 // Remember the largest gap seen so far, in milliseconds
266 if (gap_ms > (int32_t)g_sent_stats.max_gap_ms) {
267 g_sent_stats.max_gap_ms = (uint32_t)gap_ms;
268 }
269 }
270
271 g_sent_last_packet_time_us = now_us;
272 g_sent_stats.packets_count++;
273}

References audio_analysis_stats_t::discontinuity_count, audio_analysis_stats_t::max_gap_ms, and audio_analysis_stats_t::packets_count.

◆ audio_analysis_track_sent_sample()

void audio_analysis_track_sent_sample ( float  sample)

Track sent audio sample.

Parameters
sample: Audio sample value

Definition at line 174 of file analysis.c.

174 { // Folds one outgoing sample into the sent-side statistics, the optional WAV dump and the echo buffer
175 if (!g_analysis_enabled)
176 return;
177
178 g_sent_stats.total_samples++;
179
180 // Track peak level (largest absolute sample value seen so far)
181 float abs_sample = fabsf(sample);
182 if (abs_sample > g_sent_stats.peak_level) {
183 g_sent_stats.peak_level = abs_sample;
184 }
185
186 // Track clipping (|sample| > 1.0) - indicates distortion
187 if (abs_sample > 1.0f) {
188 g_sent_stats.clipping_count++;
189 g_sent_clipping_samples++;
190 }
191
192 // Detect sharp transitions (jump > 0.3 from the previous sample) - indicates clicks/pops
193 float amp_change = fabsf(sample - g_sent_last_sample);
194 if (amp_change > 0.3f) {
195 g_sent_sharp_transitions++;
196 }
197 g_sent_transition_samples++; // incremented for every sample, not only the sharp ones
198
199 // Accumulate the running sum of samples for the mean calculation
200 g_sent_mean += sample;
201
202 // Detect zero crossings (sign change between consecutive samples) - indicates spectral content
203 // The previous sample is kept in a function-local static, separate from
204 // g_sent_last_sample (which the sharp-transition and jitter checks use).
205 // Original note: this function is only called from the audio capture thread.
206 static float s_sent_prev_sample_for_zero_crossing = 0.0f;
207 if ((s_sent_prev_sample_for_zero_crossing > 0 && sample < 0) ||
208 (s_sent_prev_sample_for_zero_crossing < 0 && sample > 0)) {
209 g_sent_zero_crossings++;
210 }
211 s_sent_prev_sample_for_zero_crossing = sample;
212
213 // Track silence (|sample| < 0.001)
214 if (abs_sample < 0.001f) {
215 g_sent_stats.silent_samples++;
216 g_sent_silence_burst++;
217 } else {
218 // Silence ended - record the burst length if it is the longest so far, then reset the counter
219 if (g_sent_silence_burst > g_sent_max_silence_burst) {
220 g_sent_max_silence_burst = g_sent_silence_burst;
221 }
222 g_sent_silence_burst = 0;
223 }
224
225 // Detect jitter: same sample-to-sample delta as the sharp-transition check, with a higher threshold (> 0.5)
226 float delta = fabsf(sample - g_sent_last_sample);
227 if (delta > 0.5f) {
228 g_sent_stats.jitter_count++;
229 }
230 g_sent_last_sample = sample;
231
232 // Accumulate the sum of squares and sample count for the RMS calculation
233 g_sent_rms_accumulator += sample * sample;
234 g_sent_rms_sample_count++;
235
236 // Write to WAV file if enabled (one sample per call)
237 if (g_sent_wav) {
238 wav_writer_write(g_sent_wav, &sample, 1);
239 }
240
241 // Store in echo detection buffer (circular: the write position wraps at ECHO_BUFFER_SIZE)
242 g_echo_buffer[g_echo_buffer_pos] = sample;
243 g_echo_buffer_pos = (g_echo_buffer_pos + 1) % ECHO_BUFFER_SIZE;
244}

References audio_analysis_stats_t::clipping_count, ECHO_BUFFER_SIZE, audio_analysis_stats_t::jitter_count, audio_analysis_stats_t::peak_level, audio_analysis_stats_t::silent_samples, audio_analysis_stats_t::total_samples, and wav_writer_write().