ascii-chat 0.8.38
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
analysis.c File Reference

Audio Analysis Implementation. More...

Go to the source code of this file.

Macros

#define FRAME_SIZE   960
 
#define MAX_GAP_SAMPLES   100
 
#define MAX_PACKET_SAMPLES   200
 
#define ECHO_BUFFER_SIZE   48000
 
#define ECHO_DELAY_COUNT   5
 
#define BEEP_WINDOW_SIZE   480
 

Functions

int audio_analysis_init (void)
 
void audio_analysis_track_sent_sample (float sample)
 
void audio_analysis_track_sent_packet (size_t size)
 
void audio_analysis_track_received_sample (float sample)
 
void audio_analysis_track_received_packet (size_t size)
 
const audio_analysis_stats_t * audio_analysis_get_sent_stats (void)
 
const audio_analysis_stats_t * audio_analysis_get_received_stats (void)
 
void audio_analysis_set_aec3_metrics (double echo_return_loss, double echo_return_loss_enhancement, uint64_t delay_ns)
 
void audio_analysis_print_report (void)
 
void audio_analysis_destroy (void)
 

Detailed Description

Audio Analysis Implementation.

Definition in file analysis.c.

Macro Definition Documentation

◆ BEEP_WINDOW_SIZE

#define BEEP_WINDOW_SIZE   480

Definition at line 106 of file analysis.c.

◆ ECHO_BUFFER_SIZE

#define ECHO_BUFFER_SIZE   48000

Definition at line 83 of file analysis.c.

◆ ECHO_DELAY_COUNT

#define ECHO_DELAY_COUNT   5

Definition at line 89 of file analysis.c.

◆ FRAME_SIZE

#define FRAME_SIZE   960

Definition at line 62 of file analysis.c.

◆ MAX_GAP_SAMPLES

#define MAX_GAP_SAMPLES   100

Definition at line 69 of file analysis.c.

◆ MAX_PACKET_SAMPLES

#define MAX_PACKET_SAMPLES   200

Definition at line 76 of file analysis.c.

Function Documentation

◆ audio_analysis_destroy()

void audio_analysis_destroy ( void  )

Definition at line 875 of file analysis.c.

875 {
876 g_analysis_enabled = false;
877
878 // Close WAV files if they were open
879 if (g_sent_wav) {
880 wav_writer_close(g_sent_wav);
881 g_sent_wav = NULL;
882 log_info("Closed sent audio WAV file");
883 }
884 if (g_received_wav) {
885 wav_writer_close(g_received_wav);
886 g_received_wav = NULL;
887 log_info("Closed received audio WAV file");
888 }
889}
void wav_writer_close(wav_writer_t *writer)
Definition wav_writer.c:113

References wav_writer_close().

◆ audio_analysis_get_received_stats()

const audio_analysis_stats_t * audio_analysis_get_received_stats ( void  )

Definition at line 500 of file analysis.c.

500 {
501 return &g_received_stats;
502}

◆ audio_analysis_get_sent_stats()

const audio_analysis_stats_t * audio_analysis_get_sent_stats ( void  )

Definition at line 496 of file analysis.c.

496 {
497 return &g_sent_stats;
498}

◆ audio_analysis_init()

int audio_analysis_init ( void  )

Definition at line 114 of file analysis.c.

114 {
115 SAFE_MEMSET(&g_sent_stats, sizeof(g_sent_stats), 0, sizeof(g_sent_stats));
116 SAFE_MEMSET(&g_received_stats, sizeof(g_received_stats), 0, sizeof(g_received_stats));
117
118 // Reset stuttering/gap tracking
119 SAFE_MEMSET(g_received_gap_intervals_ms, sizeof(g_received_gap_intervals_ms), 0, sizeof(g_received_gap_intervals_ms));
120 g_received_gap_count = 0;
121 g_received_silence_start_sample = 0;
122 g_received_last_silence_end_sample = 0;
123 SAFE_MEMSET(g_received_packet_times_ns, sizeof(g_received_packet_times_ns), 0, sizeof(g_received_packet_times_ns));
124 g_received_packet_times_count = 0;
125 SAFE_MEMSET(g_received_packet_sizes, sizeof(g_received_packet_sizes), 0, sizeof(g_received_packet_sizes));
126 g_received_total_audio_samples = 0;
127
128 // Reset echo detection
129 SAFE_MEMSET(g_echo_buffer, sizeof(g_echo_buffer), 0, sizeof(g_echo_buffer));
130 g_echo_buffer_pos = 0;
131 g_echo_correlation_sample_count = 0;
132 for (int i = 0; i < ECHO_DELAY_COUNT; i++) {
133 g_echo_correlation_strength[i] = 0;
134 g_echo_match_count[i] = 0;
135 }
136 g_detected_echo_delay_ms = 0;
137
138 // Reset beep detection
139 SAFE_MEMSET(g_received_beep_window, sizeof(g_received_beep_window), 0, sizeof(g_received_beep_window));
140 g_received_beep_window_idx = 0;
141 g_received_beep_events = 0;
142 g_received_tonal_samples = 0;
143 g_in_beep_burst = false;
144 g_beep_burst_samples = 0;
145
146 int64_t now_us = (int64_t)time_ns_to_us(time_get_ns());
147
148 g_sent_stats.timestamp_start_ns = now_us;
149 g_received_stats.timestamp_start_ns = now_us;
150
151 g_sent_last_sample = 0.0f;
152 g_received_last_sample = 0.0f;
153 g_sent_last_packet_time_us = now_us;
154 g_received_last_packet_time_us = now_us;
155
156 // Initialize WAV file dumping if enabled
157 if (wav_dump_enabled()) {
158 g_sent_wav = wav_writer_open("/tmp/sent_audio.wav", 48000, 1);
159 g_received_wav = wav_writer_open("/tmp/received_audio.wav", 48000, 1);
160 if (g_sent_wav) {
161 log_info("Dumping sent audio to /tmp/sent_audio.wav");
162 }
163 if (g_received_wav) {
164 log_info("Dumping received audio to /tmp/received_audio.wav");
165 }
166 }
167
168 g_analysis_enabled = true;
169 log_info("Audio analysis enabled");
170 return 0;
171}
#define ECHO_DELAY_COUNT
Definition analysis.c:89
uint64_t time_get_ns(void)
Definition util/time.c:48
bool wav_dump_enabled(void)
Definition wav_writer.c:139
wav_writer_t * wav_writer_open(const char *filepath, int sample_rate, int channels)
Definition wav_writer.c:49

References ECHO_DELAY_COUNT, time_get_ns(), wav_dump_enabled(), and wav_writer_open().

◆ audio_analysis_print_report()

void audio_analysis_print_report ( void  )

Definition at line 513 of file analysis.c.

513 {
514 if (!g_analysis_enabled) {
515 return;
516 }
517
518 int64_t now_us = (int64_t)time_ns_to_us(time_get_ns());
519
520 g_sent_stats.timestamp_end_ns = now_us;
521 g_received_stats.timestamp_end_ns = now_us;
522
523 int64_t sent_duration_ms = (g_sent_stats.timestamp_end_ns - g_sent_stats.timestamp_start_ns) / NS_PER_MS_INT;
524 int64_t recv_duration_ms = (g_received_stats.timestamp_end_ns - g_received_stats.timestamp_start_ns) / NS_PER_MS_INT;
525
526 // Calculate RMS levels
527 float sent_rms = 0.0f;
528 float recv_rms = 0.0f;
529 if (g_sent_rms_sample_count > 0) {
530 sent_rms = sqrtf(g_sent_rms_accumulator / g_sent_rms_sample_count);
531 }
532 if (g_received_rms_sample_count > 0) {
533 recv_rms = sqrtf(g_received_rms_accumulator / g_received_rms_sample_count);
534 }
535
536 log_plain("================================================================================");
537 log_plain(" AUDIO ANALYSIS REPORT ");
538 log_plain("================================================================================");
539 log_plain("SENT AUDIO (Microphone Capture):");
540 log_plain(" Duration: %lld ms", (long long)sent_duration_ms);
541 log_plain(" Total Samples: %llu", (unsigned long long)g_sent_stats.total_samples);
542 log_plain(" Peak Level: %.4f (should be < 1.0)", g_sent_stats.peak_level);
543 log_plain(" RMS Level: %.4f (audio energy/loudness)", sent_rms);
544 log_plain(" Clipping Events: %llu samples (%.2f%%)", (unsigned long long)g_sent_stats.clipping_count,
545 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_stats.clipping_count / g_sent_stats.total_samples) : 0);
546 log_plain(" Silent Samples: %llu samples (%.2f%%)", (unsigned long long)g_sent_stats.silent_samples,
547 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_stats.silent_samples / g_sent_stats.total_samples) : 0);
548 if (g_sent_max_silence_burst > 0) {
549 log_plain(" Max Silence Burst: %llu samples", (unsigned long long)g_sent_max_silence_burst);
550 }
551 log_plain(" Packets Sent: %u", g_sent_stats.packets_count);
552 log_plain(" Status: %s", g_sent_stats.clipping_count > 0 ? "CLIPPING DETECTED!" : "OK");
553
554 log_plain("RECEIVED AUDIO (Playback):");
555 log_plain(" Duration: %lld ms", (long long)recv_duration_ms);
556 log_plain(" Total Samples: %llu", (unsigned long long)g_received_stats.total_samples);
557 log_plain(" Peak Level: %.4f", g_received_stats.peak_level);
558 log_plain(" RMS Level: %.4f (audio energy/loudness)", recv_rms);
559 log_plain(" Clipping Events: %llu samples (%.2f%%)", (unsigned long long)g_received_stats.clipping_count,
560 g_received_stats.total_samples > 0
561 ? (100.0 * g_received_stats.clipping_count / g_received_stats.total_samples)
562 : 0);
563 log_plain(" Silent Samples: %llu samples (%.2f%%)", (unsigned long long)g_received_stats.silent_samples,
564 g_received_stats.total_samples > 0
565 ? (100.0 * g_received_stats.silent_samples / g_received_stats.total_samples)
566 : 0);
567 if (g_received_max_silence_burst > 0) {
568 log_plain(" Max Silence Burst: %llu samples", (unsigned long long)g_received_max_silence_burst);
569 }
570 double low_energy_pct =
571 g_received_stats.total_samples > 0 ? (100.0 * g_received_low_energy_samples / g_received_stats.total_samples) : 0;
572 log_plain(" Very Quiet Samples: %llu samples (%.1f%%) [amplitude < 0.05]",
573 (unsigned long long)g_received_low_energy_samples, low_energy_pct);
574 log_plain(" Packets Received: %u", g_received_stats.packets_count);
575 log_plain(" Status: %s", g_received_stats.total_samples == 0 ? "NO AUDIO RECEIVED!" : "Receiving");
576
577 log_plain("QUALITY METRICS (Scratchy/Distorted Audio Detection):");
578 log_plain("SENT:");
579 log_plain(" Jitter Events: %llu (rapid amplitude changes)", (unsigned long long)g_sent_stats.jitter_count);
580 log_plain(" Discontinuities: %llu (packet arrival gaps > 100ms)",
581 (unsigned long long)g_sent_stats.discontinuity_count);
582 log_plain(" Max Gap Between Packets: %u ms (expected ~20ms per frame)", g_sent_stats.max_gap_ns);
583
584 log_plain("RECEIVED:");
585 log_plain(" Jitter Events: %llu (rapid amplitude changes)",
586 (unsigned long long)g_received_stats.jitter_count);
587 log_plain(" Discontinuities: %llu (packet arrival gaps > 100ms)",
588 (unsigned long long)g_received_stats.discontinuity_count);
589 log_plain(" Max Gap Between Packets: %u ms (expected ~20ms per frame)", g_received_stats.max_gap_ns);
590
591 // Beep/tone artifact detection
592 if (g_received_beep_events > 0 || g_received_tonal_samples > 0) {
593 double tonal_pct =
594 g_received_stats.total_samples > 0 ? (100.0 * g_received_tonal_samples / g_received_stats.total_samples) : 0;
595 log_plain("BEEP/TONE ARTIFACTS:");
596 log_plain(" Beep Events: %llu (short tonal bursts < 500ms)",
597 (unsigned long long)g_received_beep_events);
598 log_plain(" Tonal Samples: %llu samples (%.1f%%) [consistent frequency content]",
599 (unsigned long long)g_received_tonal_samples, tonal_pct);
600
601 if (g_received_beep_events > 10) {
602 log_plain(" 🔴 BEEPING DETECTED: %llu short tonal bursts - likely codec artifacts or system sounds!",
603 (unsigned long long)g_received_beep_events);
604 log_plain(" Possible causes:");
605 log_plain(" - Opus codec producing tonal artifacts during silence/transitions");
606 log_plain(" - Buffer underruns creating synthetic tones");
607 log_plain(" - AEC3 suppressor resonance");
608 log_plain(" - System notification sounds bleeding through");
609 } else if (g_received_beep_events > 3) {
610 log_plain(" ⚠️ Some beep artifacts detected (%llu events)", (unsigned long long)g_received_beep_events);
611 }
612 }
613
614 log_plain("DIAGNOSTICS:");
615 if (g_sent_stats.peak_level == 0) {
616 log_plain(" No audio captured from microphone!");
617 }
618 if (g_received_stats.total_samples == 0) {
619 log_plain(" No audio received from server!");
620 } else if (g_received_stats.peak_level < 0.01f) {
621 log_plain(" ⚠️ Received audio is very quiet (peak < 0.01)");
622 }
623 if (g_sent_stats.clipping_count > 0) {
624 log_plain(" Microphone input is clipping - reduce microphone volume");
625 }
626
627 // Echo detection diagnostics
628 log_plain("ECHO DETECTION (Echo Cancellation Quality Check):");
629 if (g_echo_correlation_sample_count > 0 && g_sent_stats.total_samples > 0) {
630 uint64_t max_matches = 0;
631 int best_delay_idx = -1;
632
633 // Find which delay has the most matches (if any)
634 for (int i = 0; i < ECHO_DELAY_COUNT; i++) {
635 if (g_echo_match_count[i] > max_matches) {
636 max_matches = g_echo_match_count[i];
637 best_delay_idx = i;
638 }
639 }
640
641 double echo_threshold_pct = 5.0; // If > 5% of samples match at a delay, it's echo
642
643 if (best_delay_idx >= 0) {
644 double match_pct = (100.0 * g_echo_match_count[best_delay_idx]) / g_echo_correlation_sample_count;
645 log_plain(" Echo correlation at different delays:");
646 for (int i = 0; i < ECHO_DELAY_COUNT; i++) {
647 double pct = (100.0 * g_echo_match_count[i]) / g_echo_correlation_sample_count;
648 const char *status = pct > echo_threshold_pct ? "⚠️ ECHO DETECTED" : "✓ OK";
649 log_plain(" %3u ms delay: %.1f%% match rate %s", g_echo_delays_ms[i], pct, status);
650 }
651
652 if (match_pct > echo_threshold_pct) {
653 g_detected_echo_delay_ms = g_echo_delays_ms[best_delay_idx];
654 log_plain(" 🔴 ECHO CANCELLATION NOT WORKING: Strong echo at %u ms delay!", g_detected_echo_delay_ms);
655 log_plain(" Received audio contains %.1f%% samples matching sent audio from %u ms ago", match_pct,
656 g_detected_echo_delay_ms);
657 } else {
658 log_plain(" ✓ Echo cancellation working: No significant echo detected");
659 }
660 }
661 } else {
662 log_plain(" Insufficient data for echo detection (need both sent and received audio)");
663 }
664
665 // AEC3 metrics from WebRTC (if available)
666 if (g_aec3_metrics_available) {
667 log_plain("AEC3 METRICS (from WebRTC GetMetrics()):");
668 log_plain(" Echo Return Loss (ERL): %.2f dB (how much echo is attenuated; >10 dB is good)",
669 g_aec3_echo_return_loss);
670 log_plain(" Echo Return Loss Enhancement (ERLE): %.2f dB (residual echo suppression)",
671 g_aec3_echo_return_loss_enhancement);
672 log_plain(" Estimated Echo Delay: %d ms", g_aec3_delay_ns);
673
674 if (g_aec3_echo_return_loss > 10.0) {
675 log_plain(" ✓ Good echo attenuation (ERL > 10 dB)");
676 } else if (g_aec3_echo_return_loss > 3.0) {
677 log_plain(" ⚠️ Moderate echo attenuation (3-10 dB)");
678 } else {
679 log_plain(" 🔴 Poor echo attenuation (ERL < 3 dB)");
680 }
681 }
682
683 // Audio quality diagnostics
684 if (recv_rms < 0.005f) {
685 log_plain(" ⚠️ CRITICAL: Received audio RMS is extremely low (%.6f) - barely audible!", recv_rms);
686 } else if (recv_rms < 0.02f) {
687 log_plain(" ⚠️ WARNING: Received audio RMS is low (%.6f) - may sound quiet or muddy", recv_rms);
688 }
689
690 // Silence analysis
691 double received_silence_pct = g_received_stats.total_samples > 0
692 ? (100.0 * g_received_stats.silent_samples / g_received_stats.total_samples)
693 : 0;
694
695 if (received_silence_pct > 30.0) {
696 log_plain(" ⚠️ SCRATCHY AUDIO DETECTED: Too much silence in received audio!");
697 log_plain(" - Silence: %.1f%% of received samples (should be < 10%%)", received_silence_pct);
698 log_plain(" - Max silence burst: %llu samples", (unsigned long long)g_received_max_silence_burst);
699 log_plain(" - This creates jittery/choppy playback between audio bursts");
700 } else if (received_silence_pct > 15.0) {
701 log_plain(" ⚠️ WARNING: Moderate silence detected (%.1f%%)", received_silence_pct);
702 }
703
704 // Sharp transition analysis (clicks/pops)
705 double sent_sharp_pct =
706 g_sent_transition_samples > 0 ? (100.0 * g_sent_sharp_transitions / g_sent_transition_samples) : 0;
707 double recv_sharp_pct =
708 g_received_transition_samples > 0 ? (100.0 * g_received_sharp_transitions / g_received_transition_samples) : 0;
709
710 // Zero crossing rate analysis (spectral content)
711 // Music: 1-5%, Speech: 5-15%, Static/Noise: 15-50%
712 double sent_zero_cross_pct =
713 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_zero_crossings / g_sent_stats.total_samples) : 0;
714 double recv_zero_cross_pct =
715 g_received_stats.total_samples > 0 ? (100.0 * g_received_zero_crossings / g_received_stats.total_samples) : 0;
716
717 log_plain("WAVEFORM ANALYSIS (Is it clean music or corrupted/static?):");
718 log_plain("SENT AUDIO:");
719 log_plain(" Zero crossings: %.2f%% of samples (music: 1-5%%, noise: 15-50%%)", sent_zero_cross_pct);
720 log_plain(" Sharp transitions (clicks/pops): %.2f%% of samples", sent_sharp_pct);
721 log_plain(" Clipping samples: %llu (%.3f%%)", (unsigned long long)g_sent_clipping_samples,
722 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_clipping_samples / g_sent_stats.total_samples) : 0);
723
724 log_plain("RECEIVED AUDIO:");
725 log_plain(" Zero crossings: %.2f%% of samples (music: 1-5%%, noise: 15-50%%)", recv_zero_cross_pct);
726 log_plain(" Sharp transitions (clicks/pops): %.2f%% of samples", recv_sharp_pct);
727 log_plain(" Clipping samples: %llu (%.3f%%)", (unsigned long long)g_received_clipping_samples,
728 g_received_stats.total_samples > 0 ? (100.0 * g_received_clipping_samples / g_received_stats.total_samples)
729 : 0);
730 log_plain(" Zero crossing increase: %.2f%% higher than sent (indicates corruption)",
731 recv_zero_cross_pct - sent_zero_cross_pct);
732
733 // Musicality verdict
734 log_plain("SOUND QUALITY VERDICT:");
735 if (recv_zero_cross_pct > 10.0) {
736 log_plain(" ⚠️ SOUNDS LIKE STATIC/DISTORTED: Excessive zero crossings (%.2f%%) = high frequency noise",
737 recv_zero_cross_pct);
738 log_plain(" Increase from sent: %.2f%% (waveform corruption detected)",
739 recv_zero_cross_pct - sent_zero_cross_pct);
740 log_plain(" Likely causes: Opus codec artifacts, jitter buffer issues, or packet delivery gaps");
741 } else if (recv_zero_cross_pct - sent_zero_cross_pct > 3.0) {
742 log_plain(" ⚠️ SOUNDS CORRUPTED: Zero crossing rate increased by %.2f%% (should be ±0.5%%)",
743 recv_zero_cross_pct - sent_zero_cross_pct);
744 log_plain(" Indicates waveform distortion from network/processing artifacts");
745 } else if (recv_sharp_pct > 2.0) {
746 log_plain(" ⚠️ SOUNDS LIKE STATIC: High click/pop rate (%.2f%%) indicates audio artifacts", recv_sharp_pct);
747 log_plain(" Likely causes: Packet loss, jitter buffer issues, or frame discontinuities");
748 } else if (g_received_clipping_samples > (g_received_stats.total_samples / 1000)) {
749 log_plain(" ⚠️ SOUNDS DISTORTED: Significant clipping detected (%.3f%%)",
750 100.0 * g_received_clipping_samples / g_received_stats.total_samples);
751 log_plain(" Likely causes: AGC too aggressive, gain too high, or codec compression artifacts");
752 } else if (low_energy_pct > 50.0 && recv_rms < 0.05f) {
753 log_plain(" ⚠️ SOUNDS MUDDY/QUIET: Over 50%% very quiet samples + low RMS");
754 log_plain(" Audio may sound unclear or like background noise rather than music");
755 } else if (received_silence_pct > 10.0) {
756 log_plain(" ⚠️ SOUNDS SCRATCHY: Excessive silence (%.1f%%) causes dropouts", received_silence_pct);
757 } else if (recv_rms > 0.08f && recv_zero_cross_pct < 6.0 && recv_sharp_pct < 1.0 &&
758 g_received_clipping_samples == 0) {
759 log_plain(" ✓ SOUNDS LIKE MUSIC: Good RMS (%.4f), clean waveform (%.2f%% zero crossings), minimal artifacts",
760 recv_rms, recv_zero_cross_pct);
761 log_plain(" Audio quality acceptable for communication");
762 } else {
763 log_plain(" ? BORDERLINE: Check specific metrics above");
764 }
765
766 // Low energy audio analysis
767 if (low_energy_pct > 50.0) {
768 log_plain(" ⚠️ WARNING: Over 50%% of received samples are very quiet (< 0.05 amplitude)");
769 log_plain(" - This makes audio sound muddy, unclear, or hard to understand");
770 log_plain(" - Caused by: Mixing other clients' audio with your own at wrong levels");
771 }
772
773 // Stuttering/periodic gap detection using packet inter-arrival times
774 if (g_received_packet_times_count >= 5) {
775 uint32_t inter_arrival_times_ms[MAX_PACKET_SAMPLES - 1];
776 uint32_t inter_arrival_count = 0;
777 uint32_t min_interval_ms = 0xFFFFFFFF;
778 uint32_t max_interval_ms = 0;
779 uint64_t sum_intervals_ms = 0;
780 uint32_t intervals_around_50ms = 0; // Count intervals ~40-60ms
781
782 // Calculate inter-packet arrival times
783 for (uint32_t i = 1; i < g_received_packet_times_count; i++) {
784 uint64_t prev_ns = g_received_packet_times_ns[i - 1];
785 uint64_t curr_ns = g_received_packet_times_ns[i];
786
787 uint64_t gap_ns = time_elapsed_ns(prev_ns, curr_ns);
788 uint32_t gap_ms = (uint32_t)time_ns_to_ms(gap_ns);
789
790 inter_arrival_times_ms[inter_arrival_count++] = gap_ms;
791 if (gap_ms < min_interval_ms)
792 min_interval_ms = gap_ms;
793 if (gap_ms > max_interval_ms)
794 max_interval_ms = gap_ms;
795 sum_intervals_ms += gap_ms;
796
797 // Check if interval is ~50ms (within 15ms tolerance for network jitter)
798 if (gap_ms >= 35 && gap_ms <= 70) {
799 intervals_around_50ms++;
800 }
801 }
802
803 uint32_t avg_interval_ms = (uint32_t)(sum_intervals_ms / inter_arrival_count);
804 uint32_t interval_consistency = (intervals_around_50ms * 100) / inter_arrival_count;
805
806 // Calculate how much audio is in each packet
807 // Total decoded samples / number of packets = average samples per packet
808 // At 48kHz, 960 samples = 1 Opus frame = 20ms
809 double avg_samples_per_packet =
810 g_received_stats.total_samples > 0 ? (double)g_received_stats.total_samples / inter_arrival_count : 0;
811 double frames_per_packet = avg_samples_per_packet / 960.0; // 960 samples = 1 frame @ 48kHz
812 double ms_audio_per_packet = frames_per_packet * 20.0; // 20ms per frame
813
814 // Detect if stuttering is periodic (consistent ~50ms intervals)
815 if (intervals_around_50ms >= (inter_arrival_count * 2 / 3)) {
816 // More than 66% of packets are ~50ms apart - clear periodic stuttering
817 log_plain(" 🔴 PERIODIC STUTTERING DETECTED: Server sends packets every ~%u ms (should be ~20ms)!",
818 avg_interval_ms);
819 log_plain(" - Packet inter-arrival: %u-%u ms (avg: %u ms)", min_interval_ms, max_interval_ms, avg_interval_ms);
820 log_plain(" - %u/%u packets (~%u%%) are ~50ms apart (CLEAR STUTTERING PATTERN)", intervals_around_50ms,
821 inter_arrival_count, interval_consistency);
822
823 log_plain(" - PACKET ANALYSIS:");
824 log_plain(" - Total audio samples: %llu over %u packets", (unsigned long long)g_received_stats.total_samples,
825 inter_arrival_count);
826 log_plain(" - Avg samples per packet: %.0f (= %.2f Opus frames = %.1f ms)", avg_samples_per_packet,
827 frames_per_packet, ms_audio_per_packet);
828
829 if (frames_per_packet < 1.5) {
830 log_plain(" - ❌ PROBLEM: Each packet contains < 1.5 frames (should be 2-3 frames!)");
831 log_plain(" - With only %.1f frames per packet arriving every %u ms, there are gaps between chunks",
832 frames_per_packet, avg_interval_ms);
833 log_plain(" - Audio plays for ~%.0f ms, then %u ms gap, then plays again", ms_audio_per_packet,
834 avg_interval_ms - (uint32_t)ms_audio_per_packet);
835 } else if (frames_per_packet > 2.5) {
836 log_plain(" - ✓ Packets contain %.1f frames (~%.0f ms audio each)", frames_per_packet,
837 ms_audio_per_packet);
838 log_plain(" - Should play smoothly if jitter buffer is large enough");
839 log_plain(" - If still stuttering, issue is jitter buffer depth or timing precision");
840 } else {
841 log_plain(" - Packets contain %.1f frames (~%.0f ms)", frames_per_packet, ms_audio_per_packet);
842 log_plain(" - Borderline: buffer needs to hold %.0f ms to bridge %.u ms gap", ms_audio_per_packet,
843 avg_interval_ms - (uint32_t)ms_audio_per_packet);
844 }
845 } else if (avg_interval_ms > 30) {
846 log_plain(" ⚠️ AUDIO DELIVERY INCONSISTENCY: Server packets arrive every ~%u ms (expected ~20ms)",
847 avg_interval_ms);
848 log_plain(" - Interval range: %u-%u ms", min_interval_ms, max_interval_ms);
849 log_plain(" - This causes dropouts and buffering issues");
850 }
851 }
852
853 // Packet delivery gaps
854 if (g_received_stats.max_gap_ns > 40) {
855 log_plain(" ⚠️ DISTORTION DETECTED: Packet delivery gaps too large!");
856 log_plain(" - Max gap: %u ms (should be ~20ms for smooth audio)", g_received_stats.max_gap_ns);
857 if (g_received_stats.max_gap_ns > 80) {
858 log_plain(" - SEVERE: Gaps > 80ms cause severe distortion and dropouts");
859 } else if (g_received_stats.max_gap_ns > 50) {
860 log_plain(" - Gaps > 50ms cause noticeable distortion");
861 }
862 }
863 if (g_received_stats.discontinuity_count > 0) {
864 log_plain(" Packet delivery discontinuities: %llu gaps > 100ms detected",
865 (unsigned long long)g_received_stats.discontinuity_count);
866 }
867 if (g_received_stats.jitter_count > (g_received_stats.total_samples / 100)) {
868 log_plain(" High jitter detected: > 1%% of samples have rapid amplitude changes");
869 log_plain(" - May indicate buffer underruns from sparse packet delivery");
870 }
871
872 log_plain("================================================================================");
873}
#define MAX_PACKET_SAMPLES
Definition analysis.c:76
uint64_t time_elapsed_ns(uint64_t start_ns, uint64_t end_ns)
Definition util/time.c:90

References ECHO_DELAY_COUNT, MAX_PACKET_SAMPLES, time_elapsed_ns(), and time_get_ns().

◆ audio_analysis_set_aec3_metrics()

void audio_analysis_set_aec3_metrics ( double  echo_return_loss,
double  echo_return_loss_enhancement,
uint64_t  delay_ns 
)

Definition at line 504 of file analysis.c.

504 {
505 // Store AEC3 metrics for reporting
506 // These come from WebRTC EchoControl::GetMetrics() call
507 g_aec3_echo_return_loss = echo_return_loss;
508 g_aec3_echo_return_loss_enhancement = echo_return_loss_enhancement;
509 g_aec3_delay_ns = delay_ns;
510 g_aec3_metrics_available = true;
511}

Referenced by client_audio_pipeline_process_duplex().

◆ audio_analysis_track_received_packet()

void audio_analysis_track_received_packet ( size_t  size)

Definition at line 463 of file analysis.c.

463 {
464 (void)size; // Unused parameter - reserved for future per-packet analysis
465 if (!g_analysis_enabled)
466 return;
467
468 uint64_t now_ns = time_get_ns();
469 int64_t now_us = (int64_t)time_ns_to_us(now_ns);
470
471 // Track packet timing for stuttering detection
472 if (g_received_packet_times_count < MAX_PACKET_SAMPLES) {
473 g_received_packet_times_ns[g_received_packet_times_count++] = now_ns;
474 }
475
476 // Detect gaps between consecutive packets (discontinuity)
477 if (g_received_stats.packets_count > 0) {
478 int64_t gap_us = now_us - g_received_last_packet_time_us;
479 int32_t gap_ms = (int32_t)(gap_us / 1000);
480
481 // Expected: ~20ms per Opus frame, flag if gap > 100ms
482 if (gap_ms > 100) {
483 g_received_stats.discontinuity_count++;
484 }
485
486 // Track max gap
487 if (gap_ms > (int32_t)g_received_stats.max_gap_ns) {
488 g_received_stats.max_gap_ns = (uint32_t)gap_ms;
489 }
490 }
491
492 g_received_last_packet_time_us = now_us;
493 g_received_stats.packets_count++;
494}

References MAX_PACKET_SAMPLES, and time_get_ns().

◆ audio_analysis_track_received_sample()

void audio_analysis_track_received_sample ( float  sample)

Definition at line 272 of file analysis.c.

272 {
273 if (!g_analysis_enabled)
274 return;
275
276 g_received_stats.total_samples++;
277
278 // Track peak level
279 float abs_sample = fabsf(sample);
280 if (abs_sample > g_received_stats.peak_level) {
281 g_received_stats.peak_level = abs_sample;
282 }
283
284 // Track clipping (samples > 1.0) - indicates distortion
285 if (abs_sample > 1.0f) {
286 g_received_stats.clipping_count++;
287 g_received_clipping_samples++;
288 }
289
290 // Detect sharp transitions (sudden amplitude jumps > 0.3) - indicates clicks/pops/artifacts
291 float amp_change = fabsf(sample - g_received_last_sample);
292 if (amp_change > 0.3f) {
293 g_received_sharp_transitions++;
294 }
295 g_received_transition_samples++;
296
297 // Accumulate for mean calculation
298 g_received_mean += sample;
299
300 // Detect zero crossings (waveform crossing zero) - indicates spectral content
301 // Use file-scope static variable for prev sample tracking
302 // (This function is called from the protocol reception thread, separate from the
303 // audio capture thread, so using distinct static variables is safe)
304 static float s_received_prev_sample_for_zero_crossing = 0.0f;
305 if ((s_received_prev_sample_for_zero_crossing > 0 && sample < 0) ||
306 (s_received_prev_sample_for_zero_crossing < 0 && sample > 0)) {
307 g_received_zero_crossings++;
308 }
309 s_received_prev_sample_for_zero_crossing = sample;
310
311 // Track silence and low-energy audio
312 if (abs_sample < 0.001f) {
313 g_received_stats.silent_samples++;
314 g_received_silence_burst++;
315 g_received_below_noise_floor++;
316
317 // Track when silence started
318 if (g_received_silence_burst == 1) {
319 g_received_silence_start_sample = g_received_stats.total_samples;
320 }
321 } else {
322 // Silence ended - track gap interval and max burst length
323 if (g_received_silence_burst > 0) {
324 // Calculate time gap between end of last silence and start of this one
325 if (g_received_last_silence_end_sample > 0) {
326 uint64_t samples_between = g_received_silence_start_sample - g_received_last_silence_end_sample;
327 uint32_t ms_between = (uint32_t)(samples_between * 1000 / 48000); // Convert samples to ms at 48kHz
328
329 // Track the gap interval if we have room
330 if (g_received_gap_count < MAX_GAP_SAMPLES) {
331 g_received_gap_intervals_ms[g_received_gap_count++] = ms_between;
332 }
333 }
334
335 g_received_last_silence_end_sample = g_received_stats.total_samples;
336
337 // Track max burst length
338 if (g_received_silence_burst > g_received_max_silence_burst) {
339 g_received_max_silence_burst = g_received_silence_burst;
340 }
341 }
342 g_received_silence_burst = 0;
343 }
344
345 // Track very quiet audio (< 0.05 amplitude) which contributes to muddy/quiet perception
346 if (abs_sample < 0.05f) {
347 g_received_low_energy_samples++;
348 }
349
350 // Detect jitter: rapid amplitude changes > 0.5 between consecutive samples
351 float delta = fabsf(sample - g_received_last_sample);
352 if (delta > 0.5f) {
353 g_received_stats.jitter_count++;
354 }
355 g_received_last_sample = sample;
356
357 // Accumulate for RMS calculation
358 g_received_rms_accumulator += sample * sample;
359 g_received_rms_sample_count++;
360
361 // Echo detection: check if received sample matches sent sample from N ms ago
362 // This detects if echo cancellation is working (it shouldn't find matches)
363 if (g_echo_correlation_sample_count < 500000) { // Limit to first ~10 seconds
364 for (int delay_idx = 0; delay_idx < ECHO_DELAY_COUNT; delay_idx++) {
365 // Calculate sample delay: delay_ms * (sample_rate / 1000)
366 uint32_t delay_samples = (g_echo_delays_ms[delay_idx] * 48000) / 1000;
367
368 // Get sent sample from that delay ago (from circular buffer)
369 uint64_t sent_pos;
370 if (g_echo_buffer_pos >= delay_samples) {
371 sent_pos = g_echo_buffer_pos - delay_samples;
372 } else {
373 sent_pos = (g_echo_buffer_pos + ECHO_BUFFER_SIZE) - delay_samples;
374 }
375
376 float sent_sample = g_echo_buffer[sent_pos];
377
378 // Check if samples match (correlation threshold = 0.1)
379 float diff = fabsf(sample - sent_sample);
380 if (diff < 0.1f && fabsf(sent_sample) > 0.01f) { // Only count if sent is not silence
381 g_echo_match_count[delay_idx]++;
382 g_echo_correlation_strength[delay_idx] += (0.1f - diff); // Accumulate strength
383 }
384 }
385 g_echo_correlation_sample_count++;
386 }
387
388 // Beep/tone artifact detection
389 // Store sample in sliding window for frequency analysis
390 g_received_beep_window[g_received_beep_window_idx] = sample;
391 g_received_beep_window_idx = (g_received_beep_window_idx + 1) % BEEP_WINDOW_SIZE;
392
393 // Analyze window every 10ms (480 samples at 48kHz)
394 if (g_received_beep_window_idx == 0 && g_received_stats.total_samples > BEEP_WINDOW_SIZE) {
395 // Calculate zero-crossing rate in this window
396 int zero_crossings = 0;
397 float min_amp = 1.0f, max_amp = 0.0f;
398 float sum_amp = 0.0f;
399 float prev = g_received_beep_window[0];
400
401 for (int i = 1; i < BEEP_WINDOW_SIZE; i++) {
402 float curr = g_received_beep_window[i];
403 float abs_curr = fabsf(curr);
404
405 // Track amplitude range
406 if (abs_curr > max_amp)
407 max_amp = abs_curr;
408 if (abs_curr < min_amp)
409 min_amp = abs_curr;
410 sum_amp += abs_curr;
411
412 // Count zero crossings
413 if ((prev > 0 && curr < 0) || (prev < 0 && curr > 0)) {
414 zero_crossings++;
415 }
416 prev = curr;
417 }
418
419 float avg_amp = sum_amp / BEEP_WINDOW_SIZE;
420 float amp_range = max_amp - min_amp;
421
422 // A beep/tone has:
423 // 1. High zero-crossing rate (>20 per 10ms = >2000Hz equivalent, or 5-20 = 500-2000Hz)
424 // 2. Consistent amplitude (range/avg < 0.5 means sine-wave like)
425 // 3. Non-trivial amplitude (avg > 0.02)
426 bool is_tonal = (zero_crossings >= 5 && zero_crossings <= 100) && // 500Hz-10kHz range
427 (avg_amp > 0.02f) && // Not silence
428 (amp_range < avg_amp * 1.5f); // Relatively consistent amplitude
429
430 if (is_tonal) {
431 g_received_tonal_samples += BEEP_WINDOW_SIZE;
432
433 if (!g_in_beep_burst) {
434 // Starting a new beep burst
435 g_in_beep_burst = true;
436 g_beep_burst_samples = BEEP_WINDOW_SIZE;
437 } else {
438 g_beep_burst_samples += BEEP_WINDOW_SIZE;
439 }
440 } else {
441 if (g_in_beep_burst) {
442 // Beep burst ended
443 // Only count as beep event if it was short (< 500ms = 24000 samples)
444 // Long tonal sounds are likely music, not artifacts
445 if (g_beep_burst_samples > 0 && g_beep_burst_samples < 24000) {
446 g_received_beep_events++;
447 g_received_stats.beep_events = g_received_beep_events;
448 }
449 g_in_beep_burst = false;
450 g_beep_burst_samples = 0;
451 }
452 }
453
454 g_received_stats.tonal_samples = g_received_tonal_samples;
455 }
456
457 // Write to WAV file if enabled
458 if (g_received_wav) {
459 wav_writer_write(g_received_wav, &sample, 1);
460 }
461}
#define MAX_GAP_SAMPLES
Definition analysis.c:69
#define ECHO_BUFFER_SIZE
Definition analysis.c:83
#define BEEP_WINDOW_SIZE
Definition analysis.c:106
int wav_writer_write(wav_writer_t *writer, const float *samples, int num_samples)
Definition wav_writer.c:95

References BEEP_WINDOW_SIZE, ECHO_BUFFER_SIZE, ECHO_DELAY_COUNT, MAX_GAP_SAMPLES, and wav_writer_write().

Referenced by audio_process_received_samples().

◆ audio_analysis_track_sent_packet()

void audio_analysis_track_sent_packet ( size_t  size)

Definition at line 245 of file analysis.c.

245 {
246 (void)size; // Unused parameter - reserved for future per-packet analysis
247 if (!g_analysis_enabled)
248 return;
249
250 int64_t now_us = (int64_t)time_ns_to_us(time_get_ns());
251
252 // Detect gaps between consecutive packets (discontinuity)
253 if (g_sent_stats.packets_count > 0) {
254 int64_t gap_us = now_us - g_sent_last_packet_time_us;
255 int32_t gap_ms = (int32_t)(gap_us / 1000);
256
257 // Expected: ~20ms per Opus frame, flag if gap > 100ms
258 if (gap_ms > 100) {
259 g_sent_stats.discontinuity_count++;
260 }
261
262 // Track max gap
263 if (gap_ms > (int32_t)g_sent_stats.max_gap_ns) {
264 g_sent_stats.max_gap_ns = (uint32_t)gap_ms;
265 }
266 }
267
268 g_sent_last_packet_time_us = now_us;
269 g_sent_stats.packets_count++;
270}

References time_get_ns().

◆ audio_analysis_track_sent_sample()

void audio_analysis_track_sent_sample ( float  sample)

Definition at line 173 of file analysis.c.

173 {
174 if (!g_analysis_enabled)
175 return;
176
177 g_sent_stats.total_samples++;
178
179 // Track peak level
180 float abs_sample = fabsf(sample);
181 if (abs_sample > g_sent_stats.peak_level) {
182 g_sent_stats.peak_level = abs_sample;
183 }
184
185 // Track clipping (samples > 1.0) - indicates distortion
186 if (abs_sample > 1.0f) {
187 g_sent_stats.clipping_count++;
188 g_sent_clipping_samples++;
189 }
190
191 // Detect sharp transitions (sudden amplitude jumps > 0.3) - indicates clicks/pops
192 float amp_change = fabsf(sample - g_sent_last_sample);
193 if (amp_change > 0.3f) {
194 g_sent_sharp_transitions++;
195 }
196 g_sent_transition_samples++;
197
198 // Accumulate for mean calculation
199 g_sent_mean += sample;
200
201 // Detect zero crossings (waveform crossing zero) - indicates spectral content
202 // Use file-scope static variable for prev sample tracking
203 // (This function is only called from the audio capture thread, but using file-scope
204 // static is clearer and avoids shadowing the existing g_sent_last_sample variable)
205 static float s_sent_prev_sample_for_zero_crossing = 0.0f;
206 if ((s_sent_prev_sample_for_zero_crossing > 0 && sample < 0) ||
207 (s_sent_prev_sample_for_zero_crossing < 0 && sample > 0)) {
208 g_sent_zero_crossings++;
209 }
210 s_sent_prev_sample_for_zero_crossing = sample;
211
212 // Track silence (very low level)
213 if (abs_sample < 0.001f) {
214 g_sent_stats.silent_samples++;
215 g_sent_silence_burst++;
216 } else {
217 // Silence ended - track max burst length
218 if (g_sent_silence_burst > g_sent_max_silence_burst) {
219 g_sent_max_silence_burst = g_sent_silence_burst;
220 }
221 g_sent_silence_burst = 0;
222 }
223
224 // Detect jitter: rapid amplitude changes > 0.5 between consecutive samples
225 float delta = fabsf(sample - g_sent_last_sample);
226 if (delta > 0.5f) {
227 g_sent_stats.jitter_count++;
228 }
229 g_sent_last_sample = sample;
230
231 // Accumulate for RMS calculation
232 g_sent_rms_accumulator += sample * sample;
233 g_sent_rms_sample_count++;
234
235 // Write to WAV file if enabled
236 if (g_sent_wav) {
237 wav_writer_write(g_sent_wav, &sample, 1);
238 }
239
240 // Store in echo detection buffer (circular)
241 g_echo_buffer[g_echo_buffer_pos] = sample;
242 g_echo_buffer_pos = (g_echo_buffer_pos + 1) % ECHO_BUFFER_SIZE;
243}

References ECHO_BUFFER_SIZE, and wav_writer_write().