ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
analysis.c File Reference

Audio Analysis Implementation. More...

Go to the source code of this file.

Macros

#define FRAME_SIZE   960
 
#define MAX_GAP_SAMPLES   100
 
#define MAX_PACKET_SAMPLES   200
 
#define ECHO_BUFFER_SIZE   48000
 
#define ECHO_DELAY_COUNT   5
 
#define BEEP_WINDOW_SIZE   480
 

Functions

int audio_analysis_init (void)
 Initialize audio analysis.
 
void audio_analysis_track_sent_sample (float sample)
 Track sent audio sample.
 
void audio_analysis_track_sent_packet (size_t size)
 Track sent packet.
 
void audio_analysis_track_received_sample (float sample)
 Track received audio sample.
 
void audio_analysis_track_received_packet (size_t size)
 Track received packet.
 
const audio_analysis_stats_t * audio_analysis_get_sent_stats (void)
 Get sent audio statistics.
 
const audio_analysis_stats_t * audio_analysis_get_received_stats (void)
 Get received audio statistics.
 
void audio_analysis_set_aec3_metrics (double echo_return_loss, double echo_return_loss_enhancement, int delay_ms)
 Set AEC3 echo cancellation metrics.
 
void audio_analysis_print_report (void)
 Print audio analysis report.
 
void audio_analysis_cleanup (void)
 Cleanup audio analysis.
 

Detailed Description

Audio Analysis Implementation.

Definition in file analysis.c.

Macro Definition Documentation

◆ BEEP_WINDOW_SIZE

#define BEEP_WINDOW_SIZE   480

Definition at line 105 of file analysis.c.

◆ ECHO_BUFFER_SIZE

#define ECHO_BUFFER_SIZE   48000

Definition at line 82 of file analysis.c.

◆ ECHO_DELAY_COUNT

#define ECHO_DELAY_COUNT   5

Definition at line 88 of file analysis.c.

◆ FRAME_SIZE

#define FRAME_SIZE   960

Definition at line 61 of file analysis.c.

◆ MAX_GAP_SAMPLES

#define MAX_GAP_SAMPLES   100

Definition at line 68 of file analysis.c.

◆ MAX_PACKET_SAMPLES

#define MAX_PACKET_SAMPLES   200

Definition at line 75 of file analysis.c.

Function Documentation

◆ audio_analysis_cleanup()

void audio_analysis_cleanup ( void  )

Cleanup audio analysis.

Definition at line 882 of file analysis.c.

882 {
883 g_analysis_enabled = false;
884
885 // Close WAV files if they were open
886 if (g_sent_wav) {
887 wav_writer_close(g_sent_wav);
888 g_sent_wav = NULL;
889 log_info("Closed sent audio WAV file");
890 }
891 if (g_received_wav) {
892 wav_writer_close(g_received_wav);
893 g_received_wav = NULL;
894 log_info("Closed received audio WAV file");
895 }
896}
#define log_info(...)
Log an INFO message.
void wav_writer_close(wav_writer_t *writer)
Close WAV file and finalize header.
Definition wav_writer.c:99

References log_info, and wav_writer_close().

◆ audio_analysis_get_received_stats()

const audio_analysis_stats_t * audio_analysis_get_received_stats ( void  )

Get received audio statistics.

Returns
Pointer to analysis stats (do not free)

Definition at line 504 of file analysis.c.

504 {
505 return &g_received_stats;
506}

◆ audio_analysis_get_sent_stats()

const audio_analysis_stats_t * audio_analysis_get_sent_stats ( void  )

Get sent audio statistics.

Returns
Pointer to analysis stats (do not free)

Definition at line 500 of file analysis.c.

500 {
501 return &g_sent_stats;
502}

◆ audio_analysis_init()

int audio_analysis_init ( void  )

Initialize audio analysis.

Returns
0 on success, negative on error

Definition at line 113 of file analysis.c.

113 {
114 SAFE_MEMSET(&g_sent_stats, sizeof(g_sent_stats), 0, sizeof(g_sent_stats));
115 SAFE_MEMSET(&g_received_stats, sizeof(g_received_stats), 0, sizeof(g_received_stats));
116
117 // Reset stuttering/gap tracking
118 SAFE_MEMSET(g_received_gap_intervals_ms, sizeof(g_received_gap_intervals_ms), 0, sizeof(g_received_gap_intervals_ms));
119 g_received_gap_count = 0;
120 g_received_silence_start_sample = 0;
121 g_received_last_silence_end_sample = 0;
122 SAFE_MEMSET(g_received_packet_times, sizeof(g_received_packet_times), 0, sizeof(g_received_packet_times));
123 g_received_packet_times_count = 0;
124 SAFE_MEMSET(g_received_packet_sizes, sizeof(g_received_packet_sizes), 0, sizeof(g_received_packet_sizes));
125 g_received_total_audio_samples = 0;
126
127 // Reset echo detection
128 SAFE_MEMSET(g_echo_buffer, sizeof(g_echo_buffer), 0, sizeof(g_echo_buffer));
129 g_echo_buffer_pos = 0;
130 g_echo_correlation_sample_count = 0;
131 for (int i = 0; i < ECHO_DELAY_COUNT; i++) {
132 g_echo_correlation_strength[i] = 0;
133 g_echo_match_count[i] = 0;
134 }
135 g_detected_echo_delay_ms = 0;
136
137 // Reset beep detection
138 SAFE_MEMSET(g_received_beep_window, sizeof(g_received_beep_window), 0, sizeof(g_received_beep_window));
139 g_received_beep_window_idx = 0;
140 g_received_beep_events = 0;
141 g_received_tonal_samples = 0;
142 g_in_beep_burst = false;
143 g_beep_burst_samples = 0;
144
145 struct timespec ts;
146 clock_gettime(CLOCK_MONOTONIC, &ts);
147 int64_t now_us = (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
148
149 g_sent_stats.timestamp_start_us = now_us;
150 g_received_stats.timestamp_start_us = now_us;
151
152 g_sent_last_sample = 0.0f;
153 g_received_last_sample = 0.0f;
154 g_sent_last_packet_time_us = now_us;
155 g_received_last_packet_time_us = now_us;
156
157 // Initialize WAV file dumping if enabled
158 if (wav_dump_enabled()) {
159 g_sent_wav = wav_writer_open("/tmp/sent_audio.wav", 48000, 1);
160 g_received_wav = wav_writer_open("/tmp/received_audio.wav", 48000, 1);
161 if (g_sent_wav) {
162 log_info("Dumping sent audio to /tmp/sent_audio.wav");
163 }
164 if (g_received_wav) {
165 log_info("Dumping received audio to /tmp/received_audio.wav");
166 }
167 }
168
169 g_analysis_enabled = true;
170 log_info("Audio analysis enabled");
171 return 0;
172}
#define ECHO_DELAY_COUNT
Definition analysis.c:88
#define SAFE_MEMSET(dest, dest_size, ch, count)
Definition common.h:389
int64_t timestamp_start_us
Definition analysis.h:35
bool wav_dump_enabled(void)
Check if audio dumping is enabled via environment.
Definition wav_writer.c:123
wav_writer_t * wav_writer_open(const char *filepath, int sample_rate, int channels)
Open WAV file for writing.
Definition wav_writer.c:39

References ECHO_DELAY_COUNT, log_info, SAFE_MEMSET, audio_analysis_stats_t::timestamp_start_us, wav_dump_enabled(), and wav_writer_open().

◆ audio_analysis_print_report()

void audio_analysis_print_report ( void  )

Print audio analysis report.

Definition at line 517 of file analysis.c.

517 {
518 if (!g_analysis_enabled) {
519 return;
520 }
521
522 struct timespec ts;
523 clock_gettime(CLOCK_MONOTONIC, &ts);
524 int64_t now_us = (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
525
526 g_sent_stats.timestamp_end_us = now_us;
527 g_received_stats.timestamp_end_us = now_us;
528
529 int64_t sent_duration_ms = (g_sent_stats.timestamp_end_us - g_sent_stats.timestamp_start_us) / 1000;
530 int64_t recv_duration_ms = (g_received_stats.timestamp_end_us - g_received_stats.timestamp_start_us) / 1000;
531
532 // Calculate RMS levels
533 float sent_rms = 0.0f;
534 float recv_rms = 0.0f;
535 if (g_sent_rms_sample_count > 0) {
536 sent_rms = sqrtf(g_sent_rms_accumulator / g_sent_rms_sample_count);
537 }
538 if (g_received_rms_sample_count > 0) {
539 recv_rms = sqrtf(g_received_rms_accumulator / g_received_rms_sample_count);
540 }
541
542 log_plain("================================================================================");
543 log_plain(" AUDIO ANALYSIS REPORT ");
544 log_plain("================================================================================");
545 log_plain("SENT AUDIO (Microphone Capture):");
546 log_plain(" Duration: %lld ms", (long long)sent_duration_ms);
547 log_plain(" Total Samples: %llu", (unsigned long long)g_sent_stats.total_samples);
548 log_plain(" Peak Level: %.4f (should be < 1.0)", g_sent_stats.peak_level);
549 log_plain(" RMS Level: %.4f (audio energy/loudness)", sent_rms);
550 log_plain(" Clipping Events: %llu samples (%.2f%%)", (unsigned long long)g_sent_stats.clipping_count,
551 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_stats.clipping_count / g_sent_stats.total_samples) : 0);
552 log_plain(" Silent Samples: %llu samples (%.2f%%)", (unsigned long long)g_sent_stats.silent_samples,
553 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_stats.silent_samples / g_sent_stats.total_samples) : 0);
554 if (g_sent_max_silence_burst > 0) {
555 log_plain(" Max Silence Burst: %llu samples", (unsigned long long)g_sent_max_silence_burst);
556 }
557 log_plain(" Packets Sent: %u", g_sent_stats.packets_count);
558 log_plain(" Status: %s", g_sent_stats.clipping_count > 0 ? "CLIPPING DETECTED!" : "OK");
559
560 log_plain("RECEIVED AUDIO (Playback):");
561 log_plain(" Duration: %lld ms", (long long)recv_duration_ms);
562 log_plain(" Total Samples: %llu", (unsigned long long)g_received_stats.total_samples);
563 log_plain(" Peak Level: %.4f", g_received_stats.peak_level);
564 log_plain(" RMS Level: %.4f (audio energy/loudness)", recv_rms);
565 log_plain(" Clipping Events: %llu samples (%.2f%%)", (unsigned long long)g_received_stats.clipping_count,
566 g_received_stats.total_samples > 0
567 ? (100.0 * g_received_stats.clipping_count / g_received_stats.total_samples)
568 : 0);
569 log_plain(" Silent Samples: %llu samples (%.2f%%)", (unsigned long long)g_received_stats.silent_samples,
570 g_received_stats.total_samples > 0
571 ? (100.0 * g_received_stats.silent_samples / g_received_stats.total_samples)
572 : 0);
573 if (g_received_max_silence_burst > 0) {
574 log_plain(" Max Silence Burst: %llu samples", (unsigned long long)g_received_max_silence_burst);
575 }
576 double low_energy_pct =
577 g_received_stats.total_samples > 0 ? (100.0 * g_received_low_energy_samples / g_received_stats.total_samples) : 0;
578 log_plain(" Very Quiet Samples: %llu samples (%.1f%%) [amplitude < 0.05]",
579 (unsigned long long)g_received_low_energy_samples, low_energy_pct);
580 log_plain(" Packets Received: %u", g_received_stats.packets_count);
581 log_plain(" Status: %s", g_received_stats.total_samples == 0 ? "NO AUDIO RECEIVED!" : "Receiving");
582
583 log_plain("QUALITY METRICS (Scratchy/Distorted Audio Detection):");
584 log_plain("SENT:");
585 log_plain(" Jitter Events: %llu (rapid amplitude changes)", (unsigned long long)g_sent_stats.jitter_count);
586 log_plain(" Discontinuities: %llu (packet arrival gaps > 100ms)",
587 (unsigned long long)g_sent_stats.discontinuity_count);
588 log_plain(" Max Gap Between Packets: %u ms (expected ~20ms per frame)", g_sent_stats.max_gap_ms);
589
590 log_plain("RECEIVED:");
591 log_plain(" Jitter Events: %llu (rapid amplitude changes)",
592 (unsigned long long)g_received_stats.jitter_count);
593 log_plain(" Discontinuities: %llu (packet arrival gaps > 100ms)",
594 (unsigned long long)g_received_stats.discontinuity_count);
595 log_plain(" Max Gap Between Packets: %u ms (expected ~20ms per frame)", g_received_stats.max_gap_ms);
596
597 // Beep/tone artifact detection
598 if (g_received_beep_events > 0 || g_received_tonal_samples > 0) {
599 double tonal_pct =
600 g_received_stats.total_samples > 0 ? (100.0 * g_received_tonal_samples / g_received_stats.total_samples) : 0;
601 log_plain("BEEP/TONE ARTIFACTS:");
602 log_plain(" Beep Events: %llu (short tonal bursts < 500ms)",
603 (unsigned long long)g_received_beep_events);
604 log_plain(" Tonal Samples: %llu samples (%.1f%%) [consistent frequency content]",
605 (unsigned long long)g_received_tonal_samples, tonal_pct);
606
607 if (g_received_beep_events > 10) {
608 log_plain(" 🔴 BEEPING DETECTED: %llu short tonal bursts - likely codec artifacts or system sounds!",
609 (unsigned long long)g_received_beep_events);
610 log_plain(" Possible causes:");
611 log_plain(" - Opus codec producing tonal artifacts during silence/transitions");
612 log_plain(" - Buffer underruns creating synthetic tones");
613 log_plain(" - AEC3 suppressor resonance");
614 log_plain(" - System notification sounds bleeding through");
615 } else if (g_received_beep_events > 3) {
616 log_plain(" ⚠️ Some beep artifacts detected (%llu events)", (unsigned long long)g_received_beep_events);
617 }
618 }
619
620 log_plain("DIAGNOSTICS:");
621 if (g_sent_stats.peak_level == 0) {
622 log_plain(" No audio captured from microphone!");
623 }
624 if (g_received_stats.total_samples == 0) {
625 log_plain(" No audio received from server!");
626 } else if (g_received_stats.peak_level < 0.01f) {
627 log_plain(" ⚠️ Received audio is very quiet (peak < 0.01)");
628 }
629 if (g_sent_stats.clipping_count > 0) {
630 log_plain(" Microphone input is clipping - reduce microphone volume");
631 }
632
633 // Echo detection diagnostics
634 log_plain("ECHO DETECTION (Echo Cancellation Quality Check):");
635 if (g_echo_correlation_sample_count > 0 && g_sent_stats.total_samples > 0) {
636 uint64_t max_matches = 0;
637 int best_delay_idx = -1;
638
639 // Find which delay has the most matches (if any)
640 for (int i = 0; i < ECHO_DELAY_COUNT; i++) {
641 if (g_echo_match_count[i] > max_matches) {
642 max_matches = g_echo_match_count[i];
643 best_delay_idx = i;
644 }
645 }
646
647 double echo_threshold_pct = 5.0; // If > 5% of samples match at a delay, it's echo
648
649 if (best_delay_idx >= 0) {
650 double match_pct = (100.0 * g_echo_match_count[best_delay_idx]) / g_echo_correlation_sample_count;
651 log_plain(" Echo correlation at different delays:");
652 for (int i = 0; i < ECHO_DELAY_COUNT; i++) {
653 double pct = (100.0 * g_echo_match_count[i]) / g_echo_correlation_sample_count;
654 const char *status = pct > echo_threshold_pct ? "⚠️ ECHO DETECTED" : "✓ OK";
655 log_plain(" %3u ms delay: %.1f%% match rate %s", g_echo_delays_ms[i], pct, status);
656 }
657
658 if (match_pct > echo_threshold_pct) {
659 g_detected_echo_delay_ms = g_echo_delays_ms[best_delay_idx];
660 log_plain(" 🔴 ECHO CANCELLATION NOT WORKING: Strong echo at %u ms delay!", g_detected_echo_delay_ms);
661 log_plain(" Received audio contains %.1f%% samples matching sent audio from %u ms ago", match_pct,
662 g_detected_echo_delay_ms);
663 } else {
664 log_plain(" ✓ Echo cancellation working: No significant echo detected");
665 }
666 }
667 } else {
668 log_plain(" Insufficient data for echo detection (need both sent and received audio)");
669 }
670
671 // AEC3 metrics from WebRTC (if available)
672 if (g_aec3_metrics_available) {
673 log_plain("AEC3 METRICS (from WebRTC GetMetrics()):");
674 log_plain(" Echo Return Loss (ERL): %.2f dB (how much echo is attenuated; >10 dB is good)",
675 g_aec3_echo_return_loss);
676 log_plain(" Echo Return Loss Enhancement (ERLE): %.2f dB (residual echo suppression)",
677 g_aec3_echo_return_loss_enhancement);
678 log_plain(" Estimated Echo Delay: %d ms", g_aec3_delay_ms);
679
680 if (g_aec3_echo_return_loss > 10.0) {
681 log_plain(" ✓ Good echo attenuation (ERL > 10 dB)");
682 } else if (g_aec3_echo_return_loss > 3.0) {
683 log_plain(" ⚠️ Moderate echo attenuation (3-10 dB)");
684 } else {
685 log_plain(" 🔴 Poor echo attenuation (ERL < 3 dB)");
686 }
687 }
688
689 // Audio quality diagnostics
690 if (recv_rms < 0.005f) {
691 log_plain(" ⚠️ CRITICAL: Received audio RMS is extremely low (%.6f) - barely audible!", recv_rms);
692 } else if (recv_rms < 0.02f) {
693 log_plain(" ⚠️ WARNING: Received audio RMS is low (%.6f) - may sound quiet or muddy", recv_rms);
694 }
695
696 // Silence analysis
697 double received_silence_pct = g_received_stats.total_samples > 0
698 ? (100.0 * g_received_stats.silent_samples / g_received_stats.total_samples)
699 : 0;
700
701 if (received_silence_pct > 30.0) {
702 log_plain(" ⚠️ SCRATCHY AUDIO DETECTED: Too much silence in received audio!");
703 log_plain(" - Silence: %.1f%% of received samples (should be < 10%%)", received_silence_pct);
704 log_plain(" - Max silence burst: %llu samples", (unsigned long long)g_received_max_silence_burst);
705 log_plain(" - This creates jittery/choppy playback between audio bursts");
706 } else if (received_silence_pct > 15.0) {
707 log_plain(" ⚠️ WARNING: Moderate silence detected (%.1f%%)", received_silence_pct);
708 }
709
710 // Sharp transition analysis (clicks/pops)
711 double sent_sharp_pct =
712 g_sent_transition_samples > 0 ? (100.0 * g_sent_sharp_transitions / g_sent_transition_samples) : 0;
713 double recv_sharp_pct =
714 g_received_transition_samples > 0 ? (100.0 * g_received_sharp_transitions / g_received_transition_samples) : 0;
715
716 // Zero crossing rate analysis (spectral content)
717 // Music: 1-5%, Speech: 5-15%, Static/Noise: 15-50%
718 double sent_zero_cross_pct =
719 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_zero_crossings / g_sent_stats.total_samples) : 0;
720 double recv_zero_cross_pct =
721 g_received_stats.total_samples > 0 ? (100.0 * g_received_zero_crossings / g_received_stats.total_samples) : 0;
722
723 log_plain("WAVEFORM ANALYSIS (Is it clean music or corrupted/static?):");
724 log_plain("SENT AUDIO:");
725 log_plain(" Zero crossings: %.2f%% of samples (music: 1-5%%, noise: 15-50%%)", sent_zero_cross_pct);
726 log_plain(" Sharp transitions (clicks/pops): %.2f%% of samples", sent_sharp_pct);
727 log_plain(" Clipping samples: %llu (%.3f%%)", (unsigned long long)g_sent_clipping_samples,
728 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_clipping_samples / g_sent_stats.total_samples) : 0);
729
730 log_plain("RECEIVED AUDIO:");
731 log_plain(" Zero crossings: %.2f%% of samples (music: 1-5%%, noise: 15-50%%)", recv_zero_cross_pct);
732 log_plain(" Sharp transitions (clicks/pops): %.2f%% of samples", recv_sharp_pct);
733 log_plain(" Clipping samples: %llu (%.3f%%)", (unsigned long long)g_received_clipping_samples,
734 g_received_stats.total_samples > 0 ? (100.0 * g_received_clipping_samples / g_received_stats.total_samples)
735 : 0);
736 log_plain(" Zero crossing increase: %.2f%% higher than sent (indicates corruption)",
737 recv_zero_cross_pct - sent_zero_cross_pct);
738
739 // Musicality verdict
740 log_plain("SOUND QUALITY VERDICT:");
741 if (recv_zero_cross_pct > 10.0) {
742 log_plain(" ⚠️ SOUNDS LIKE STATIC/DISTORTED: Excessive zero crossings (%.2f%%) = high frequency noise",
743 recv_zero_cross_pct);
744 log_plain(" Increase from sent: %.2f%% (waveform corruption detected)",
745 recv_zero_cross_pct - sent_zero_cross_pct);
746 log_plain(" Likely causes: Opus codec artifacts, jitter buffer issues, or packet delivery gaps");
747 } else if (recv_zero_cross_pct - sent_zero_cross_pct > 3.0) {
748 log_plain(" ⚠️ SOUNDS CORRUPTED: Zero crossing rate increased by %.2f%% (should be ±0.5%%)",
749 recv_zero_cross_pct - sent_zero_cross_pct);
750 log_plain(" Indicates waveform distortion from network/processing artifacts");
751 } else if (recv_sharp_pct > 2.0) {
752 log_plain(" ⚠️ SOUNDS LIKE STATIC: High click/pop rate (%.2f%%) indicates audio artifacts", recv_sharp_pct);
753 log_plain(" Likely causes: Packet loss, jitter buffer issues, or frame discontinuities");
754 } else if (g_received_clipping_samples > (g_received_stats.total_samples / 1000)) {
755 log_plain(" ⚠️ SOUNDS DISTORTED: Significant clipping detected (%.3f%%)",
756 100.0 * g_received_clipping_samples / g_received_stats.total_samples);
757 log_plain(" Likely causes: AGC too aggressive, gain too high, or codec compression artifacts");
758 } else if (low_energy_pct > 50.0 && recv_rms < 0.05f) {
759 log_plain(" ⚠️ SOUNDS MUDDY/QUIET: Over 50%% very quiet samples + low RMS");
760 log_plain(" Audio may sound unclear or like background noise rather than music");
761 } else if (received_silence_pct > 10.0) {
762 log_plain(" ⚠️ SOUNDS SCRATCHY: Excessive silence (%.1f%%) causes dropouts", received_silence_pct);
763 } else if (recv_rms > 0.08f && recv_zero_cross_pct < 6.0 && recv_sharp_pct < 1.0 &&
764 g_received_clipping_samples == 0) {
765 log_plain(" ✓ SOUNDS LIKE MUSIC: Good RMS (%.4f), clean waveform (%.2f%% zero crossings), minimal artifacts",
766 recv_rms, recv_zero_cross_pct);
767 log_plain(" Audio quality acceptable for communication");
768 } else {
769 log_plain(" ? BORDERLINE: Check specific metrics above");
770 }
771
772 // Low energy audio analysis
773 if (low_energy_pct > 50.0) {
774 log_plain(" ⚠️ WARNING: Over 50%% of received samples are very quiet (< 0.05 amplitude)");
775 log_plain(" - This makes audio sound muddy, unclear, or hard to understand");
776 log_plain(" - Caused by: Mixing other clients' audio with your own at wrong levels");
777 }
778
779 // Stuttering/periodic gap detection using packet inter-arrival times
780 if (g_received_packet_times_count >= 5) {
781 uint32_t inter_arrival_times_ms[MAX_PACKET_SAMPLES - 1];
782 uint32_t inter_arrival_count = 0;
783 uint32_t min_interval_ms = 0xFFFFFFFF;
784 uint32_t max_interval_ms = 0;
785 uint64_t sum_intervals_ms = 0;
786 uint32_t intervals_around_50ms = 0; // Count intervals ~40-60ms
787
788 // Calculate inter-packet arrival times
789 for (uint32_t i = 1; i < g_received_packet_times_count; i++) {
790 struct timespec *prev = &g_received_packet_times[i - 1];
791 struct timespec *curr = &g_received_packet_times[i];
792
793 int64_t prev_us = (int64_t)prev->tv_sec * 1000000 + prev->tv_nsec / 1000;
794 int64_t curr_us = (int64_t)curr->tv_sec * 1000000 + curr->tv_nsec / 1000;
795 uint32_t gap_ms = (uint32_t)((curr_us - prev_us) / 1000);
796
797 inter_arrival_times_ms[inter_arrival_count++] = gap_ms;
798 if (gap_ms < min_interval_ms)
799 min_interval_ms = gap_ms;
800 if (gap_ms > max_interval_ms)
801 max_interval_ms = gap_ms;
802 sum_intervals_ms += gap_ms;
803
804 // Check if interval is ~50ms (within 15ms tolerance for network jitter)
805 if (gap_ms >= 35 && gap_ms <= 70) {
806 intervals_around_50ms++;
807 }
808 }
809
810 uint32_t avg_interval_ms = (uint32_t)(sum_intervals_ms / inter_arrival_count);
811 uint32_t interval_consistency = (intervals_around_50ms * 100) / inter_arrival_count;
812
813 // Calculate how much audio is in each packet
814 // Total decoded samples / number of packets = average samples per packet
815 // At 48kHz, 960 samples = 1 Opus frame = 20ms
816 double avg_samples_per_packet =
817 g_received_stats.total_samples > 0 ? (double)g_received_stats.total_samples / inter_arrival_count : 0;
818 double frames_per_packet = avg_samples_per_packet / 960.0; // 960 samples = 1 frame @ 48kHz
819 double ms_audio_per_packet = frames_per_packet * 20.0; // 20ms per frame
820
821 // Detect if stuttering is periodic (consistent ~50ms intervals)
822 if (intervals_around_50ms >= (inter_arrival_count * 2 / 3)) {
823 // More than 66% of packets are ~50ms apart - clear periodic stuttering
824 log_plain(" 🔴 PERIODIC STUTTERING DETECTED: Server sends packets every ~%u ms (should be ~20ms)!",
825 avg_interval_ms);
826 log_plain(" - Packet inter-arrival: %u-%u ms (avg: %u ms)", min_interval_ms, max_interval_ms, avg_interval_ms);
827 log_plain(" - %u/%u packets (~%u%%) are ~50ms apart (CLEAR STUTTERING PATTERN)", intervals_around_50ms,
828 inter_arrival_count, interval_consistency);
829
830 log_plain(" - PACKET ANALYSIS:");
831 log_plain(" - Total audio samples: %llu over %u packets", (unsigned long long)g_received_stats.total_samples,
832 inter_arrival_count);
833 log_plain(" - Avg samples per packet: %.0f (= %.2f Opus frames = %.1f ms)", avg_samples_per_packet,
834 frames_per_packet, ms_audio_per_packet);
835
836 if (frames_per_packet < 1.5) {
837 log_plain(" - ❌ PROBLEM: Each packet contains < 1.5 frames (should be 2-3 frames!)");
838 log_plain(" - With only %.1f frames per packet arriving every %u ms, there are gaps between chunks",
839 frames_per_packet, avg_interval_ms);
840 log_plain(" - Audio plays for ~%.0f ms, then %u ms gap, then plays again", ms_audio_per_packet,
841 avg_interval_ms - (uint32_t)ms_audio_per_packet);
842 } else if (frames_per_packet > 2.5) {
843 log_plain(" - ✓ Packets contain %.1f frames (~%.0f ms audio each)", frames_per_packet,
844 ms_audio_per_packet);
845 log_plain(" - Should play smoothly if jitter buffer is large enough");
846 log_plain(" - If still stuttering, issue is jitter buffer depth or timing precision");
847 } else {
848 log_plain(" - Packets contain %.1f frames (~%.0f ms)", frames_per_packet, ms_audio_per_packet);
849 log_plain(" - Borderline: buffer needs to hold %.0f ms to bridge %.u ms gap", ms_audio_per_packet,
850 avg_interval_ms - (uint32_t)ms_audio_per_packet);
851 }
852 } else if (avg_interval_ms > 30) {
853 log_plain(" ⚠️ AUDIO DELIVERY INCONSISTENCY: Server packets arrive every ~%u ms (expected ~20ms)",
854 avg_interval_ms);
855 log_plain(" - Interval range: %u-%u ms", min_interval_ms, max_interval_ms);
856 log_plain(" - This causes dropouts and buffering issues");
857 }
858 }
859
860 // Packet delivery gaps
861 if (g_received_stats.max_gap_ms > 40) {
862 log_plain(" ⚠️ DISTORTION DETECTED: Packet delivery gaps too large!");
863 log_plain(" - Max gap: %u ms (should be ~20ms for smooth audio)", g_received_stats.max_gap_ms);
864 if (g_received_stats.max_gap_ms > 80) {
865 log_plain(" - SEVERE: Gaps > 80ms cause severe distortion and dropouts");
866 } else if (g_received_stats.max_gap_ms > 50) {
867 log_plain(" - Gaps > 50ms cause noticeable distortion");
868 }
869 }
870 if (g_received_stats.discontinuity_count > 0) {
871 log_plain(" Packet delivery discontinuities: %llu gaps > 100ms detected",
872 (unsigned long long)g_received_stats.discontinuity_count);
873 }
874 if (g_received_stats.jitter_count > (g_received_stats.total_samples / 100)) {
875 log_plain(" High jitter detected: > 1%% of samples have rapid amplitude changes");
876 log_plain(" - May indicate buffer underruns from sparse packet delivery");
877 }
878
879 log_plain("================================================================================");
880}
#define MAX_PACKET_SAMPLES
Definition analysis.c:75
unsigned int uint32_t
Definition common.h:58
unsigned long long uint64_t
Definition common.h:59
#define log_plain(...)
Plain logging - writes to both log file and stderr without timestamps or log levels.
uint64_t silent_samples
Definition analysis.h:31
uint64_t clipping_count
Definition analysis.h:30
uint64_t discontinuity_count
Definition analysis.h:39

References audio_analysis_stats_t::clipping_count, audio_analysis_stats_t::discontinuity_count, ECHO_DELAY_COUNT, audio_analysis_stats_t::jitter_count, log_plain, audio_analysis_stats_t::max_gap_ms, MAX_PACKET_SAMPLES, audio_analysis_stats_t::packets_count, audio_analysis_stats_t::peak_level, audio_analysis_stats_t::silent_samples, audio_analysis_stats_t::timestamp_end_us, audio_analysis_stats_t::timestamp_start_us, and audio_analysis_stats_t::total_samples.

◆ audio_analysis_set_aec3_metrics()

void audio_analysis_set_aec3_metrics ( double  echo_return_loss,
double  echo_return_loss_enhancement,
int  delay_ms 
)

Set AEC3 echo cancellation metrics.

Parameters
echo_return_loss              Echo return loss (dB) - how much echo is attenuated
echo_return_loss_enhancement  Additional echo suppression (dB)
delay_ms                      Estimated echo delay in milliseconds

Definition at line 508 of file analysis.c.

508 {
509 // Store AEC3 metrics for reporting
510 // These come from WebRTC EchoControl::GetMetrics() call
511 g_aec3_echo_return_loss = echo_return_loss;
512 g_aec3_echo_return_loss_enhancement = echo_return_loss_enhancement;
513 g_aec3_delay_ms = delay_ms;
514 g_aec3_metrics_available = true;
515}

Referenced by client_audio_pipeline_process_duplex().

◆ audio_analysis_track_received_packet()

void audio_analysis_track_received_packet ( size_t  size)

Track received packet.

Parameters
size  Packet size in bytes

Definition at line 466 of file analysis.c.

466 {
467 (void)size; // Unused parameter - reserved for future per-packet analysis
468 if (!g_analysis_enabled)
469 return;
470
471 struct timespec ts;
472 clock_gettime(CLOCK_MONOTONIC, &ts);
473 int64_t now_us = (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
474
475 // Track packet timing for stuttering detection
476 if (g_received_packet_times_count < MAX_PACKET_SAMPLES) {
477 g_received_packet_times[g_received_packet_times_count++] = ts;
478 }
479
480 // Detect gaps between consecutive packets (discontinuity)
481 if (g_received_stats.packets_count > 0) {
482 int64_t gap_us = now_us - g_received_last_packet_time_us;
483 int32_t gap_ms = (int32_t)(gap_us / 1000);
484
485 // Expected: ~20ms per Opus frame, flag if gap > 100ms
486 if (gap_ms > 100) {
487 g_received_stats.discontinuity_count++;
488 }
489
490 // Track max gap
491 if (gap_ms > (int32_t)g_received_stats.max_gap_ms) {
492 g_received_stats.max_gap_ms = (uint32_t)gap_ms;
493 }
494 }
495
496 g_received_last_packet_time_us = now_us;
497 g_received_stats.packets_count++;
498}

References audio_analysis_stats_t::discontinuity_count, audio_analysis_stats_t::max_gap_ms, MAX_PACKET_SAMPLES, and audio_analysis_stats_t::packets_count.

◆ audio_analysis_track_received_sample()

void audio_analysis_track_received_sample ( float  sample)

Track received audio sample.

Parameters
sample: Audio sample value

Definition at line 275 of file analysis.c.

275 { // Updates received-audio statistics, echo-match counters and beep detection for one sample
276 if (!g_analysis_enabled)
277 return;
278
279 g_received_stats.total_samples++;
280
281 // Track peak level (largest absolute sample value seen)
282 float abs_sample = fabsf(sample);
283 if (abs_sample > g_received_stats.peak_level) {
284 g_received_stats.peak_level = abs_sample;
285 }
286
287 // Track clipping (|sample| > 1.0) - indicates distortion
288 if (abs_sample > 1.0f) {
289 g_received_stats.clipping_count++;
290 g_received_clipping_samples++;
291 }
292
293 // Detect sharp transitions (sudden amplitude jumps > 0.3 versus the previous sample) - indicates clicks/pops/artifacts
294 float amp_change = fabsf(sample - g_received_last_sample);
295 if (amp_change > 0.3f) {
296 g_received_sharp_transitions++;
297 }
298 g_received_transition_samples++; // Incremented for every sample, not only sharp ones
299
300 // Accumulate the running sum for mean calculation (the division is not done here)
301 g_received_mean += sample;
302
303 // Detect zero crossings (waveform crossing zero) - indicates spectral content
304 // Use a function-local static variable for prev sample tracking
305 // (Original author's note: this function is called from the protocol reception thread, separate from the
306 // audio capture thread, so using distinct static variables is safe - not verifiable from this listing)
307 static float s_received_prev_sample_for_zero_crossing = 0.0f;
308 if ((s_received_prev_sample_for_zero_crossing > 0 && sample < 0) ||
309 (s_received_prev_sample_for_zero_crossing < 0 && sample > 0)) {
310 g_received_zero_crossings++;
311 }
312 s_received_prev_sample_for_zero_crossing = sample;
313
314 // Track silence and low-energy audio (|sample| < 0.001 counts as silent)
315 if (abs_sample < 0.001f) {
316 g_received_stats.silent_samples++;
317 g_received_silence_burst++;
318 g_received_below_noise_floor++;
319
320 // Track when silence started (sample index of the first silent sample in this burst)
321 if (g_received_silence_burst == 1) {
322 g_received_silence_start_sample = g_received_stats.total_samples;
323 }
324 } else {
325 // Silence ended - track gap interval and max burst length
326 if (g_received_silence_burst > 0) {
327 // Calculate time gap between end of last silence and start of this one (skipped until a previous end is recorded)
328 if (g_received_last_silence_end_sample > 0) {
329 uint64_t samples_between = g_received_silence_start_sample - g_received_last_silence_end_sample;
330 uint32_t ms_between = (uint32_t)(samples_between * 1000 / 48000); // Convert samples to ms at 48kHz
331
332 // Track the gap interval if we have room
333 if (g_received_gap_count < MAX_GAP_SAMPLES) {
334 g_received_gap_intervals_ms[g_received_gap_count++] = ms_between;
335 }
336 }
337
338 g_received_last_silence_end_sample = g_received_stats.total_samples;
339
340 // Track max burst length (only updated here, so a burst still in progress is not yet reflected)
341 if (g_received_silence_burst > g_received_max_silence_burst) {
342 g_received_max_silence_burst = g_received_silence_burst;
343 }
344 }
345 g_received_silence_burst = 0;
346 }
347
348 // Track very quiet audio (< 0.05 amplitude) which contributes to muddy/quiet perception
349 if (abs_sample < 0.05f) {
350 g_received_low_energy_samples++;
351 }
352
353 // Detect jitter: rapid amplitude changes > 0.5 between consecutive samples (same difference as amp_change above, higher threshold)
354 float delta = fabsf(sample - g_received_last_sample);
355 if (delta > 0.5f) {
356 g_received_stats.jitter_count++;
357 }
358 g_received_last_sample = sample;
359
360 // Accumulate for RMS calculation
361 g_received_rms_accumulator += sample * sample;
362 g_received_rms_sample_count++;
363
364 // Echo detection: check if received sample matches sent sample from N ms ago
365 // This detects if echo cancellation is working (it shouldn't find matches)
366 if (g_echo_correlation_sample_count < 500000) { // Limit to first ~10 seconds
367 for (int delay_idx = 0; delay_idx < ECHO_DELAY_COUNT; delay_idx++) {
368 // Calculate sample delay: delay_ms * (sample_rate / 1000)
369 uint32_t delay_samples = (g_echo_delays_ms[delay_idx] * 48000) / 1000;
370
371 // Get sent sample from that delay ago (from circular buffer)
372 uint64_t sent_pos;
373 if (g_echo_buffer_pos >= delay_samples) {
374 sent_pos = g_echo_buffer_pos - delay_samples;
375 } else { // Wrap around the buffer start; assumes delay_samples <= ECHO_BUFFER_SIZE - TODO confirm g_echo_delays_ms values
376 sent_pos = (g_echo_buffer_pos + ECHO_BUFFER_SIZE) - delay_samples;
377 }
378
379 float sent_sample = g_echo_buffer[sent_pos];
380
381 // Check if samples match (absolute difference below 0.1; this is not a true correlation)
382 float diff = fabsf(sample - sent_sample);
383 if (diff < 0.1f && fabsf(sent_sample) > 0.01f) { // Only count if sent is not silence
384 g_echo_match_count[delay_idx]++;
385 g_echo_correlation_strength[delay_idx] += (0.1f - diff); // Accumulate strength
386 }
387 }
388 g_echo_correlation_sample_count++;
389 }
390
391 // Beep/tone artifact detection
392 // Store sample in sliding window for frequency analysis
393 g_received_beep_window[g_received_beep_window_idx] = sample;
394 g_received_beep_window_idx = (g_received_beep_window_idx + 1) % BEEP_WINDOW_SIZE;
395
396 // Analyze window every 10ms (480 samples at 48kHz)
397 if (g_received_beep_window_idx == 0 && g_received_stats.total_samples > BEEP_WINDOW_SIZE) {
398 // Calculate zero-crossing count and amplitude statistics in this window
399 int zero_crossings = 0;
400 float min_amp = 1.0f, max_amp = 0.0f;
401 float sum_amp = 0.0f;
402 float prev = g_received_beep_window[0]; // Sample 0 only seeds prev; it is not included in min/max/sum below
403
404 for (int i = 1; i < BEEP_WINDOW_SIZE; i++) {
405 float curr = g_received_beep_window[i];
406 float abs_curr = fabsf(curr);
407
408 // Track amplitude range
409 if (abs_curr > max_amp)
410 max_amp = abs_curr;
411 if (abs_curr < min_amp)
412 min_amp = abs_curr;
413 sum_amp += abs_curr;
414
415 // Count zero crossings
416 if ((prev > 0 && curr < 0) || (prev < 0 && curr > 0)) {
417 zero_crossings++;
418 }
419 prev = curr;
420 }
421
422 float avg_amp = sum_amp / BEEP_WINDOW_SIZE; // Divides by the full window size although the sum covers BEEP_WINDOW_SIZE - 1 samples
423 float amp_range = max_amp - min_amp;
424
425 // A window is treated as a beep/tone when it has:
426 // 1. A zero-crossing count of 5..100 per window (a pure tone at f Hz crosses zero about 2*f times per second)
427 // 2. Consistent amplitude (range < 1.5 * avg)
428 // 3. Non-trivial amplitude (avg > 0.02)
429 bool is_tonal = (zero_crossings >= 5 && zero_crossings <= 100) && // ~250Hz-5kHz for a pure tone in a 10ms window at 48kHz
430 (avg_amp > 0.02f) && // Not silence
431 (amp_range < avg_amp * 1.5f); // Relatively consistent amplitude (range < 1.5 * avg)
432
433 if (is_tonal) {
434 g_received_tonal_samples += BEEP_WINDOW_SIZE;
435
436 if (!g_in_beep_burst) {
437 // Starting a new beep burst
438 g_in_beep_burst = true;
439 g_beep_burst_samples = BEEP_WINDOW_SIZE;
440 } else {
441 g_beep_burst_samples += BEEP_WINDOW_SIZE;
442 }
443 } else {
444 if (g_in_beep_burst) {
445 // Beep burst ended
446 // Only count as beep event if it was short (< 500ms = 24000 samples)
447 // Long tonal sounds are likely music, not artifacts
448 if (g_beep_burst_samples > 0 && g_beep_burst_samples < 24000) {
449 g_received_beep_events++;
450 g_received_stats.beep_events = g_received_beep_events;
451 }
452 g_in_beep_burst = false;
453 g_beep_burst_samples = 0;
454 }
455 }
456
457 g_received_stats.tonal_samples = g_received_tonal_samples;
458 }
459
460 // Write to WAV file if enabled (one sample per call; the return value is not checked)
461 if (g_received_wav) {
462 wav_writer_write(g_received_wav, &sample, 1);
463 }
464}
#define MAX_GAP_SAMPLES
Definition analysis.c:68
#define ECHO_BUFFER_SIZE
Definition analysis.c:82
#define BEEP_WINDOW_SIZE
Definition analysis.c:105
int wav_writer_write(wav_writer_t *writer, const float *samples, int num_samples)
Write audio samples to WAV file.
Definition wav_writer.c:85

References audio_analysis_stats_t::beep_events, BEEP_WINDOW_SIZE, audio_analysis_stats_t::clipping_count, ECHO_BUFFER_SIZE, ECHO_DELAY_COUNT, audio_analysis_stats_t::jitter_count, MAX_GAP_SAMPLES, audio_analysis_stats_t::peak_level, audio_analysis_stats_t::silent_samples, audio_analysis_stats_t::tonal_samples, audio_analysis_stats_t::total_samples, and wav_writer_write().

Referenced by audio_process_received_samples().

◆ audio_analysis_track_sent_packet()

void audio_analysis_track_sent_packet ( size_t  size)

Track sent packet.

Parameters
size: Packet size in bytes

Definition at line 246 of file analysis.c.

246 { // Timestamps a sent packet and updates inter-packet gap statistics
247 (void)size; // Unused parameter - reserved for future per-packet analysis
248 if (!g_analysis_enabled)
249 return;
250
251 struct timespec ts;
252 clock_gettime(CLOCK_MONOTONIC, &ts);
253 int64_t now_us = (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; // Monotonic time in microseconds
254
255 // Detect gaps between consecutive packets (discontinuity); skipped while packets_count is 0
256 if (g_sent_stats.packets_count > 0) {
257 int64_t gap_us = now_us - g_sent_last_packet_time_us;
258 int32_t gap_ms = (int32_t)(gap_us / 1000);
259
260 // Expected: ~20ms per Opus frame, flag if gap > 100ms
261 if (gap_ms > 100) {
262 g_sent_stats.discontinuity_count++;
263 }
264
265 // Track max gap (stored in milliseconds)
266 if (gap_ms > (int32_t)g_sent_stats.max_gap_ms) {
267 g_sent_stats.max_gap_ms = (uint32_t)gap_ms;
268 }
269 }
270
271 g_sent_last_packet_time_us = now_us; // Remembered for the next call's gap computation
272 g_sent_stats.packets_count++;
273}

References audio_analysis_stats_t::discontinuity_count, audio_analysis_stats_t::max_gap_ms, and audio_analysis_stats_t::packets_count.

◆ audio_analysis_track_sent_sample()

void audio_analysis_track_sent_sample ( float  sample)

Track sent audio sample.

Parameters
sample: Audio sample value

Definition at line 174 of file analysis.c.

174 { // Updates sent-audio statistics for one sample and stores it in the echo detection buffer
175 if (!g_analysis_enabled)
176 return;
177
178 g_sent_stats.total_samples++;
179
180 // Track peak level (largest absolute sample value seen)
181 float abs_sample = fabsf(sample);
182 if (abs_sample > g_sent_stats.peak_level) {
183 g_sent_stats.peak_level = abs_sample;
184 }
185
186 // Track clipping (|sample| > 1.0) - indicates distortion
187 if (abs_sample > 1.0f) {
188 g_sent_stats.clipping_count++;
189 g_sent_clipping_samples++;
190 }
191
192 // Detect sharp transitions (sudden amplitude jumps > 0.3 versus the previous sample) - indicates clicks/pops
193 float amp_change = fabsf(sample - g_sent_last_sample);
194 if (amp_change > 0.3f) {
195 g_sent_sharp_transitions++;
196 }
197 g_sent_transition_samples++; // Incremented for every sample, not only sharp ones
198
199 // Accumulate the running sum for mean calculation (the division is not done here)
200 g_sent_mean += sample;
201
202 // Detect zero crossings (waveform crossing zero) - indicates spectral content
203 // Use a function-local static variable for prev sample tracking
204 // (Original author's note: this function is only called from the audio capture thread; a dedicated
205 // static keeps this check independent of g_sent_last_sample, which the jitter check below updates)
206 static float s_sent_prev_sample_for_zero_crossing = 0.0f;
207 if ((s_sent_prev_sample_for_zero_crossing > 0 && sample < 0) ||
208 (s_sent_prev_sample_for_zero_crossing < 0 && sample > 0)) {
209 g_sent_zero_crossings++;
210 }
211 s_sent_prev_sample_for_zero_crossing = sample;
212
213 // Track silence (very low level: |sample| < 0.001)
214 if (abs_sample < 0.001f) {
215 g_sent_stats.silent_samples++;
216 g_sent_silence_burst++;
217 } else {
218 // Non-silent sample - max burst length is only updated here, so a burst still in progress is not yet reflected
219 if (g_sent_silence_burst > g_sent_max_silence_burst) {
220 g_sent_max_silence_burst = g_sent_silence_burst;
221 }
222 g_sent_silence_burst = 0;
223 }
224
225 // Detect jitter: rapid amplitude changes > 0.5 between consecutive samples (same difference as amp_change above, higher threshold)
226 float delta = fabsf(sample - g_sent_last_sample);
227 if (delta > 0.5f) {
228 g_sent_stats.jitter_count++;
229 }
230 g_sent_last_sample = sample;
231
232 // Accumulate for RMS calculation
233 g_sent_rms_accumulator += sample * sample;
234 g_sent_rms_sample_count++;
235
236 // Write to WAV file if enabled (one sample per call; the return value is not checked)
237 if (g_sent_wav) {
238 wav_writer_write(g_sent_wav, &sample, 1);
239 }
240
241 // Store in echo detection buffer (circular; the write index wraps at ECHO_BUFFER_SIZE)
242 g_echo_buffer[g_echo_buffer_pos] = sample;
243 g_echo_buffer_pos = (g_echo_buffer_pos + 1) % ECHO_BUFFER_SIZE;
244}

References audio_analysis_stats_t::clipping_count, ECHO_BUFFER_SIZE, audio_analysis_stats_t::jitter_count, audio_analysis_stats_t::peak_level, audio_analysis_stats_t::silent_samples, audio_analysis_stats_t::total_samples, and wav_writer_write().