513 {
514 if (!g_analysis_enabled) {
515 return;
516 }
517
518 int64_t now_us = (int64_t)time_ns_to_us(
time_get_ns());
519
520 g_sent_stats.timestamp_end_ns = now_us;
521 g_received_stats.timestamp_end_ns = now_us;
522
523 int64_t sent_duration_ms = (g_sent_stats.timestamp_end_ns - g_sent_stats.timestamp_start_ns) / NS_PER_MS_INT;
524 int64_t recv_duration_ms = (g_received_stats.timestamp_end_ns - g_received_stats.timestamp_start_ns) / NS_PER_MS_INT;
525
526
527 float sent_rms = 0.0f;
528 float recv_rms = 0.0f;
529 if (g_sent_rms_sample_count > 0) {
530 sent_rms = sqrtf(g_sent_rms_accumulator / g_sent_rms_sample_count);
531 }
532 if (g_received_rms_sample_count > 0) {
533 recv_rms = sqrtf(g_received_rms_accumulator / g_received_rms_sample_count);
534 }
535
536 log_plain("================================================================================");
537 log_plain(" AUDIO ANALYSIS REPORT ");
538 log_plain("================================================================================");
539 log_plain("SENT AUDIO (Microphone Capture):");
540 log_plain(" Duration: %lld ms", (long long)sent_duration_ms);
541 log_plain(" Total Samples: %llu", (unsigned long long)g_sent_stats.total_samples);
542 log_plain(" Peak Level: %.4f (should be < 1.0)", g_sent_stats.peak_level);
543 log_plain(" RMS Level: %.4f (audio energy/loudness)", sent_rms);
544 log_plain(" Clipping Events: %llu samples (%.2f%%)", (unsigned long long)g_sent_stats.clipping_count,
545 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_stats.clipping_count / g_sent_stats.total_samples) : 0);
546 log_plain(" Silent Samples: %llu samples (%.2f%%)", (unsigned long long)g_sent_stats.silent_samples,
547 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_stats.silent_samples / g_sent_stats.total_samples) : 0);
548 if (g_sent_max_silence_burst > 0) {
549 log_plain(" Max Silence Burst: %llu samples", (unsigned long long)g_sent_max_silence_burst);
550 }
551 log_plain(" Packets Sent: %u", g_sent_stats.packets_count);
552 log_plain(" Status: %s", g_sent_stats.clipping_count > 0 ? "CLIPPING DETECTED!" : "OK");
553
554 log_plain("RECEIVED AUDIO (Playback):");
555 log_plain(" Duration: %lld ms", (long long)recv_duration_ms);
556 log_plain(" Total Samples: %llu", (unsigned long long)g_received_stats.total_samples);
557 log_plain(" Peak Level: %.4f", g_received_stats.peak_level);
558 log_plain(" RMS Level: %.4f (audio energy/loudness)", recv_rms);
559 log_plain(" Clipping Events: %llu samples (%.2f%%)", (unsigned long long)g_received_stats.clipping_count,
560 g_received_stats.total_samples > 0
561 ? (100.0 * g_received_stats.clipping_count / g_received_stats.total_samples)
562 : 0);
563 log_plain(" Silent Samples: %llu samples (%.2f%%)", (unsigned long long)g_received_stats.silent_samples,
564 g_received_stats.total_samples > 0
565 ? (100.0 * g_received_stats.silent_samples / g_received_stats.total_samples)
566 : 0);
567 if (g_received_max_silence_burst > 0) {
568 log_plain(" Max Silence Burst: %llu samples", (unsigned long long)g_received_max_silence_burst);
569 }
570 double low_energy_pct =
571 g_received_stats.total_samples > 0 ? (100.0 * g_received_low_energy_samples / g_received_stats.total_samples) : 0;
572 log_plain(" Very Quiet Samples: %llu samples (%.1f%%) [amplitude < 0.05]",
573 (unsigned long long)g_received_low_energy_samples, low_energy_pct);
574 log_plain(" Packets Received: %u", g_received_stats.packets_count);
575 log_plain(" Status: %s", g_received_stats.total_samples == 0 ? "NO AUDIO RECEIVED!" : "Receiving");
576
577 log_plain("QUALITY METRICS (Scratchy/Distorted Audio Detection):");
578 log_plain("SENT:");
579 log_plain(" Jitter Events: %llu (rapid amplitude changes)", (unsigned long long)g_sent_stats.jitter_count);
580 log_plain(" Discontinuities: %llu (packet arrival gaps > 100ms)",
581 (unsigned long long)g_sent_stats.discontinuity_count);
582 log_plain(" Max Gap Between Packets: %u ms (expected ~20ms per frame)", g_sent_stats.max_gap_ns);
583
584 log_plain("RECEIVED:");
585 log_plain(" Jitter Events: %llu (rapid amplitude changes)",
586 (unsigned long long)g_received_stats.jitter_count);
587 log_plain(" Discontinuities: %llu (packet arrival gaps > 100ms)",
588 (unsigned long long)g_received_stats.discontinuity_count);
589 log_plain(" Max Gap Between Packets: %u ms (expected ~20ms per frame)", g_received_stats.max_gap_ns);
590
591
592 if (g_received_beep_events > 0 || g_received_tonal_samples > 0) {
593 double tonal_pct =
594 g_received_stats.total_samples > 0 ? (100.0 * g_received_tonal_samples / g_received_stats.total_samples) : 0;
595 log_plain("BEEP/TONE ARTIFACTS:");
596 log_plain(" Beep Events: %llu (short tonal bursts < 500ms)",
597 (unsigned long long)g_received_beep_events);
598 log_plain(" Tonal Samples: %llu samples (%.1f%%) [consistent frequency content]",
599 (unsigned long long)g_received_tonal_samples, tonal_pct);
600
601 if (g_received_beep_events > 10) {
602 log_plain(" 🔴 BEEPING DETECTED: %llu short tonal bursts - likely codec artifacts or system sounds!",
603 (unsigned long long)g_received_beep_events);
604 log_plain(" Possible causes:");
605 log_plain(" - Opus codec producing tonal artifacts during silence/transitions");
606 log_plain(" - Buffer underruns creating synthetic tones");
607 log_plain(" - AEC3 suppressor resonance");
608 log_plain(" - System notification sounds bleeding through");
609 } else if (g_received_beep_events > 3) {
610 log_plain(" ⚠️ Some beep artifacts detected (%llu events)", (unsigned long long)g_received_beep_events);
611 }
612 }
613
614 log_plain("DIAGNOSTICS:");
615 if (g_sent_stats.peak_level == 0) {
616 log_plain(" No audio captured from microphone!");
617 }
618 if (g_received_stats.total_samples == 0) {
619 log_plain(" No audio received from server!");
620 } else if (g_received_stats.peak_level < 0.01f) {
621 log_plain(" ⚠️ Received audio is very quiet (peak < 0.01)");
622 }
623 if (g_sent_stats.clipping_count > 0) {
624 log_plain(" Microphone input is clipping - reduce microphone volume");
625 }
626
627
628 log_plain("ECHO DETECTION (Echo Cancellation Quality Check):");
629 if (g_echo_correlation_sample_count > 0 && g_sent_stats.total_samples > 0) {
630 uint64_t max_matches = 0;
631 int best_delay_idx = -1;
632
633
635 if (g_echo_match_count[i] > max_matches) {
636 max_matches = g_echo_match_count[i];
637 best_delay_idx = i;
638 }
639 }
640
641 double echo_threshold_pct = 5.0;
642
643 if (best_delay_idx >= 0) {
644 double match_pct = (100.0 * g_echo_match_count[best_delay_idx]) / g_echo_correlation_sample_count;
645 log_plain(" Echo correlation at different delays:");
647 double pct = (100.0 * g_echo_match_count[i]) / g_echo_correlation_sample_count;
648 const char *status = pct > echo_threshold_pct ? "⚠️ ECHO DETECTED" : "✓ OK";
649 log_plain(" %3u ms delay: %.1f%% match rate %s", g_echo_delays_ms[i], pct, status);
650 }
651
652 if (match_pct > echo_threshold_pct) {
653 g_detected_echo_delay_ms = g_echo_delays_ms[best_delay_idx];
654 log_plain(" 🔴 ECHO CANCELLATION NOT WORKING: Strong echo at %u ms delay!", g_detected_echo_delay_ms);
655 log_plain(" Received audio contains %.1f%% samples matching sent audio from %u ms ago", match_pct,
656 g_detected_echo_delay_ms);
657 } else {
658 log_plain(" ✓ Echo cancellation working: No significant echo detected");
659 }
660 }
661 } else {
662 log_plain(" Insufficient data for echo detection (need both sent and received audio)");
663 }
664
665
666 if (g_aec3_metrics_available) {
667 log_plain("AEC3 METRICS (from WebRTC GetMetrics()):");
668 log_plain(" Echo Return Loss (ERL): %.2f dB (how much echo is attenuated; >10 dB is good)",
669 g_aec3_echo_return_loss);
670 log_plain(" Echo Return Loss Enhancement (ERLE): %.2f dB (residual echo suppression)",
671 g_aec3_echo_return_loss_enhancement);
672 log_plain(" Estimated Echo Delay: %d ms", g_aec3_delay_ns);
673
674 if (g_aec3_echo_return_loss > 10.0) {
675 log_plain(" ✓ Good echo attenuation (ERL > 10 dB)");
676 } else if (g_aec3_echo_return_loss > 3.0) {
677 log_plain(" ⚠️ Moderate echo attenuation (3-10 dB)");
678 } else {
679 log_plain(" 🔴 Poor echo attenuation (ERL < 3 dB)");
680 }
681 }
682
683
684 if (recv_rms < 0.005f) {
685 log_plain(" ⚠️ CRITICAL: Received audio RMS is extremely low (%.6f) - barely audible!", recv_rms);
686 } else if (recv_rms < 0.02f) {
687 log_plain(" ⚠️ WARNING: Received audio RMS is low (%.6f) - may sound quiet or muddy", recv_rms);
688 }
689
690
691 double received_silence_pct = g_received_stats.total_samples > 0
692 ? (100.0 * g_received_stats.silent_samples / g_received_stats.total_samples)
693 : 0;
694
695 if (received_silence_pct > 30.0) {
696 log_plain(" ⚠️ SCRATCHY AUDIO DETECTED: Too much silence in received audio!");
697 log_plain(" - Silence: %.1f%% of received samples (should be < 10%%)", received_silence_pct);
698 log_plain(" - Max silence burst: %llu samples", (unsigned long long)g_received_max_silence_burst);
699 log_plain(" - This creates jittery/choppy playback between audio bursts");
700 } else if (received_silence_pct > 15.0) {
701 log_plain(" ⚠️ WARNING: Moderate silence detected (%.1f%%)", received_silence_pct);
702 }
703
704
705 double sent_sharp_pct =
706 g_sent_transition_samples > 0 ? (100.0 * g_sent_sharp_transitions / g_sent_transition_samples) : 0;
707 double recv_sharp_pct =
708 g_received_transition_samples > 0 ? (100.0 * g_received_sharp_transitions / g_received_transition_samples) : 0;
709
710
711
712 double sent_zero_cross_pct =
713 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_zero_crossings / g_sent_stats.total_samples) : 0;
714 double recv_zero_cross_pct =
715 g_received_stats.total_samples > 0 ? (100.0 * g_received_zero_crossings / g_received_stats.total_samples) : 0;
716
717 log_plain("WAVEFORM ANALYSIS (Is it clean music or corrupted/static?):");
718 log_plain("SENT AUDIO:");
719 log_plain(" Zero crossings: %.2f%% of samples (music: 1-5%%, noise: 15-50%%)", sent_zero_cross_pct);
720 log_plain(" Sharp transitions (clicks/pops): %.2f%% of samples", sent_sharp_pct);
721 log_plain(" Clipping samples: %llu (%.3f%%)", (unsigned long long)g_sent_clipping_samples,
722 g_sent_stats.total_samples > 0 ? (100.0 * g_sent_clipping_samples / g_sent_stats.total_samples) : 0);
723
724 log_plain("RECEIVED AUDIO:");
725 log_plain(" Zero crossings: %.2f%% of samples (music: 1-5%%, noise: 15-50%%)", recv_zero_cross_pct);
726 log_plain(" Sharp transitions (clicks/pops): %.2f%% of samples", recv_sharp_pct);
727 log_plain(" Clipping samples: %llu (%.3f%%)", (unsigned long long)g_received_clipping_samples,
728 g_received_stats.total_samples > 0 ? (100.0 * g_received_clipping_samples / g_received_stats.total_samples)
729 : 0);
730 log_plain(" Zero crossing increase: %.2f%% higher than sent (indicates corruption)",
731 recv_zero_cross_pct - sent_zero_cross_pct);
732
733
734 log_plain("SOUND QUALITY VERDICT:");
735 if (recv_zero_cross_pct > 10.0) {
736 log_plain(" ⚠️ SOUNDS LIKE STATIC/DISTORTED: Excessive zero crossings (%.2f%%) = high frequency noise",
737 recv_zero_cross_pct);
738 log_plain(" Increase from sent: %.2f%% (waveform corruption detected)",
739 recv_zero_cross_pct - sent_zero_cross_pct);
740 log_plain(" Likely causes: Opus codec artifacts, jitter buffer issues, or packet delivery gaps");
741 } else if (recv_zero_cross_pct - sent_zero_cross_pct > 3.0) {
742 log_plain(" ⚠️ SOUNDS CORRUPTED: Zero crossing rate increased by %.2f%% (should be ±0.5%%)",
743 recv_zero_cross_pct - sent_zero_cross_pct);
744 log_plain(" Indicates waveform distortion from network/processing artifacts");
745 } else if (recv_sharp_pct > 2.0) {
746 log_plain(" ⚠️ SOUNDS LIKE STATIC: High click/pop rate (%.2f%%) indicates audio artifacts", recv_sharp_pct);
747 log_plain(" Likely causes: Packet loss, jitter buffer issues, or frame discontinuities");
748 } else if (g_received_clipping_samples > (g_received_stats.total_samples / 1000)) {
749 log_plain(" ⚠️ SOUNDS DISTORTED: Significant clipping detected (%.3f%%)",
750 100.0 * g_received_clipping_samples / g_received_stats.total_samples);
751 log_plain(" Likely causes: AGC too aggressive, gain too high, or codec compression artifacts");
752 } else if (low_energy_pct > 50.0 && recv_rms < 0.05f) {
753 log_plain(" ⚠️ SOUNDS MUDDY/QUIET: Over 50%% very quiet samples + low RMS");
754 log_plain(" Audio may sound unclear or like background noise rather than music");
755 } else if (received_silence_pct > 10.0) {
756 log_plain(" ⚠️ SOUNDS SCRATCHY: Excessive silence (%.1f%%) causes dropouts", received_silence_pct);
757 } else if (recv_rms > 0.08f && recv_zero_cross_pct < 6.0 && recv_sharp_pct < 1.0 &&
758 g_received_clipping_samples == 0) {
759 log_plain(" ✓ SOUNDS LIKE MUSIC: Good RMS (%.4f), clean waveform (%.2f%% zero crossings), minimal artifacts",
760 recv_rms, recv_zero_cross_pct);
761 log_plain(" Audio quality acceptable for communication");
762 } else {
763 log_plain(" ? BORDERLINE: Check specific metrics above");
764 }
765
766
767 if (low_energy_pct > 50.0) {
768 log_plain(" ⚠️ WARNING: Over 50%% of received samples are very quiet (< 0.05 amplitude)");
769 log_plain(" - This makes audio sound muddy, unclear, or hard to understand");
770 log_plain(" - Caused by: Mixing other clients' audio with your own at wrong levels");
771 }
772
773
774 if (g_received_packet_times_count >= 5) {
776 uint32_t inter_arrival_count = 0;
777 uint32_t min_interval_ms = 0xFFFFFFFF;
778 uint32_t max_interval_ms = 0;
779 uint64_t sum_intervals_ms = 0;
780 uint32_t intervals_around_50ms = 0;
781
782
783 for (uint32_t i = 1; i < g_received_packet_times_count; i++) {
784 uint64_t prev_ns = g_received_packet_times_ns[i - 1];
785 uint64_t curr_ns = g_received_packet_times_ns[i];
786
788 uint32_t gap_ms = (uint32_t)time_ns_to_ms(gap_ns);
789
790 inter_arrival_times_ms[inter_arrival_count++] = gap_ms;
791 if (gap_ms < min_interval_ms)
792 min_interval_ms = gap_ms;
793 if (gap_ms > max_interval_ms)
794 max_interval_ms = gap_ms;
795 sum_intervals_ms += gap_ms;
796
797
798 if (gap_ms >= 35 && gap_ms <= 70) {
799 intervals_around_50ms++;
800 }
801 }
802
803 uint32_t avg_interval_ms = (uint32_t)(sum_intervals_ms / inter_arrival_count);
804 uint32_t interval_consistency = (intervals_around_50ms * 100) / inter_arrival_count;
805
806
807
808
809 double avg_samples_per_packet =
810 g_received_stats.total_samples > 0 ? (double)g_received_stats.total_samples / inter_arrival_count : 0;
811 double frames_per_packet = avg_samples_per_packet / 960.0;
812 double ms_audio_per_packet = frames_per_packet * 20.0;
813
814
815 if (intervals_around_50ms >= (inter_arrival_count * 2 / 3)) {
816
817 log_plain(" 🔴 PERIODIC STUTTERING DETECTED: Server sends packets every ~%u ms (should be ~20ms)!",
818 avg_interval_ms);
819 log_plain(" - Packet inter-arrival: %u-%u ms (avg: %u ms)", min_interval_ms, max_interval_ms, avg_interval_ms);
820 log_plain(" - %u/%u packets (~%u%%) are ~50ms apart (CLEAR STUTTERING PATTERN)", intervals_around_50ms,
821 inter_arrival_count, interval_consistency);
822
823 log_plain(" - PACKET ANALYSIS:");
824 log_plain(" - Total audio samples: %llu over %u packets", (unsigned long long)g_received_stats.total_samples,
825 inter_arrival_count);
826 log_plain(" - Avg samples per packet: %.0f (= %.2f Opus frames = %.1f ms)", avg_samples_per_packet,
827 frames_per_packet, ms_audio_per_packet);
828
829 if (frames_per_packet < 1.5) {
830 log_plain(" - ❌ PROBLEM: Each packet contains < 1.5 frames (should be 2-3 frames!)");
831 log_plain(" - With only %.1f frames per packet arriving every %u ms, there are gaps between chunks",
832 frames_per_packet, avg_interval_ms);
833 log_plain(" - Audio plays for ~%.0f ms, then %u ms gap, then plays again", ms_audio_per_packet,
834 avg_interval_ms - (uint32_t)ms_audio_per_packet);
835 } else if (frames_per_packet > 2.5) {
836 log_plain(" - ✓ Packets contain %.1f frames (~%.0f ms audio each)", frames_per_packet,
837 ms_audio_per_packet);
838 log_plain(" - Should play smoothly if jitter buffer is large enough");
839 log_plain(" - If still stuttering, issue is jitter buffer depth or timing precision");
840 } else {
841 log_plain(" - Packets contain %.1f frames (~%.0f ms)", frames_per_packet, ms_audio_per_packet);
842 log_plain(" - Borderline: buffer needs to hold %.0f ms to bridge %.u ms gap", ms_audio_per_packet,
843 avg_interval_ms - (uint32_t)ms_audio_per_packet);
844 }
845 } else if (avg_interval_ms > 30) {
846 log_plain(" ⚠️ AUDIO DELIVERY INCONSISTENCY: Server packets arrive every ~%u ms (expected ~20ms)",
847 avg_interval_ms);
848 log_plain(" - Interval range: %u-%u ms", min_interval_ms, max_interval_ms);
849 log_plain(" - This causes dropouts and buffering issues");
850 }
851 }
852
853
854 if (g_received_stats.max_gap_ns > 40) {
855 log_plain(" ⚠️ DISTORTION DETECTED: Packet delivery gaps too large!");
856 log_plain(" - Max gap: %u ms (should be ~20ms for smooth audio)", g_received_stats.max_gap_ns);
857 if (g_received_stats.max_gap_ns > 80) {
858 log_plain(" - SEVERE: Gaps > 80ms cause severe distortion and dropouts");
859 } else if (g_received_stats.max_gap_ns > 50) {
860 log_plain(" - Gaps > 50ms cause noticeable distortion");
861 }
862 }
863 if (g_received_stats.discontinuity_count > 0) {
864 log_plain(" Packet delivery discontinuities: %llu gaps > 100ms detected",
865 (unsigned long long)g_received_stats.discontinuity_count);
866 }
867 if (g_received_stats.jitter_count > (g_received_stats.total_samples / 100)) {
868 log_plain(" High jitter detected: > 1%% of samples have rapid amplitude changes");
869 log_plain(" - May indicate buffer underruns from sparse packet delivery");
870 }
871
872 log_plain("================================================================================");
873}
#define MAX_PACKET_SAMPLES
uint64_t time_elapsed_ns(uint64_t start_ns, uint64_t end_ns)