ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
crc32.c
Go to the documentation of this file.
1
7#include "network/crc32.h"
8#include <string.h>
9#include <stdio.h>
10#include <stdatomic.h>
11// unistd.h is POSIX-only (not available on Windows)
12#ifndef _WIN32
13#include <unistd.h>
14#endif
15
16// Multi-architecture hardware acceleration support
17#if defined(__aarch64__)
18#include <arm_acle.h>
19#define ARCH_ARM64
20#elif defined(__x86_64__) && defined(HAVE_CRC32_HW)
21#include <immintrin.h>
22#ifdef _WIN32
23#include <intrin.h>
24#else
25#include <cpuid.h>
26#endif
27#define ARCH_X86_64
28#endif
29
30// Check if CRC32 instructions are available at runtime
31static bool crc32_hw_available = false;
32static atomic_bool crc32_hw_checked = false;
33
34static void check_crc32_hw_support(void) {
35 // Fast path: check if already initialized (atomic read)
36 if (atomic_load(&crc32_hw_checked)) {
37 return;
38 }
39
40 // Try to claim initialization (only one thread will succeed)
41 bool expected = false;
42 if (!atomic_compare_exchange_strong(&crc32_hw_checked, &expected, true)) {
43 // Another thread is initializing or already initialized, wait for it
44 // Add backoff sleep to prevent 100% CPU burn if init thread is preempted
45 int spin_count = 0;
46 while (!atomic_load(&crc32_hw_checked)) {
47 spin_count++;
48 if (spin_count > 100) {
49 // After 100 spins, sleep briefly to avoid CPU waste
50#ifdef _WIN32
51 Sleep(0); // Yield to other threads
52#else
53 usleep(1); // Sleep 1 microsecond
54#endif
55 spin_count = 0;
56 }
57 }
58 return;
59 }
60
61 // This thread won the race and will perform initialization
62
63 // clang-format off
64#ifdef ARCH_ARM64
65// On Apple Silicon, CRC32 is always available
66// On other ARM64 systems, we could check HWCAP_CRC32
67#ifdef __APPLE__
68 crc32_hw_available = true;
69#else
70 // For other ARM64 systems, we'd need to check auxiliary vector
71 // For now, assume available (can be made more sophisticated)
72 crc32_hw_available = true;
73#endif
74 // log_debug("ARM CRC32 hardware acceleration: %s", crc32_hw_available ? "enabled" : "disabled");
75#elif defined(ARCH_X86_64)
76 // Check for SSE4.2 support (includes CRC32 instruction)
77#ifdef _WIN32
78 int cpu_info[4];
79 __cpuid(cpu_info, 1);
80 // SSE4.2 is bit 20 of ECX
81 crc32_hw_available = (cpu_info[2] & (1 << 20)) != 0;
82#else
83 unsigned int eax, ebx, ecx, edx;
84 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
85 crc32_hw_available = (ecx & bit_SSE4_2) != 0;
86 } else {
87 crc32_hw_available = false;
88 }
89#endif
90 // log_debug("Intel CRC32 hardware acceleration (SSE4.2): %s", crc32_hw_available ? "enabled" : "disabled");
91#else
92 crc32_hw_available = false;
93 // log_debug("No hardware CRC32 acceleration available for this architecture");
94#endif // clang-format on
95
96 // Initialization complete - flag was already set to true by atomic_compare_exchange_strong above
97 // The compare-exchange with memory_order_seq_cst ensures all writes to crc32_hw_available
98 // are visible to other threads when they see crc32_hw_checked == true
99}
100
#ifdef ARCH_ARM64
// ARM hardware CRC32-C (Castagnoli polynomial, 0x1EDC6F41).
// IMPORTANT: the intrinsic must be __crc32cb (CRC32-C), NOT __crc32b
// (IEEE 802.3), so that the result agrees with:
//   - the Intel _mm_crc32_* intrinsics
//   - the software fallback asciichat_crc32_sw()
// Input is consumed one byte at a time to keep results consistent with x86.
__attribute__((target("arch=armv8-a+crc"))) static uint32_t crc32_arm_hw(const void *data, size_t len) {
  const uint8_t *cursor = (const uint8_t *)data;
  uint32_t state = 0xFFFFFFFF; // initial value: all ones

  // Fold each input byte into the running CRC with the CRC32-C byte intrinsic
  for (size_t remaining = len; remaining > 0; remaining--) {
    state = __crc32cb(state, *cursor++);
  }

  // Final bit inversion
  return ~state;
}
#endif
121
#ifdef ARCH_X86_64
// Intel hardware CRC32 via the SSE4.2 byte intrinsic.
// Input is consumed one byte at a time to keep results consistent with ARM.
static uint32_t crc32_intel_hw(const void *data, size_t len) {
  const uint8_t *cursor = (const uint8_t *)data;
  uint32_t state = 0xFFFFFFFF; // initial value: all ones

  // Fold each input byte into the running CRC
  for (size_t remaining = len; remaining > 0; remaining--) {
    state = _mm_crc32_u8(state, *cursor++);
  }

  // Final bit inversion
  return ~state;
}
#endif
137
138// Multi-architecture hardware-accelerated CRC32
139uint32_t asciichat_crc32_hw(const void *data, size_t len) {
140 check_crc32_hw_support();
141
142 if (!crc32_hw_available) {
143 // DEBUG: Log fallback to software
144 static bool logged_fallback = false;
145 if (!logged_fallback) {
146 fprintf(stderr, "[CRC32 DEBUG] Using software CRC32 (no hardware acceleration)\n");
147 logged_fallback = true;
148 }
149 return asciichat_crc32_sw(data, len);
150 }
151
152#ifdef ARCH_ARM64
153 static bool logged_arm = false;
154 if (!logged_arm) {
155 fprintf(stderr, "[CRC32 DEBUG] Using ARM64 hardware CRC32\n");
156 logged_arm = true;
157 }
158 return crc32_arm_hw(data, len);
159#elif defined(ARCH_X86_64)
160 static bool logged_intel = false;
161 if (!logged_intel) {
162 fprintf(stderr, "[CRC32 DEBUG] Using Intel x86_64 hardware CRC32 (SSE4.2)\n");
163 logged_intel = true;
164 }
165 return crc32_intel_hw(data, len);
166#else
167 return asciichat_crc32_sw(data, len);
168#endif
169}
170
172 check_crc32_hw_support();
173 return crc32_hw_available;
174}
175
// Software fallback: bit-at-a-time CRC32-C (Castagnoli).
// Uses the same polynomial as the hardware paths in this file
// (__crc32cb on ARM and _mm_crc32_u8 on x86).
uint32_t asciichat_crc32_sw(const void *data, size_t len) {
  // CRC32-C (Castagnoli) polynomial 0x1EDC6F41, bit-reversed for LSB-first processing
  const uint32_t reversed_poly = 0x82F63B78;

  const uint8_t *cursor = (const uint8_t *)data;
  uint32_t state = 0xFFFFFFFF; // initial value: all ones

  for (size_t remaining = len; remaining > 0; remaining--) {
    state ^= *cursor++;

    // Shift out eight bits, applying the polynomial whenever the bit that
    // falls off the low end is set
    for (int bit = 8; bit > 0; bit--) {
      const uint32_t feedback = (state & 1u) ? reversed_poly : 0u;
      state = (state >> 1) ^ feedback;
    }
  }

  // Final bit inversion
  return ~state;
}
Hardware-Accelerated CRC32 Checksum Computation.
unsigned int uint32_t
Definition common.h:58
unsigned char uint8_t
Definition common.h:56
bool crc32_hw_is_available(void)
Check if hardware CRC32 acceleration is available at runtime.
Definition crc32.c:171
uint32_t asciichat_crc32_sw(const void *data, size_t len)
Compute CRC32 checksum using software implementation only.
Definition crc32.c:178
uint32_t asciichat_crc32_hw(const void *data, size_t len)
Compute CRC32 checksum with hardware acceleration (if available)
Definition crc32.c:139
RGB pixel structure.
Definition video/image.h:80