ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
ascii_simd.c
Go to the documentation of this file.
1
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10#include <time.h>
11#include <stdarg.h>
12
14#include "common.h"
15#include "ascii_simd.h"
16#include "video/palette.h"
17#include "../ascii.h"
18#include "video/output_buffer.h"
19#include "avx2.h"
20#include "util/math.h"
21#include "util/overflow.h"
22
24
25// Helper: write decimal RGB triplet using dec3 cache
26size_t write_rgb_triplet(uint8_t value, char *dst) {
27 const dec3_t *d = &g_dec3_cache.dec3_table[value];
28 memcpy(dst, d->s, d->len);
29 return d->len;
30}
31
32// Default luminance palette for legacy functions
34static bool g_default_palette_initialized = false;
35
36// Initialize default luminance palette
38 if (g_default_palette_initialized)
39 return;
40
41 // Build default luminance mapping using standard palette
42 const size_t len = DEFAULT_ASCII_PALETTE_LEN;
43 for (int i = 0; i < 256; i++) {
44 size_t palette_index = (i * (len - 1) + 127) / 255;
45 if (palette_index >= len) {
46 palette_index = len - 1;
47 }
49 }
50 g_default_palette_initialized = true;
51}
52
53// Helper function for benchmarks and fallback cases
// The benchmark functions in this file call it right before their timing loops.
// NOTE(review): the embedded listing numbers jump from 54 to 56, so the single
// body statement is not visible in this extract. Presumably it triggers the
// default luminance palette initialization - confirm against the original file.
54static void ensure_default_palette_ready(void) {
56}
57
// Fill g_dec3_cache.dec3_table with the decimal digit characters of every
// byte value 0..255, written without leading zeros (1 to 3 characters).
// NOTE(review): the embedded listing numbers skip 59, 68, 73, 77 and 81, so
// several statements are missing from this extract. Presumably they are the
// condition guarding the early return, the per-branch length stores and the
// "initialized" flag update - confirm against the original file.
58void init_dec3(void) {
60 return;
61 for (int v = 0; v < 256; ++v) {
62 int d2 = v / 100; // hundreds digit, 0..2
63 int r = v - d2 * 100; // remainder after removing the hundreds, 0..99
64 int d1 = r / 10; // tens digit, 0..9
65 int d0 = r - d1 * 10; // ones digit, 0..9
66
// Three digits when the hundreds digit is non-zero.
67 if (d2) {
69 g_dec3_cache.dec3_table[v].s[0] = '0' + d2;
70 g_dec3_cache.dec3_table[v].s[1] = '0' + d1;
71 g_dec3_cache.dec3_table[v].s[2] = '0' + d0;
// Two digits when only the tens digit is non-zero.
72 } else if (d1) {
74 g_dec3_cache.dec3_table[v].s[0] = '0' + d1;
75 g_dec3_cache.dec3_table[v].s[1] = '0' + d0;
// Single digit otherwise (including the value 0).
76 } else {
78 g_dec3_cache.dec3_table[v].s[0] = '0' + d0;
79 }
80 }
82}
83
84// NOTE: Constructor disabled for musl static builds - causes hangs
85// __attribute__((constructor)) static void ascii_ctor(void) {
86// init_dec3();
87// init_default_luminance_palette();
88// }
89
// Explicit start-up hook that replaces the commented-out constructor above.
// NOTE(review): the embedded listing numbers jump from 93 to 95, so a second
// statement is missing from this extract. The comment below speaks of "both
// init functions", so it is presumably the default palette initializer - confirm.
90void ascii_simd_init(void) {
91 // Initialize SIMD lookup tables manually (constructor disabled for musl compatibility)
92 // Both init functions have guards to prevent double-initialization
93 init_dec3();
95}
96
97// Allocate a new image (RGB8), use SAFE_MALLOC for consistent error handling
98ImageRGB alloc_image(int w, int h) {
99 ImageRGB out;
100 out.w = w;
101 out.h = h;
102 size_t n = (size_t)w * (size_t)h * 3u;
103 out.pixels = SAFE_MALLOC(n, uint8_t *);
104 return out;
105}
106
107// String utility functions
108void str_init(Str *s) {
109 s->data = NULL;
110 s->len = 0;
111 s->cap = 0;
112}
113
114void str_free(Str *s) {
115 SAFE_FREE(s->data);
116 s->data = NULL;
117 s->len = s->cap = 0;
118}
119
120void str_reserve(Str *s, size_t need) {
121 if (need <= s->cap)
122 return;
123 size_t ncap = s->cap ? s->cap : 4096;
124 while (ncap < need)
125 ncap = (ncap * 3) / 2 + 64;
126 s->data = SAFE_REALLOC(s->data, ncap, char *);
127 s->cap = ncap;
128}
129
130void str_append_bytes(Str *s, const void *src, size_t n) {
131 str_reserve(s, s->len + n);
132 memcpy(s->data + s->len, src, n);
133 s->len += n;
134}
135
136void str_append_c(Str *s, char c) {
137 str_reserve(s, s->len + 1);
138 s->data[s->len++] = c;
139}
140
141void str_printf(Str *s, const char *fmt, ...) {
142 va_list ap;
143 va_start(ap, fmt);
144 char stackbuf[256];
145 int n = vsnprintf(stackbuf, sizeof(stackbuf), fmt, ap);
146 va_end(ap);
147 if (n < 0)
148 return;
149 if ((size_t)n < sizeof(stackbuf)) {
150 str_append_bytes(s, stackbuf, (size_t)n);
151 return;
152 }
153 char *heap;
154 heap = SAFE_MALLOC((size_t)n + 1, char *);
155 va_start(ap, fmt);
156 (void)vsnprintf(heap, (size_t)n + 1, fmt, ap);
157 va_end(ap);
158 str_append_bytes(s, heap, (size_t)n);
159 SAFE_FREE(heap);
160}
161
162/* ============================================================================
163 * Scalar Implementation (Baseline)
164 * ============================================================================
165 */
166
167void convert_pixels_scalar(const rgb_pixel_t *pixels, char *ascii_chars, int count, const char luminance_palette[256]) {
168 for (int i = 0; i < count; i++) {
169 const rgb_pixel_t *p = &pixels[i];
170
171 // Calculate luminance using integer arithmetic
172 int luminance = (LUMA_RED * p->r + LUMA_GREEN * p->g + LUMA_BLUE * p->b) >> 8;
173
174 // Clamp to [0, 255]
175 if (luminance > 255)
176 luminance = 255;
177
178 ascii_chars[i] = luminance_palette[luminance];
179 }
180}
181
182char *convert_pixels_scalar_with_newlines(image_t *image, const char luminance_palette[256]) {
183 const int h = image->h;
184 const int w = image->w;
185
186 // Get UTF-8 character cache for RLE emission
187 // Note: We need to reverse-engineer the palette chars from luminance_palette
188 // For now, use a simpler approach with direct luminance lookup
189
190 // Use outbuf_t for efficient UTF-8 RLE emission (same as SIMD renderers)
191 outbuf_t ob = {0};
192 const size_t max_char_bytes = 4; // Max UTF-8 character size
193 ob.cap = (size_t)h * ((size_t)w * max_char_bytes + 1);
194 ob.buf = SAFE_MALLOC(ob.cap ? ob.cap : 1, char *);
195 if (!ob.buf) {
196 log_error("Failed to allocate output buffer for scalar rendering");
197 return NULL;
198 }
199
200 // Process pixels with RLE optimization
201 for (int y = 0; y < h; y++) {
202 const rgb_pixel_t *row_pixels = (const rgb_pixel_t *)&image->pixels[y * w];
203
204 for (int x = 0; x < w;) {
205 const rgb_pixel_t *p = &row_pixels[x];
206
207 // Calculate luminance using integer arithmetic
208 int luminance = (LUMA_RED * p->r + LUMA_GREEN * p->g + LUMA_BLUE * p->b) >> 8;
209 if (luminance > 255)
210 luminance = 255;
211
212 char current_char = luminance_palette[luminance];
213
214 // Find run length for same character (RLE optimization)
215 int j = x + 1;
216 while (j < w) {
217 const rgb_pixel_t *next_p = &row_pixels[j];
218 int next_luminance = (LUMA_RED * next_p->r + LUMA_GREEN * next_p->g + LUMA_BLUE * next_p->b) >> 8;
219 if (next_luminance > 255)
220 next_luminance = 255;
221 char next_char = luminance_palette[next_luminance];
222 if (next_char != current_char)
223 break;
224 j++;
225 }
226 uint32_t run = (uint32_t)(j - x);
227
228 // Emit character with RLE (same as SIMD)
229 ob_putc(&ob, current_char);
230 if (rep_is_profitable(run)) {
231 emit_rep(&ob, run - 1);
232 } else {
233 for (uint32_t k = 1; k < run; k++) {
234 ob_putc(&ob, current_char);
235 }
236 }
237 x = j;
238 }
239
240 // Add newline (except for last row)
241 if (y != h - 1) {
242 ob_putc(&ob, '\n');
243 }
244 }
245
246 ob_term(&ob);
247 return ob.buf;
248}
249
250// --------------------------------------
251// SIMD-convert an image into ASCII characters and return it with newlines
252char *image_print_simd(image_t *image, const char *ascii_chars) {
253#if SIMD_SUPPORT_AVX2
254 return render_ascii_image_monochrome_avx2(image, ascii_chars);
255#elif SIMD_SUPPORT_SSSE3
256 return render_ascii_image_monochrome_ssse3(image, ascii_chars);
257#elif SIMD_SUPPORT_SSE2
258 return render_ascii_image_monochrome_sse2(image, ascii_chars);
259#elif SIMD_SUPPORT_NEON
260 return render_ascii_image_monochrome_neon(image, ascii_chars);
261#else
262 // Fallback to scalar implementation - use image_print which properly handles
263 // the palette string (convert_pixels_scalar_with_newlines expects a 256-element
264 // luminance lookup table, not the raw palette string)
265 return image_print(image, ascii_chars);
266#endif
267}
268
269// NOTE: image_print_simd_with_palette is now redundant - use image_print_simd() directly
270
271/* ============================================================================
272 * Auto-dispatch and any helpers
273 * ============================================================================
274 */
275
277 printf("SIMD Support:\n");
278#if SIMD_SUPPORT_AVX2
279 printf(" ✓ AVX2 (32 pixels/cycle)\n");
280#endif
281#if SIMD_SUPPORT_NEON
282 printf(" ✓ ARM NEON (16 pixels/cycle)\n");
283#endif
284#if SIMD_SUPPORT_SVE
285 printf(" ✓ ARM SVE (scalable pixels/cycle)\n");
286#endif
287#if SIMD_SUPPORT_SSSE3
288 printf(" ✓ SSSE3 (16 pixels/cycle)\n");
289#endif
290#if SIMD_SUPPORT_SSE2
291 printf(" ✓ SSE2 (16 pixels/cycle)\n");
292#endif
293 printf(" ✓ Scalar fallback (1 pixel/cycle)\n");
294}
295
296/* ============================================================================
297 * Benchmarking
298 * ============================================================================
299 */
300
301static double get_time_seconds(void) {
302 struct timespec ts;
303 if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
304 // Fallback to clock() if CLOCK_MONOTONIC not available
305 return (double)clock() / CLOCKS_PER_SEC;
306 }
307 return ts.tv_sec + ts.tv_nsec / 1e9;
308}
309
// Pick a benchmark iteration count from the image size.
//
// Smaller images get more iterations:
//   < 5,000 pixels   -> 100   (e.g. 80x24)
//   < 50,000 pixels  -> 50    (e.g. 160x48)
//   < 200,000 pixels -> 20    (e.g. 320x240)
//   < 500,000 pixels -> 10    (e.g. 640x480)
//   otherwise        -> 5     (e.g. 1280x720)
// The result is then raised to a floor of 10, so the largest tier also
// yields 10. target_duration_ms is accepted but not used.
static int calculate_adaptive_iterations(int pixel_count, double __attribute__((unused)) target_duration_ms) {
  static const struct {
    int below;      // tier applies when pixel_count is strictly below this
    int iterations; // iteration count for the tier
  } tiers[] = {
      {5000, 100},
      {50000, 50},
      {200000, 20},
      {500000, 10},
  };
  const int floor_iterations = 10;

  int chosen = 5; // value for images at or above the last threshold
  for (size_t t = 0; t < sizeof(tiers) / sizeof(tiers[0]); t++) {
    if (pixel_count < tiers[t].below) {
      chosen = tiers[t].iterations;
      break;
    }
  }

  if (chosen < floor_iterations) {
    chosen = floor_iterations;
  }
  return chosen;
}
333
334simd_benchmark_t benchmark_simd_conversion(int width, int height, int __attribute__((unused)) iterations) {
335 simd_benchmark_t result = {0};
336
337 // Check for integer overflow in pixel count calculation
338 size_t pixel_count;
339 if (checked_size_mul((size_t)width, (size_t)height, &pixel_count) != ASCIICHAT_OK) {
340 log_error("Image dimensions %d x %d too large (overflow)", width, height);
341 return result;
342 }
343
344 // Generate test data and test image
345 rgb_pixel_t *test_pixels;
346 char *output_buffer;
347 test_pixels = SAFE_CALLOC_SIMD(pixel_count, sizeof(rgb_pixel_t), rgb_pixel_t *);
348 output_buffer = SAFE_MALLOC(pixel_count, char *);
349
350 // Create test image for new image-based functions
351 image_t *test_image = image_new(width, height);
352 if (!test_image) {
353 SAFE_FREE(test_pixels);
354 SAFE_FREE(output_buffer);
355 return result;
356 }
357
358 // Use synthetic data for consistent cross-platform testing
359 printf("Using synthetic gradient data for consistent benchmarking\n");
360 srand(12345); // Consistent results across runs // NOLINT(cert-msc32-c,cert-msc51-cpp,bugprone-random-generator-seed)
361 for (size_t i = 0; i < pixel_count; i++) {
362 int x = i % width;
363 int y = i / width;
364 // Create realistic gradient pattern with some variation
365 int base_r = (x * 255) / width;
366 int base_g = (y * 255) / height;
367 int base_b = ((x + y) * 127) / (width + height);
368
369 // Add small random variation to make it realistic
370 int temp_r = base_r + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
371 int temp_g = base_g + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
372 int temp_b = base_b + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
373
374 test_pixels[i].r = clamp_rgb(temp_r);
375 test_pixels[i].g = clamp_rgb(temp_g);
376 test_pixels[i].b = clamp_rgb(temp_b);
377 }
378
379 // Copy test data to test image pixels
380 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
381
382 // Calculate adaptive iterations for reliable timing
383 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
384 printf("Benchmarking MONO %dx%d (%zu pixels) using %d adaptive iterations (ignoring passed iterations)...\n", width,
385 height, pixel_count, adaptive_iterations);
386
387 // Benchmark scalar using image-based API
388 ensure_default_palette_ready();
389 double start_mono = get_time_seconds();
390 for (int i = 0; i < adaptive_iterations; i++) {
391 char *result_str = image_print(test_image, DEFAULT_ASCII_PALETTE);
392 if (result_str)
393 SAFE_FREE(result_str);
394 }
395 result.scalar_time = (get_time_seconds() - start_mono) / adaptive_iterations;
396
397#if SIMD_SUPPORT_SSE2
398 // Benchmark SSE2 using new image-based timing function
399 // Benchmark SSE2 monochrome rendering
400 double start_sse2 = get_time_seconds();
401 for (int i = 0; i < adaptive_iterations; i++) {
402 char *result_str = render_ascii_image_monochrome_sse2(test_image, DEFAULT_ASCII_PALETTE);
403 if (result_str)
404 SAFE_FREE(result_str);
405 }
406 result.sse2_time = (get_time_seconds() - start_sse2) / adaptive_iterations;
407#endif
408
409#if SIMD_SUPPORT_SSSE3
410 // Benchmark SSSE3 using new image-based timing function
411 // Benchmark SSSE3 monochrome rendering
412 double start_ssse3 = get_time_seconds();
413 for (int i = 0; i < adaptive_iterations; i++) {
414 char *result_str = render_ascii_image_monochrome_ssse3(test_image, DEFAULT_ASCII_PALETTE);
415 if (result_str)
416 SAFE_FREE(result_str);
417 }
418 result.ssse3_time = (get_time_seconds() - start_ssse3) / adaptive_iterations;
419#endif
420
421#if SIMD_SUPPORT_AVX2
422 // Benchmark AVX2 using optimized single-pass implementation
423 // Benchmark AVX2 monochrome rendering
424 double start_avx2 = get_time_seconds();
425 for (int i = 0; i < adaptive_iterations; i++) {
426 char *result_str = render_ascii_image_monochrome_avx2(test_image, DEFAULT_ASCII_PALETTE);
427 if (result_str)
428 SAFE_FREE(result_str);
429 }
430 result.avx2_time = (get_time_seconds() - start_avx2) / adaptive_iterations;
431#endif
432
433#if SIMD_SUPPORT_NEON
434 // Benchmark NEON using new image-based timing function
435 // TODO: Update benchmark to use custom palette testing
436 // Benchmark NEON monochrome rendering
437 double start_neon = get_time_seconds();
438 for (int i = 0; i < adaptive_iterations; i++) {
439 char *result_str = render_ascii_image_monochrome_neon(test_image, DEFAULT_ASCII_PALETTE);
440 if (result_str)
441 SAFE_FREE(result_str);
442 }
443 result.neon_time = (get_time_seconds() - start_neon) / adaptive_iterations;
444#endif
445
446#if SIMD_SUPPORT_SVE
447 // SVE benchmarking disabled - function removed
448 result.sve_time = 0.0;
449#endif
450
451 // Find best method
452 double best_time = result.scalar_time;
453 result.best_method = "scalar";
454
455#if SIMD_SUPPORT_SSE2
456 if (result.sse2_time > 0 && result.sse2_time < best_time) {
457 best_time = result.sse2_time;
458 result.best_method = "SSE2";
459 }
460#endif
461
462#if SIMD_SUPPORT_SSSE3
463 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
464 best_time = result.ssse3_time;
465 result.best_method = "SSSE3";
466 }
467#endif
468
469#if SIMD_SUPPORT_AVX2
470 if (result.avx2_time > 0 && result.avx2_time < best_time) {
471 best_time = result.avx2_time;
472 result.best_method = "AVX2";
473 }
474#endif
475
476#if SIMD_SUPPORT_NEON
477 if (result.neon_time > 0 && result.neon_time < best_time) {
478 best_time = result.neon_time;
479 result.best_method = "NEON";
480 }
481#endif
482
483 result.speedup_best = result.scalar_time / best_time;
484
485#if SIMD_SUPPORT_SVE
486 if (result.sve_time > 0 && result.sve_time < best_time) {
487 best_time = result.sve_time;
488 result.best_method = "SVE";
489 }
490#endif
491
492 // Cleanup
493 image_destroy(test_image);
494 SAFE_FREE(test_pixels);
495 SAFE_FREE(output_buffer);
496
497 return result;
498}
499
500simd_benchmark_t benchmark_simd_color_conversion(int width, int height, int iterations, bool background_mode) {
501 simd_benchmark_t result = {0};
502
503 // Check for integer overflow in pixel count calculation
504 size_t pixel_count;
505 if (checked_size_mul((size_t)width, (size_t)height, &pixel_count) != ASCIICHAT_OK) {
506 log_error("Image dimensions %d x %d too large (overflow)", width, height);
507 return result;
508 }
509
510 // Estimate output buffer size for colored ASCII (much larger than monochrome)
511 // Each pixel can generate ~25 bytes of ANSI escape codes + 1 char
512 size_t output_buffer_size = pixel_count * 30 + (size_t)width * 10; // Extra for newlines/reset codes
513
514 // Generate test data and test image for unified functions
515 rgb_pixel_t *test_pixels;
516 char *output_buffer;
517 test_pixels = SAFE_CALLOC_SIMD(pixel_count, sizeof(rgb_pixel_t), rgb_pixel_t *);
518 output_buffer = SAFE_MALLOC(output_buffer_size, char *);
519
520 // Create test image for new unified functions
521 image_t *frame = image_new(width, height);
522 if (!frame) {
523 SAFE_FREE(test_pixels);
524 SAFE_FREE(output_buffer);
525 return result;
526 }
527
528 // Use synthetic gradient data for consistent cross-platform benchmarking
529 printf("Using coherent gradient data for realistic color testing\n");
530 // NOLINTNEXTLINE(bugprone-random-generator-seed)
531 srand(12345); // For consistent gradient variation across runs
532 for (size_t i = 0; i < pixel_count; i++) {
533 int x = i % width;
534 int y = i / width;
535 // Create smooth gradients with some variation (mimics real images)
536 int base_r = (x * 255) / width;
537 int base_g = (y * 255) / height;
538 int base_b = ((x + y) * 127) / (width + height);
539
540 // Add realistic variation
541 int temp_r = base_r + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
542 int temp_g = base_g + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
543 int temp_b = base_b + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
544
545 test_pixels[i].r = clamp_rgb(temp_r);
546 test_pixels[i].g = clamp_rgb(temp_g);
547 test_pixels[i].b = clamp_rgb(temp_b);
548 }
549
550 // Populate test image with same data as test_pixels
551 frame->pixels = test_pixels;
552
553 const char *mode_str = background_mode ? "background" : "foreground";
554 printf("Benchmarking COLOR %s %dx%d (%zu pixels) x %d iterations...\n", mode_str, width, height, pixel_count,
555 iterations);
556
557 // Benchmark scalar color version
558 double start = get_time_seconds();
559 for (int i = 0; i < iterations; i++) {
560 char *result_str = image_print_color(frame, DEFAULT_ASCII_PALETTE);
561 if (result_str)
562 SAFE_FREE(result_str);
563 }
564 result.scalar_time = get_time_seconds() - start;
565
566#if SIMD_SUPPORT_SSE2
567 // Benchmark SSE2 color using unified function
568 start = get_time_seconds();
569 for (int i = 0; i < iterations; i++) {
570 char *ascii_output = render_ascii_sse2_unified_optimized(frame, background_mode, true, DEFAULT_ASCII_PALETTE);
571 if (ascii_output)
572 SAFE_FREE(ascii_output);
573 }
574 result.sse2_time = get_time_seconds() - start;
575#endif
576
577#if SIMD_SUPPORT_SSSE3
578 // Benchmark SSSE3 color using unified function
579 start = get_time_seconds();
580 for (int i = 0; i < iterations; i++) {
581 char *ascii_output = render_ascii_ssse3_unified_optimized(frame, background_mode, true, DEFAULT_ASCII_PALETTE);
582 if (ascii_output)
583 SAFE_FREE(ascii_output);
584 }
585 result.ssse3_time = get_time_seconds() - start;
586#endif
587
588#if SIMD_SUPPORT_AVX2
589 // Benchmark AVX2 color using unified function
590 start = get_time_seconds();
591 for (int i = 0; i < iterations; i++) {
592 char *ascii_output = render_ascii_avx2_unified_optimized(frame, background_mode, true, DEFAULT_ASCII_PALETTE);
593 if (ascii_output)
594 SAFE_FREE(ascii_output);
595 }
596 result.avx2_time = get_time_seconds() - start;
597#endif
598
599#if SIMD_SUPPORT_NEON
600 // Benchmark NEON color
601 start = get_time_seconds();
602 for (int i = 0; i < iterations; i++) {
603 // Create temporary image for unified function
604 image_t temp_image = {.pixels = test_pixels, .w = width, .h = height, .alloc_method = IMAGE_ALLOC_SIMD};
605 char *ascii_output = render_ascii_neon_unified_optimized(&temp_image, background_mode, true, DEFAULT_ASCII_PALETTE);
606 if (ascii_output)
607 SAFE_FREE(ascii_output);
608 }
609 result.neon_time = get_time_seconds() - start;
610#endif
611
612 // Find best method
613 double best_time = result.scalar_time;
614 result.best_method = "scalar";
615
616#if SIMD_SUPPORT_SSE2
617 if (result.sse2_time > 0 && result.sse2_time < best_time) {
618 best_time = result.sse2_time;
619 result.best_method = "SSE2";
620 }
621#endif
622
623#if SIMD_SUPPORT_SSSE3
624 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
625 best_time = result.ssse3_time;
626 result.best_method = "SSSE3";
627 }
628#endif
629
630#if SIMD_SUPPORT_AVX2
631 if (result.avx2_time > 0 && result.avx2_time < best_time) {
632 best_time = result.avx2_time;
633 result.best_method = "AVX2";
634 }
635#endif
636
637#if SIMD_SUPPORT_NEON
638 if (result.neon_time > 0 && result.neon_time < best_time) {
639 best_time = result.neon_time;
640 result.best_method = "NEON";
641 }
642#endif
643
644 result.speedup_best = result.scalar_time / best_time;
645
646 // Cleanup - frame owns test_pixels now
647 frame->pixels = NULL; // Don't double-free
648 image_destroy(frame);
649 SAFE_FREE(test_pixels);
650 SAFE_FREE(output_buffer);
651
652 return result;
653}
654
655// Enhanced benchmark function with image source support
656simd_benchmark_t benchmark_simd_conversion_with_source(int width, int height, int iterations, bool background_mode,
657 const image_t *source_image, bool use_256color) {
658 simd_benchmark_t result = {0};
659 (void)background_mode; // Suppress unused parameter warning
660 (void)use_256color; // Suppress unused parameter warning
661
662 // Check for integer overflow in pixel count calculation
663 size_t pixel_count;
664 if (checked_size_mul((size_t)width, (size_t)height, &pixel_count) != ASCIICHAT_OK) {
665 log_error("Image dimensions %d x %d too large (overflow)", width, height);
666 return result;
667 }
668
669 // Generate test data
670 rgb_pixel_t *test_pixels;
671 char *output_buffer;
672 const size_t output_buffer_size = pixel_count * 16;
673 test_pixels = SAFE_CALLOC_SIMD(pixel_count, sizeof(rgb_pixel_t), rgb_pixel_t *);
674 output_buffer = SAFE_MALLOC(output_buffer_size, char *);
675
676 if (source_image && source_image->pixels) {
677 printf("Using provided image data (%dx%d) for testing\n", source_image->w, source_image->h);
678
679 // Resize source image to test dimensions if needed
680 if (source_image->w == width && source_image->h == height) {
681 // Direct copy
682 for (size_t i = 0; i < pixel_count; i++) {
683 test_pixels[i].r = source_image->pixels[i].r;
684 test_pixels[i].g = source_image->pixels[i].g;
685 test_pixels[i].b = source_image->pixels[i].b;
686 }
687 } else {
688 // Simple nearest-neighbor resize
689 for (int y = 0; y < height; y++) {
690 for (int x = 0; x < width; x++) {
691 int src_x = (x * source_image->w) / width;
692 int src_y = (y * source_image->h) / height;
693 // Use size_t for index calculations to prevent integer overflow
694 size_t src_idx = (size_t)src_y * (size_t)source_image->w + (size_t)src_x;
695 size_t dst_idx = (size_t)y * (size_t)width + (size_t)x;
696
697 if (src_idx < (size_t)source_image->w * (size_t)source_image->h) {
698 test_pixels[dst_idx].r = source_image->pixels[src_idx].r;
699 test_pixels[dst_idx].g = source_image->pixels[src_idx].g;
700 test_pixels[dst_idx].b = source_image->pixels[src_idx].b;
701 }
702 }
703 }
704 printf("Resized image data from %dx%d to %dx%d\n", source_image->w, source_image->h, width, height);
705 }
706 } else {
707 // Fall back to synthetic gradient data
708 printf("No source image provided, using synthetic gradient data\n");
709 srand(12345); // NOLINT(cert-msc32-c,cert-msc51-cpp,bugprone-random-generator-seed)
710 for (size_t i = 0; i < pixel_count; i++) {
711 int x = i % width;
712 int y = i / width;
713 int base_r = (x * 255 / width);
714 int base_g = (y * 255 / height);
715 int base_b = ((x + y) * 127 / (width + height));
716
717 int temp_r = base_r + (rand() % 16 - 8); // NOLINT(cert-msc30-c,cert-msc50-cpp)
718 int temp_g = base_g + (rand() % 16 - 8); // NOLINT(cert-msc30-c,cert-msc50-cpp)
719 int temp_b = base_b + (rand() % 16 - 8); // NOLINT(cert-msc30-c,cert-msc50-cpp)
720
721 test_pixels[i].r = clamp_rgb(temp_r);
722 test_pixels[i].g = clamp_rgb(temp_g);
723 test_pixels[i].b = clamp_rgb(temp_b);
724 }
725 }
726
727 // Calculate adaptive iterations for reliable timing
728 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
729 printf("Benchmarking %dx%d (%zu pixels) using %d adaptive iterations (ignoring passed iterations)...\n", width,
730 height, pixel_count, adaptive_iterations);
731
732 // Benchmark all available SIMD variants using unified image-based API
733 image_t *frame = image_new(width, height);
734 memcpy(frame->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
735
736 // Benchmark scalar using color conversion
737 ensure_default_palette_ready();
738 double start_scalar = get_time_seconds();
739 for (int i = 0; i < iterations; i++) {
740 char *result_str = image_print_color(frame, DEFAULT_ASCII_PALETTE);
741 if (result_str)
742 SAFE_FREE(result_str);
743 }
744 result.scalar_time = (get_time_seconds() - start_scalar) / iterations;
745
746#if SIMD_SUPPORT_SSE2
747 // Benchmark SSE2 using unified optimized renderer
748 // Benchmark SSE2 color rendering
749 ensure_default_palette_ready();
750 double start_sse2_color = get_time_seconds();
751 for (int i = 0; i < iterations; i++) {
752 char *result_str = render_ascii_sse2_unified_optimized(frame, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
753 if (result_str)
754 SAFE_FREE(result_str);
755 }
756 result.sse2_time = (get_time_seconds() - start_sse2_color) / iterations;
757#endif
758
759#if SIMD_SUPPORT_SSSE3
760 // Benchmark SSSE3 using unified optimized renderer
761 // Benchmark SSSE3 color rendering
762 ensure_default_palette_ready();
763 double start_ssse3_color = get_time_seconds();
764 for (int i = 0; i < iterations; i++) {
765 char *result_str =
766 render_ascii_ssse3_unified_optimized(frame, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
767 if (result_str)
768 SAFE_FREE(result_str);
769 }
770 result.ssse3_time = (get_time_seconds() - start_ssse3_color) / iterations;
771#endif
772
773#if SIMD_SUPPORT_AVX2
774 // Benchmark AVX2 using unified optimized renderer
775 // Benchmark AVX2 color rendering
776 ensure_default_palette_ready();
777 double start_avx2_color = get_time_seconds();
778 for (int i = 0; i < iterations; i++) {
779 char *result_str = render_ascii_avx2_unified_optimized(frame, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
780 if (result_str)
781 SAFE_FREE(result_str);
782 }
783 result.avx2_time = (get_time_seconds() - start_avx2_color) / iterations;
784#endif
785
786#if SIMD_SUPPORT_NEON
787 // Benchmark NEON using unified optimized renderer
788 // Benchmark NEON color rendering
789 ensure_default_palette_ready();
790 double start_neon_color = get_time_seconds();
791 for (int i = 0; i < iterations; i++) {
792 char *result_str = render_ascii_neon_unified_optimized(frame, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
793 if (result_str)
794 SAFE_FREE(result_str);
795 }
796 result.neon_time = (get_time_seconds() - start_neon_color) / iterations;
797#endif
798
799#if SIMD_SUPPORT_SVE
800 // Benchmark SVE using unified optimized renderer
801 // Benchmark SVE color rendering
802 ensure_default_palette_ready();
803 double start_sve_color = get_time_seconds();
804 for (int i = 0; i < iterations; i++) {
805 char *result_str = render_ascii_sve_unified_optimized(frame, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
806 if (result_str)
807 SAFE_FREE(result_str);
808 }
809 result.sve_time = (get_time_seconds() - start_sve_color) / iterations;
810#endif
811
812 // Find best method
813 double best_time = result.scalar_time;
814 result.best_method = "scalar";
815
816#if SIMD_SUPPORT_SSE2
817 if (result.sse2_time > 0 && result.sse2_time < best_time) {
818 best_time = result.sse2_time;
819 result.best_method = "SSE2";
820 }
821#endif
822
823#if SIMD_SUPPORT_SSSE3
824 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
825 best_time = result.ssse3_time;
826 result.best_method = "SSSE3";
827 }
828#endif
829
830#if SIMD_SUPPORT_AVX2
831 if (result.avx2_time > 0 && result.avx2_time < best_time) {
832 best_time = result.avx2_time;
833 result.best_method = "AVX2";
834 }
835#endif
836
837#if SIMD_SUPPORT_NEON
838 if (result.neon_time > 0 && result.neon_time < best_time) {
839 best_time = result.neon_time;
840 result.best_method = "NEON";
841 }
842#endif
843
844 result.speedup_best = result.scalar_time / best_time;
845
846#if SIMD_SUPPORT_SVE
847 if (result.sve_time > 0 && result.sve_time < best_time) {
848 best_time = result.sve_time;
849 result.best_method = "SVE";
850 }
851#endif
852
853 image_destroy(frame);
854 SAFE_FREE(test_pixels);
855 SAFE_FREE(output_buffer);
856
857 return result;
858}
859
860// Enhanced color benchmark function with image source support
862 int __attribute__((unused)) iterations,
863 bool background_mode, const image_t *source_image,
864 bool use_256color) {
865 simd_benchmark_t result = {0};
866 (void)use_256color; // Suppress unused parameter warning
867
868 // Check for integer overflow in pixel count calculation
869 size_t pixel_count;
870 if (checked_size_mul((size_t)width, (size_t)height, &pixel_count) != ASCIICHAT_OK) {
871 log_error("Image dimensions %d x %d too large (overflow)", width, height);
872 return result;
873 }
874
875 size_t output_buffer_size = pixel_count * 30 + (size_t)width * 10;
876
877 // Allocate buffers for benchmarking
878 rgb_pixel_t *test_pixels;
879 char *output_buffer;
880 test_pixels = SAFE_CALLOC_SIMD(pixel_count, sizeof(rgb_pixel_t), rgb_pixel_t *);
881 output_buffer = SAFE_MALLOC(output_buffer_size, char *);
882
883 // Calculate adaptive iterations for color benchmarking (ignore passed iterations)
884 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
885
886 const char *mode_str = background_mode ? "background" : "foreground";
887
888 // Variables for webcam capture cleanup
889
890 if (source_image) {
891 printf("Using provided source image data for COLOR %s %dx%d benchmarking with %d iterations...\n", mode_str, width,
892 height, adaptive_iterations);
893
894 // Use provided source image - resize if needed
895 if (source_image->w == width && source_image->h == height) {
896 // Direct copy
897 for (size_t i = 0; i < pixel_count; i++) {
898 test_pixels[i].r = source_image->pixels[i].r;
899 test_pixels[i].g = source_image->pixels[i].g;
900 test_pixels[i].b = source_image->pixels[i].b;
901 }
902 } else {
903 // Resize source image to target dimensions
904 float x_ratio = (float)source_image->w / width;
905 float y_ratio = (float)source_image->h / height;
906
907 for (int y = 0; y < height; y++) {
908 for (int x = 0; x < width; x++) {
909 int src_x = (int)(x * x_ratio);
910 int src_y = (int)(y * y_ratio);
911
912 // Bounds check
913 if (src_x >= source_image->w)
914 src_x = source_image->w - 1;
915 if (src_y >= source_image->h)
916 src_y = source_image->h - 1;
917
918 // Use size_t for index calculations to prevent integer overflow
919 size_t src_idx = (size_t)src_y * (size_t)source_image->w + (size_t)src_x;
920 size_t dst_idx = (size_t)y * (size_t)width + (size_t)x;
921
922 test_pixels[dst_idx].r = source_image->pixels[src_idx].r;
923 test_pixels[dst_idx].g = source_image->pixels[src_idx].g;
924 test_pixels[dst_idx].b = source_image->pixels[src_idx].b;
925 }
926 }
927 }
928 } else {
929 // No source image provided: use synthetic gradient data for consistent testing
930 printf("Using synthetic gradient data for COLOR %s %dx%d benchmarking with %d iterations...\n", mode_str, width,
931 height, adaptive_iterations);
932
933 // NOLINTNEXTLINE(bugprone-random-generator-seed)
934 srand(12345); // Consistent results across runs
935 for (size_t i = 0; i < pixel_count; i++) {
936 int x = i % width;
937 int y = i / width;
938 int base_r = (x * 255) / width;
939 int base_g = (y * 255) / height;
940 int base_b = ((x + y) * 127) / (width + height);
941
942 int temp_r = base_r + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
943 int temp_g = base_g + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
944 int temp_b = base_b + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
945
946 test_pixels[i].r = clamp_rgb(temp_r);
947 test_pixels[i].g = clamp_rgb(temp_g);
948 test_pixels[i].b = clamp_rgb(temp_b);
949 }
950 }
951
952 printf("Benchmarking COLOR %s conversion using %d iterations...\n", mode_str, adaptive_iterations);
953
954 // FIX #5: Prewarm 256-color caches to avoid first-frame penalty (~1.5-2MB cache build)
955 prewarm_sgr256_fg_cache(); // Warmup 256-entry FG cache
956 prewarm_sgr256_cache(); // Warmup 65,536-entry FG+BG cache
957
958 // Benchmark scalar color conversion (pure conversion, no I/O)
959 double start = get_time_seconds();
960 for (int i = 0; i < adaptive_iterations; i++) {
961 image_t *test_image = image_new(width, height);
962 if (test_image == NULL) {
963 SAFE_FREE(test_pixels);
964 SAFE_FREE(output_buffer);
965 FATAL(ERROR_MEMORY, "Failed to allocate test_image in benchmark iteration %d", i);
966 }
967 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
968 char *result_ascii = ascii_convert(test_image, width, height, false, false, false, DEFAULT_ASCII_PALETTE,
970 if (result_ascii)
971 SAFE_FREE(result_ascii);
972 image_destroy(test_image);
973 }
974 result.scalar_time = get_time_seconds() - start;
975
976 // Find best method -- default to scalar and let simd beat it.
977 double best_time = result.scalar_time;
978 result.best_method = "scalar";
979
980#if SIMD_SUPPORT_SSE2
981 start = get_time_seconds();
982 for (int i = 0; i < adaptive_iterations; i++) {
983 image_t *test_image = image_new(width, height);
984 if (test_image) {
985 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
986 char *result_str =
987 render_ascii_sse2_unified_optimized(test_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
988 if (result_str)
989 SAFE_FREE(result_str);
990 image_destroy(test_image);
991 }
992 }
993 result.sse2_time = get_time_seconds() - start;
994#endif
995
996#if SIMD_SUPPORT_SSSE3
997 start = get_time_seconds();
998 for (int i = 0; i < adaptive_iterations; i++) {
999 image_t *test_image = image_new(width, height);
1000 if (test_image) {
1001 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
1002 char *result_str =
1003 render_ascii_ssse3_unified_optimized(test_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1004 if (result_str)
1005 SAFE_FREE(result_str);
1006 image_destroy(test_image);
1007 }
1008 }
1009 result.ssse3_time = get_time_seconds() - start;
1010#endif
1011
1012#if SIMD_SUPPORT_AVX2
1013 start = get_time_seconds();
1014 for (int i = 0; i < adaptive_iterations; i++) {
1015 image_t *test_image = image_new(width, height);
1016 if (test_image) {
1017 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
1018 char *result_str =
1019 render_ascii_avx2_unified_optimized(test_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1020 if (result_str)
1021 SAFE_FREE(result_str);
1022 image_destroy(test_image);
1023 }
1024 }
1025 result.avx2_time = get_time_seconds() - start;
1026#endif
1027
1028#if SIMD_SUPPORT_NEON
1029 start = get_time_seconds();
1030 for (int i = 0; i < adaptive_iterations; i++) {
1031 // Create temporary image for unified function
1032 image_t temp_image = {.pixels = test_pixels, .w = width, .h = height, .alloc_method = IMAGE_ALLOC_SIMD};
1033 char *result =
1034 render_ascii_neon_unified_optimized(&temp_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1035 if (result)
1036 SAFE_FREE(result);
1037 }
1038 result.neon_time = get_time_seconds() - start;
1039#endif
1040
1041#if SIMD_SUPPORT_SVE
1042 start = get_time_seconds();
1043 for (int i = 0; i < adaptive_iterations; i++) {
1044 // Create temporary image for unified function
1045 image_t temp_image = {.pixels = test_pixels, .w = width, .h = height, .alloc_method = IMAGE_ALLOC_SIMD};
1046 char *result =
1047 render_ascii_sve_unified_optimized(&temp_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1048 if (result)
1049 SAFE_FREE(result);
1050 }
1051 result.sve_time = get_time_seconds() - start;
1052#endif
1053
1054#if SIMD_SUPPORT_SSE2
1055 if (result.sse2_time > 0 && result.sse2_time < best_time) {
1056 best_time = result.sse2_time;
1057 result.best_method = "SSE2";
1058 }
1059#endif
1060
1061#if SIMD_SUPPORT_SSSE3
1062 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
1063 best_time = result.ssse3_time;
1064 result.best_method = "SSSE3";
1065 }
1066#endif
1067
1068#if SIMD_SUPPORT_AVX2
1069 if (result.avx2_time > 0 && result.avx2_time < best_time) {
1070 best_time = result.avx2_time;
1071 result.best_method = "AVX2";
1072 }
1073#endif
1074
1075#if SIMD_SUPPORT_NEON
1076 if (result.neon_time > 0 && result.neon_time < best_time) {
1077 best_time = result.neon_time;
1078 result.best_method = "NEON";
1079 }
1080#endif
1081
1082 // Normalize timing results by iteration count to get per-frame times
1083 result.scalar_time /= adaptive_iterations;
1084 if (result.sse2_time > 0)
1085 result.sse2_time /= adaptive_iterations;
1086 if (result.ssse3_time > 0)
1087 result.ssse3_time /= adaptive_iterations;
1088 if (result.avx2_time > 0)
1089 result.avx2_time /= adaptive_iterations;
1090 if (result.neon_time > 0)
1091 result.neon_time /= adaptive_iterations;
1092 // Recalculate best time after normalization
1093 best_time = result.scalar_time;
1094
1095#if SIMD_SUPPORT_SSE2
1096 if (result.sse2_time > 0 && result.sse2_time < best_time)
1097 best_time = result.sse2_time;
1098#endif
1099#if SIMD_SUPPORT_SSSE3
1100 if (result.ssse3_time > 0 && result.ssse3_time < best_time)
1101 best_time = result.ssse3_time;
1102#endif
1103#if SIMD_SUPPORT_AVX2
1104 if (result.avx2_time > 0 && result.avx2_time < best_time)
1105 best_time = result.avx2_time;
1106#endif
1107#if SIMD_SUPPORT_NEON
1108 if (result.neon_time > 0 && result.neon_time < best_time)
1109 best_time = result.neon_time;
1110#endif
1111#if SIMD_SUPPORT_SVE
1112 if (result.sve_time > 0 && result.sve_time < best_time)
1113 best_time = result.sve_time;
1114#endif
1115
1116 result.speedup_best = result.scalar_time / best_time;
1117
1118 printf("------------\n");
1119 printf("scalar: %f\n", result.scalar_time);
1120 if (result.sse2_time > 0)
1121 printf("SSE2: %f\n", result.sse2_time);
1122 if (result.ssse3_time > 0)
1123 printf("SSSE3: %f\n", result.ssse3_time);
1124 if (result.avx2_time > 0)
1125 printf("avx2: %f\n", result.avx2_time);
1126 if (result.neon_time > 0)
1127 printf("neon: %f\n", result.neon_time);
1128 if (result.sve_time > 0)
1129 printf("sve: %f\n", result.sve_time);
1130 printf("Best method: %s, time: %f (%.2fx speedup (<1.0 = bad))\n", result.best_method, best_time,
1131 result.speedup_best);
1132 printf("------------\n");
1133
1134 // Frame data already cleaned up in webcam capture section
1135 SAFE_FREE(test_pixels);
1136 SAFE_FREE(output_buffer);
1137
1138 return result;
1139}
🔌 Cross-platform abstraction layer umbrella header for ascii-chat
simd_benchmark_t benchmark_simd_conversion(int width, int height, int __attribute__((unused)) iterations)
Definition ascii_simd.c:334
simd_benchmark_t benchmark_simd_color_conversion_with_source(int width, int height, int __attribute__((unused)) iterations, bool background_mode, const image_t *source_image, bool use_256color)
Definition ascii_simd.c:861
SIMD-optimized ASCII conversion interface.
AVX2-optimized ASCII rendering functions.
#define SAFE_REALLOC(ptr, size, cast)
Definition common.h:228
unsigned int uint32_t
Definition common.h:58
#define SAFE_FREE(ptr)
Definition common.h:320
#define SAFE_MALLOC(size, cast)
Definition common.h:208
#define SAFE_CALLOC_SIMD(count, size, cast)
Definition common.h:311
#define FATAL(code,...)
Exit with error code and custom message, with stack trace in debug builds.
Definition common.h:151
unsigned char uint8_t
Definition common.h:56
@ ERROR_MEMORY
Definition error_codes.h:53
@ ASCIICHAT_OK
Definition error_codes.h:48
#define log_error(...)
Log an ERROR message.
const size_t DEFAULT_ASCII_PALETTE_LEN
Length of default ASCII palette.
Definition palette.c:49
const char DEFAULT_ASCII_PALETTE[]
Default ASCII palette for legacy functions.
Definition palette.c:48
simd_benchmark_t benchmark_simd_color_conversion(int width, int height, int iterations, bool background_mode)
Benchmark SIMD color conversion methods.
Definition ascii_simd.c:500
global_dec3_cache_t g_dec3_cache
Global decimal cache instance.
Definition ascii_simd.c:23
#define LUMA_BLUE
Luminance blue coefficient (0.114 * 256 ≈ 29)
Definition ascii_simd.h:76
#define LUMA_GREEN
Luminance green coefficient (0.587 * 256 ≈ 150)
Definition ascii_simd.h:74
dec3_t dec3_table[256]
Definition ascii_simd.h:98
char * image_print(const image_t *p, const char *palette)
Print image as ASCII art (monochrome)
const char * best_method
Definition ascii_simd.h:279
char * convert_pixels_scalar_with_newlines(image_t *image, const char luminance_palette[256])
Convert image to ASCII with newlines (scalar fallback)
Definition ascii_simd.c:182
void init_dec3(void)
Initialize decimal lookup table.
Definition ascii_simd.c:58
void str_reserve(Str *s, size_t need)
Reserve space in string buffer.
Definition ascii_simd.c:120
void str_printf(Str *s, const char *fmt,...)
Append formatted string to buffer.
Definition ascii_simd.c:141
uint8_t len
Definition ascii_simd.h:88
void str_free(Str *s)
Free string buffer.
Definition ascii_simd.c:114
void str_append_c(Str *s, char c)
Append character to string buffer.
Definition ascii_simd.c:136
void str_init(Str *s)
Initialize string buffer.
Definition ascii_simd.c:108
void ob_term(outbuf_t *ob)
Append null terminator to buffer.
char * image_print_color(const image_t *p, const char *palette)
Print image as ASCII art with color.
size_t write_rgb_triplet(uint8_t value, char *dst)
Write decimal RGB triplet using dec3 cache.
Definition ascii_simd.c:26
void prewarm_sgr256_cache(void)
Prewarm 256-color foreground/background cache for benchmarks.
void ob_putc(outbuf_t *ob, char c)
Append a character to buffer.
void print_simd_capabilities(void)
Print detected SIMD capabilities.
Definition ascii_simd.c:276
simd_benchmark_t benchmark_simd_conversion_with_source(int width, int height, int iterations, bool background_mode, const image_t *source_image, bool use_256color)
Benchmark SIMD conversion with source image.
Definition ascii_simd.c:656
bool rep_is_profitable(uint32_t runlen)
Check if run-length encoding is profitable.
void convert_pixels_scalar(const rgb_pixel_t *pixels, char *ascii_chars, int count, const char luminance_palette[256])
Convert pixels to ASCII (scalar fallback)
Definition ascii_simd.c:167
char g_default_luminance_palette[256]
Default luminance palette (256 characters)
Definition ascii_simd.c:33
void emit_rep(outbuf_t *ob, uint32_t extra)
Emit run-length encoded sequence.
void prewarm_sgr256_fg_cache(void)
Prewarm 256-color foreground cache for benchmarks.
ImageRGB alloc_image(int w, int h)
Allocate a new ImageRGB (RGB8 format)
Definition ascii_simd.c:98
size_t len
Definition ascii_simd.h:146
uint8_t * pixels
Definition ascii_simd.h:228
char s[3]
Definition ascii_simd.h:89
size_t cap
Definition ascii_simd.h:147
char * image_print_simd(image_t *image, const char *ascii_chars)
Print image as ASCII using SIMD (monochrome)
Definition ascii_simd.c:252
void init_default_luminance_palette(void)
Initialize default luminance palette.
Definition ascii_simd.c:37
char * ascii_convert(image_t *original, const ssize_t width, const ssize_t height, const bool color, const bool _aspect_ratio, const bool stretch, const char *palette_chars, const char luminance_palette[256])
Convert image to ASCII art.
Definition ascii.c:67
char * data
Definition ascii_simd.h:145
void str_append_bytes(Str *s, const void *src, size_t n)
Append bytes to string buffer.
Definition ascii_simd.c:130
void ascii_simd_init(void)
Initialize SIMD subsystem.
Definition ascii_simd.c:90
void image_destroy(image_t *p)
Destroy an image allocated with image_new()
Definition video/image.c:85
#define LUMA_RED
Luminance red coefficient (0.299 * 256 ≈ 77)
Definition ascii_simd.h:72
image_t * image_new(size_t width, size_t height)
Create a new image with standard allocation.
Definition video/image.c:36
@ IMAGE_ALLOC_SIMD
Pixels allocated with SAFE_MALLOC_SIMD()
🔢 Mathematical Utility Functions
Dynamic Output Buffer with ANSI Sequence Support.
✅ Safe Integer Arithmetic and Overflow Detection
ASCII Palette Management for Video-to-ASCII Conversion.
ImageRGB structure for NEON renderers.
Definition ascii_simd.h:225
Dynamic string buffer structure.
Definition ascii_simd.h:144
RGB pixel structure.
Definition video/image.h:80
Decimal conversion cache structure (1-3 digits)
Definition ascii_simd.h:87
Global decimal cache for digit conversion.
Definition ascii_simd.h:97
Image structure.
int w
Image width in pixels (must be > 0)
int h
Image height in pixels (must be > 0)
rgb_pixel_t * pixels
Pixel data array (width * height RGB pixels, row-major order)
Dynamic output buffer (auto-expanding)
size_t cap
Buffer capacity in bytes (maximum length before reallocation)
char * buf
Buffer pointer (allocated, owned by caller, must be freed)
SIMD benchmark results structure.
Definition ascii_simd.h:271
⏱️ High-precision timing utilities using sokol_time.h and uthash
Common SIMD utilities and structures.