ascii-chat 0.8.38
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
ascii_simd.c
Go to the documentation of this file.
1
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10#include <time.h>
11#include <stdarg.h>
12
13#include <ascii-chat/platform/abstraction.h>
14#include <ascii-chat/platform/init.h>
15#include <ascii-chat/common.h>
16#include <ascii-chat/video/simd/ascii_simd.h>
17#include <ascii-chat/video/palette.h>
18#include <ascii-chat/video/ascii.h>
19#include <ascii-chat/video/output_buffer.h>
20#include <ascii-chat/video/simd/avx2.h>
21#include <ascii-chat/util/math.h>
22#include <ascii-chat/util/overflow.h>
23#include <ascii-chat/util/time.h>
24
// Shared cache of decimal strings for the byte values 0..255.
// Starts uninitialized; init_dec3() fills dec3_table and sets dec3_initialized.
25global_dec3_cache_t g_dec3_cache = {.dec3_initialized = false};
26
27// Helper: write decimal RGB triplet using dec3 cache
28size_t write_rgb_triplet(uint8_t value, char *dst) {
29 const dec3_t *d = &g_dec3_cache.dec3_table[value];
30 memcpy(dst, d->s, d->len);
31 return d->len;
32}
33
34// Default luminance palette for legacy functions
// Flag set to true once the default luminance mapping has been built; it is
// checked and written while g_default_palette_mutex is held.
36static bool g_default_palette_initialized = false;
// Serializes the one-time default-palette initialization.
37static static_mutex_t g_default_palette_mutex = STATIC_MUTEX_INIT;
38
39// Initialize default luminance palette (thread-safe with mutex protection)
41 static_mutex_lock(&g_default_palette_mutex);
42
43 if (g_default_palette_initialized) {
44 static_mutex_unlock(&g_default_palette_mutex);
45 return;
46 }
47
48 // Build default luminance mapping using standard palette
49 const size_t len = DEFAULT_ASCII_PALETTE_LEN;
50 for (int i = 0; i < 256; i++) {
51 size_t palette_index = (i * (len - 1) + 127) / 255;
52 if (palette_index >= len) {
53 palette_index = len - 1;
54 }
56 }
57 g_default_palette_initialized = true;
58 static_mutex_unlock(&g_default_palette_mutex);
59}
60
61// Helper function for benchmarks and fallback cases
// Called by the benchmark routines before they time a renderer.
// NOTE(review): the body statement (listing line 63) is not visible in this
// extract; presumably it triggers the default-palette initialization - confirm
// against the real source before editing.
62static void ensure_default_palette_ready(void) {
64}
65
66void init_dec3(void) {
67 if (g_dec3_cache.dec3_initialized)
68 return;
69 for (int v = 0; v < 256; ++v) {
70 int d2 = v / 100; // 0..2
71 int r = v - d2 * 100; // 0..99
72 int d1 = r / 10; // 0..9
73 int d0 = r - d1 * 10; // 0..9
74
75 if (d2) {
76 g_dec3_cache.dec3_table[v].len = 3;
77 g_dec3_cache.dec3_table[v].s[0] = '0' + d2;
78 g_dec3_cache.dec3_table[v].s[1] = '0' + d1;
79 g_dec3_cache.dec3_table[v].s[2] = '0' + d0;
80 } else if (d1) {
81 g_dec3_cache.dec3_table[v].len = 2;
82 g_dec3_cache.dec3_table[v].s[0] = '0' + d1;
83 g_dec3_cache.dec3_table[v].s[1] = '0' + d0;
84 } else {
85 g_dec3_cache.dec3_table[v].len = 1;
86 g_dec3_cache.dec3_table[v].s[0] = '0' + d0;
87 }
88 }
89 g_dec3_cache.dec3_initialized = true;
90}
91
92// NOTE: Constructor disabled for musl static builds - causes hangs
93// __attribute__((constructor)) static void ascii_ctor(void) {
94// init_dec3();
95// init_default_luminance_palette();
96// }
97
// Explicit initialization entry point for the lookup tables in this file;
// it exists because the automatic constructor is disabled (see note above).
// NOTE(review): listing line 102 is not visible in this extract; the comment
// below mentions two init functions, so a second init call presumably lives
// there - confirm against the real source.
98void ascii_simd_init(void) {
99 // Initialize SIMD lookup tables manually (constructor disabled for musl compatibility)
100 // Both init functions have guards to prevent double-initialization
101 init_dec3();
103}
104
105// Allocate a new image (RGB8), use SAFE_MALLOC for consistent error handling
106ImageRGB alloc_image(int w, int h) {
107 ImageRGB out;
108 out.w = w;
109 out.h = h;
110 size_t n = (size_t)w * (size_t)h * 3u;
111 out.pixels = SAFE_MALLOC(n, uint8_t *);
112 return out;
113}
114
115// String utility functions
116void str_init(Str *s) {
117 s->data = NULL;
118 s->len = 0;
119 s->cap = 0;
120}
121
122void str_destroy(Str *s) {
123 SAFE_FREE(s->data);
124 s->data = NULL;
125 s->len = s->cap = 0;
126}
127
128void str_reserve(Str *s, size_t need) {
129 if (need <= s->cap)
130 return;
131 size_t ncap = s->cap ? s->cap : 4096;
132 while (ncap < need)
133 ncap = (ncap * 3) / 2 + 64;
134 s->data = SAFE_REALLOC(s->data, ncap, char *);
135 s->cap = ncap;
136}
137
138void str_append_bytes(Str *s, const void *src, size_t n) {
139 str_reserve(s, s->len + n);
140 memcpy(s->data + s->len, src, n);
141 s->len += n;
142}
143
144void str_append_c(Str *s, char c) {
145 str_reserve(s, s->len + 1);
146 s->data[s->len++] = c;
147}
148
149void str_printf(Str *s, const char *fmt, ...) {
150 va_list ap;
151 va_start(ap, fmt);
152 char stackbuf[256];
153 int n = safe_vsnprintf(stackbuf, sizeof(stackbuf), fmt, ap);
154 va_end(ap);
155 if (n < 0)
156 return;
157 if ((size_t)n < sizeof(stackbuf)) {
158 str_append_bytes(s, stackbuf, (size_t)n);
159 return;
160 }
161 char *heap;
162 heap = SAFE_MALLOC((size_t)n + 1, char *);
163 va_start(ap, fmt);
164 safe_vsnprintf(heap, (size_t)n + 1, fmt, ap);
165 va_end(ap);
166 str_append_bytes(s, heap, (size_t)n);
167 SAFE_FREE(heap);
168}
169
170/* ============================================================================
171 * Scalar Implementation (Baseline)
172 * ============================================================================
173 */
174
175void convert_pixels_scalar(const rgb_pixel_t *pixels, char *ascii_chars, int count, const char luminance_palette[256]) {
176 for (int i = 0; i < count; i++) {
177 const rgb_pixel_t *p = &pixels[i];
178
179 // Calculate luminance using integer arithmetic
180 int luminance = (LUMA_RED * p->r + LUMA_GREEN * p->g + LUMA_BLUE * p->b) >> 8;
181
182 // Clamp to [0, 255]
183 if (luminance > 255)
184 luminance = 255;
185
186 ascii_chars[i] = luminance_palette[luminance];
187 }
188}
189
190char *convert_pixels_scalar_with_newlines(image_t *image, const char luminance_palette[256]) {
191 const int h = image->h;
192 const int w = image->w;
193
194 // Get UTF-8 character cache for RLE emission
195 // Note: We need to reverse-engineer the palette chars from luminance_palette
196 // For now, use a simpler approach with direct luminance lookup
197
198 // Use outbuf_t for efficient UTF-8 RLE emission (same as SIMD renderers)
199 outbuf_t ob = {0};
200 const size_t max_char_bytes = 4; // Max UTF-8 character size
201 ob.cap = (size_t)h * ((size_t)w * max_char_bytes + 1);
202 ob.buf = SAFE_MALLOC(ob.cap ? ob.cap : 1, char *);
203 if (!ob.buf) {
204 log_error("Failed to allocate output buffer for scalar rendering");
205 return NULL;
206 }
207
208 // Process pixels with RLE optimization
209 for (int y = 0; y < h; y++) {
210 const rgb_pixel_t *row_pixels = (const rgb_pixel_t *)&image->pixels[y * w];
211
212 for (int x = 0; x < w;) {
213 const rgb_pixel_t *p = &row_pixels[x];
214
215 // Calculate luminance using integer arithmetic
216 int luminance = (LUMA_RED * p->r + LUMA_GREEN * p->g + LUMA_BLUE * p->b) >> 8;
217 if (luminance > 255)
218 luminance = 255;
219
220 char current_char = luminance_palette[luminance];
221
222 // Find run length for same character (RLE optimization)
223 int j = x + 1;
224 while (j < w) {
225 const rgb_pixel_t *next_p = &row_pixels[j];
226 int next_luminance = (LUMA_RED * next_p->r + LUMA_GREEN * next_p->g + LUMA_BLUE * next_p->b) >> 8;
227 if (next_luminance > 255)
228 next_luminance = 255;
229 char next_char = luminance_palette[next_luminance];
230 if (next_char != current_char)
231 break;
232 j++;
233 }
234 uint32_t run = (uint32_t)(j - x);
235
236 // Emit character with RLE (same as SIMD)
237 ob_putc(&ob, current_char);
238 if (rep_is_profitable(run)) {
239 emit_rep(&ob, run - 1);
240 } else {
241 for (uint32_t k = 1; k < run; k++) {
242 ob_putc(&ob, current_char);
243 }
244 }
245 x = j;
246 }
247
248 // Add newline (except for last row)
249 if (y != h - 1) {
250 ob_putc(&ob, '\n');
251 }
252 }
253
254 ob_term(&ob);
255 return ob.buf;
256}
257
258// --------------------------------------
259// SIMD-convert an image into ASCII characters and return it with newlines
260char *image_print_simd(image_t *image, const char *ascii_chars) {
261#if SIMD_SUPPORT_AVX2
262 return render_ascii_image_monochrome_avx2(image, ascii_chars);
263#elif SIMD_SUPPORT_SSSE3
264 return render_ascii_image_monochrome_ssse3(image, ascii_chars);
265#elif SIMD_SUPPORT_SSE2
266 return render_ascii_image_monochrome_sse2(image, ascii_chars);
267#elif SIMD_SUPPORT_NEON
268 return render_ascii_image_monochrome_neon(image, ascii_chars);
269#else
270 // Fallback to scalar implementation - use image_print which properly handles
271 // the palette string (convert_pixels_scalar_with_newlines expects a 256-element
272 // luminance lookup table, not the raw palette string)
273 return image_print(image, ascii_chars);
274#endif
275}
276
277// NOTE: image_print_simd_with_palette is now redundant - use image_print_simd() directly
278
279/* ============================================================================
280 * Auto-dispatch and any helpers
281 * ============================================================================
282 */
283
285 printf("SIMD Support:\n");
286#if SIMD_SUPPORT_AVX2
287 printf(" ✓ AVX2 (32 pixels/cycle)\n");
288#endif
289#if SIMD_SUPPORT_NEON
290 printf(" ✓ ARM NEON (16 pixels/cycle)\n");
291#endif
292#if SIMD_SUPPORT_SVE
293 printf(" ✓ ARM SVE (scalable pixels/cycle)\n");
294#endif
295#if SIMD_SUPPORT_SSSE3
296 printf(" ✓ SSSE3 (16 pixels/cycle)\n");
297#endif
298#if SIMD_SUPPORT_SSE2
299 printf(" ✓ SSE2 (16 pixels/cycle)\n");
300#endif
301 printf(" ✓ Scalar fallback (1 pixel/cycle)\n");
302}
303
304/* ============================================================================
305 * Benchmarking
306 * ============================================================================
307 */
308
/**
 * Choose how many benchmark iterations to run for an image of a given size.
 *
 * The count is taken from fixed size tiers (smaller images get more
 * iterations) and then raised to a floor of 10. Because of that floor the
 * largest tier (nominally 5 iterations) also yields 10.
 *
 * @param pixel_count        Number of pixels in the benchmarked image.
 * @param target_duration_ms Intentionally unused; the tiers are fixed.
 * @return 100 below 5,000 pixels, 50 below 50,000, 20 below 200,000, else 10.
 */
static int calculate_adaptive_iterations(int pixel_count, double target_duration_ms) {
  (void)target_duration_ms; // Intentionally unused - uses fixed iteration counts based on pixel_count

  static const struct {
    int below;      // tier applies while pixel_count < below
    int iterations; // nominal iterations for the tier
  } tiers[] = {
      {5000, 100},  // e.g. 80x24
      {50000, 50},  // e.g. 160x48
      {200000, 20}, // e.g. 320x240
      {500000, 10}, // e.g. 640x480
  };
  const int floor_iterations = 10; // minimum for reliable timing

  int chosen = 5; // nominal value for anything larger than the last tier
  for (size_t t = 0; t < sizeof(tiers) / sizeof(tiers[0]); t++) {
    if (pixel_count < tiers[t].below) {
      chosen = tiers[t].iterations;
      break;
    }
  }

  return (chosen < floor_iterations) ? floor_iterations : chosen;
}
333
334simd_benchmark_t benchmark_simd_conversion(int width, int height, int iterations) {
335 (void)iterations; // Intentionally unused - uses adaptive iteration counts instead
336 simd_benchmark_t result = {0};
337
338 // Check for integer overflow in pixel count calculation
339 size_t pixel_count;
340 if (checked_size_mul((size_t)width, (size_t)height, &pixel_count) != ASCIICHAT_OK) {
341 log_error("Image dimensions %d x %d too large (overflow)", width, height);
342 return result;
343 }
344
345 // Generate test data and test image
346 rgb_pixel_t *test_pixels;
347 char *output_buffer;
348 test_pixels = SAFE_CALLOC_SIMD(pixel_count, sizeof(rgb_pixel_t), rgb_pixel_t *);
349 output_buffer = SAFE_MALLOC(pixel_count, char *);
350
351 // Create test image for new image-based functions
352 image_t *test_image = image_new(width, height);
353 if (!test_image) {
354 SAFE_FREE(test_pixels);
355 SAFE_FREE(output_buffer);
356 return result;
357 }
358
359 // Use synthetic data for consistent cross-platform testing
360 printf("Using synthetic gradient data for consistent benchmarking\n");
361 srand(12345); // Consistent results across runs // NOLINT(cert-msc32-c,cert-msc51-cpp,bugprone-random-generator-seed)
362 for (size_t i = 0; i < pixel_count; i++) {
363 int x = i % width;
364 int y = i / width;
365 // Create realistic gradient pattern with some variation
366 int base_r = (x * 255) / width;
367 int base_g = (y * 255) / height;
368 int base_b = ((x + y) * 127) / (width + height);
369
370 // Add small random variation to make it realistic
371 int temp_r = base_r + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
372 int temp_g = base_g + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
373 int temp_b = base_b + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
374
375 test_pixels[i].r = clamp_rgb(temp_r);
376 test_pixels[i].g = clamp_rgb(temp_g);
377 test_pixels[i].b = clamp_rgb(temp_b);
378 }
379
380 // Copy test data to test image pixels
381 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
382
383 // Calculate adaptive iterations for reliable timing
384 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
385 printf("Benchmarking MONO %dx%d (%zu pixels) using %d adaptive iterations (ignoring passed iterations)...\n", width,
386 height, pixel_count, adaptive_iterations);
387
388 // Benchmark scalar using image-based API
389 ensure_default_palette_ready();
390 double start_mono = time_ns_to_s(time_get_ns());
391 for (int i = 0; i < adaptive_iterations; i++) {
392 char *result_str = image_print(test_image, DEFAULT_ASCII_PALETTE);
393 if (result_str)
394 SAFE_FREE(result_str);
395 }
396 result.scalar_time = (time_ns_to_s(time_get_ns()) - start_mono) / adaptive_iterations;
397
398#if SIMD_SUPPORT_SSE2
399 // Benchmark SSE2 using new image-based timing function
400 // Benchmark SSE2 monochrome rendering
401 double start_sse2 = time_ns_to_s(time_get_ns());
402 for (int i = 0; i < adaptive_iterations; i++) {
403 char *result_str = render_ascii_image_monochrome_sse2(test_image, DEFAULT_ASCII_PALETTE);
404 if (result_str)
405 SAFE_FREE(result_str);
406 }
407 result.sse2_time = (time_ns_to_s(time_get_ns()) - start_sse2) / adaptive_iterations;
408#endif
409
410#if SIMD_SUPPORT_SSSE3
411 // Benchmark SSSE3 using new image-based timing function
412 // Benchmark SSSE3 monochrome rendering
413 double start_ssse3 = time_ns_to_s(time_get_ns());
414 for (int i = 0; i < adaptive_iterations; i++) {
415 char *result_str = render_ascii_image_monochrome_ssse3(test_image, DEFAULT_ASCII_PALETTE);
416 if (result_str)
417 SAFE_FREE(result_str);
418 }
419 result.ssse3_time = (time_ns_to_s(time_get_ns()) - start_ssse3) / adaptive_iterations;
420#endif
421
422#if SIMD_SUPPORT_AVX2
423 // Benchmark AVX2 using optimized single-pass implementation
424 // Benchmark AVX2 monochrome rendering
425 double start_avx2 = time_ns_to_s(time_get_ns());
426 for (int i = 0; i < adaptive_iterations; i++) {
427 char *result_str = render_ascii_image_monochrome_avx2(test_image, DEFAULT_ASCII_PALETTE);
428 if (result_str)
429 SAFE_FREE(result_str);
430 }
431 result.avx2_time = (time_ns_to_s(time_get_ns()) - start_avx2) / adaptive_iterations;
432#endif
433
434#if SIMD_SUPPORT_NEON
435 // Benchmark NEON using new image-based timing function
436 // TODO: Update benchmark to use custom palette testing
437 // Benchmark NEON monochrome rendering
438 double start_neon = time_ns_to_s(time_get_ns());
439 for (int i = 0; i < adaptive_iterations; i++) {
440 char *result_str = render_ascii_image_monochrome_neon(test_image, DEFAULT_ASCII_PALETTE);
441 if (result_str)
442 SAFE_FREE(result_str);
443 }
444 result.neon_time = (time_ns_to_s(time_get_ns()) - start_neon) / adaptive_iterations;
445#endif
446
447#if SIMD_SUPPORT_SVE
448 // SVE benchmarking disabled - function removed
449 result.sve_time = 0.0;
450#endif
451
452 // Find best method
453 double best_time = result.scalar_time;
454 result.best_method = "scalar";
455
456#if SIMD_SUPPORT_SSE2
457 if (result.sse2_time > 0 && result.sse2_time < best_time) {
458 best_time = result.sse2_time;
459 result.best_method = "SSE2";
460 }
461#endif
462
463#if SIMD_SUPPORT_SSSE3
464 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
465 best_time = result.ssse3_time;
466 result.best_method = "SSSE3";
467 }
468#endif
469
470#if SIMD_SUPPORT_AVX2
471 if (result.avx2_time > 0 && result.avx2_time < best_time) {
472 best_time = result.avx2_time;
473 result.best_method = "AVX2";
474 }
475#endif
476
477#if SIMD_SUPPORT_NEON
478 if (result.neon_time > 0 && result.neon_time < best_time) {
479 best_time = result.neon_time;
480 result.best_method = "NEON";
481 }
482#endif
483
484 result.speedup_best = result.scalar_time / best_time;
485
486#if SIMD_SUPPORT_SVE
487 if (result.sve_time > 0 && result.sve_time < best_time) {
488 best_time = result.sve_time;
489 result.best_method = "SVE";
490 }
491#endif
492
493 // Cleanup
494 image_destroy(test_image);
495 SAFE_FREE(test_pixels);
496 SAFE_FREE(output_buffer);
497
498 return result;
499}
500
501simd_benchmark_t benchmark_simd_color_conversion(int width, int height, int iterations, bool background_mode) {
502 simd_benchmark_t result = {0};
503
504 // Check for integer overflow in pixel count calculation
505 size_t pixel_count;
506 if (checked_size_mul((size_t)width, (size_t)height, &pixel_count) != ASCIICHAT_OK) {
507 log_error("Image dimensions %d x %d too large (overflow)", width, height);
508 return result;
509 }
510
511 // Estimate output buffer size for colored ASCII (much larger than monochrome)
512 // Each pixel can generate ~25 bytes of ANSI escape codes + 1 char
513 size_t output_buffer_size = pixel_count * 30 + (size_t)width * 10; // Extra for newlines/reset codes
514
515 // Generate test data and test image for unified functions
516 rgb_pixel_t *test_pixels;
517 char *output_buffer;
518 test_pixels = SAFE_CALLOC_SIMD(pixel_count, sizeof(rgb_pixel_t), rgb_pixel_t *);
519 output_buffer = SAFE_MALLOC(output_buffer_size, char *);
520
521 // Create test image for new unified functions
522 image_t *frame = image_new(width, height);
523 if (!frame) {
524 SAFE_FREE(test_pixels);
525 SAFE_FREE(output_buffer);
526 return result;
527 }
528
529 // Use synthetic gradient data for consistent cross-platform benchmarking
530 printf("Using coherent gradient data for realistic color testing\n");
531 srand(12345); // For consistent gradient variation across runs
532 for (size_t i = 0; i < pixel_count; i++) {
533 int x = i % width;
534 int y = i / width;
535 // Create smooth gradients with some variation (mimics real images)
536 int base_r = (x * 255) / width;
537 int base_g = (y * 255) / height;
538 int base_b = ((x + y) * 127) / (width + height);
539
540 // Add realistic variation
541 int temp_r = base_r + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
542 int temp_g = base_g + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
543 int temp_b = base_b + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
544
545 test_pixels[i].r = clamp_rgb(temp_r);
546 test_pixels[i].g = clamp_rgb(temp_g);
547 test_pixels[i].b = clamp_rgb(temp_b);
548 }
549
550 // Populate test image with same data as test_pixels
551 frame->pixels = test_pixels;
552
553 const char *mode_str = background_mode ? "background" : "foreground";
554 printf("Benchmarking COLOR %s %dx%d (%zu pixels) x %d iterations...\n", mode_str, width, height, pixel_count,
555 iterations);
556
557 // Benchmark scalar color version
558 double start = time_ns_to_s(time_get_ns());
559 for (int i = 0; i < iterations; i++) {
560 char *result_str = image_print_color(frame, DEFAULT_ASCII_PALETTE);
561 if (result_str)
562 SAFE_FREE(result_str);
563 }
564 result.scalar_time = time_ns_to_s(time_get_ns()) - start;
565
566#if SIMD_SUPPORT_SSE2
567 // Benchmark SSE2 color using unified function
568 start = time_ns_to_s(time_get_ns());
569 for (int i = 0; i < iterations; i++) {
570 char *ascii_output = render_ascii_sse2_unified_optimized(frame, background_mode, true, DEFAULT_ASCII_PALETTE);
571 if (ascii_output)
572 SAFE_FREE(ascii_output);
573 }
574 result.sse2_time = time_ns_to_s(time_get_ns()) - start;
575#endif
576
577#if SIMD_SUPPORT_SSSE3
578 // Benchmark SSSE3 color using unified function
579 start = time_ns_to_s(time_get_ns());
580 for (int i = 0; i < iterations; i++) {
581 char *ascii_output = render_ascii_ssse3_unified_optimized(frame, background_mode, true, DEFAULT_ASCII_PALETTE);
582 if (ascii_output)
583 SAFE_FREE(ascii_output);
584 }
585 result.ssse3_time = time_ns_to_s(time_get_ns()) - start;
586#endif
587
588#if SIMD_SUPPORT_AVX2
589 // Benchmark AVX2 color using unified function
590 start = time_ns_to_s(time_get_ns());
591 for (int i = 0; i < iterations; i++) {
592 char *ascii_output = render_ascii_avx2_unified_optimized(frame, background_mode, true, DEFAULT_ASCII_PALETTE);
593 if (ascii_output)
594 SAFE_FREE(ascii_output);
595 }
596 result.avx2_time = time_ns_to_s(time_get_ns()) - start;
597#endif
598
599#if SIMD_SUPPORT_NEON
600 // Benchmark NEON color
601 start = time_ns_to_s(time_get_ns());
602 for (int i = 0; i < iterations; i++) {
603 // Create temporary image for unified function
604 image_t temp_image = {.pixels = test_pixels, .w = width, .h = height, .alloc_method = IMAGE_ALLOC_SIMD};
605 char *ascii_output = render_ascii_neon_unified_optimized(&temp_image, background_mode, true, DEFAULT_ASCII_PALETTE);
606 if (ascii_output)
607 SAFE_FREE(ascii_output);
608 }
609 result.neon_time = time_ns_to_s(time_get_ns()) - start;
610#endif
611
612 // Find best method
613 double best_time = result.scalar_time;
614 result.best_method = "scalar";
615
616#if SIMD_SUPPORT_SSE2
617 if (result.sse2_time > 0 && result.sse2_time < best_time) {
618 best_time = result.sse2_time;
619 result.best_method = "SSE2";
620 }
621#endif
622
623#if SIMD_SUPPORT_SSSE3
624 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
625 best_time = result.ssse3_time;
626 result.best_method = "SSSE3";
627 }
628#endif
629
630#if SIMD_SUPPORT_AVX2
631 if (result.avx2_time > 0 && result.avx2_time < best_time) {
632 best_time = result.avx2_time;
633 result.best_method = "AVX2";
634 }
635#endif
636
637#if SIMD_SUPPORT_NEON
638 if (result.neon_time > 0 && result.neon_time < best_time) {
639 best_time = result.neon_time;
640 result.best_method = "NEON";
641 }
642#endif
643
644 result.speedup_best = result.scalar_time / best_time;
645
646 // Cleanup - frame owns test_pixels now
647 frame->pixels = NULL; // Don't double-free
648 image_destroy(frame);
649 SAFE_FREE(test_pixels);
650 SAFE_FREE(output_buffer);
651
652 return result;
653}
654
655// Enhanced benchmark function with image source support
656simd_benchmark_t benchmark_simd_conversion_with_source(int width, int height, int iterations, bool background_mode,
657 const image_t *source_image, bool use_256color) {
658 simd_benchmark_t result = {0};
659 (void)background_mode; // Suppress unused parameter warning
660 (void)use_256color; // Suppress unused parameter warning
661
662 // Check for integer overflow in pixel count calculation
663 size_t pixel_count;
664 if (checked_size_mul((size_t)width, (size_t)height, &pixel_count) != ASCIICHAT_OK) {
665 log_error("Image dimensions %d x %d too large (overflow)", width, height);
666 return result;
667 }
668
669 // Generate test data
670 rgb_pixel_t *test_pixels;
671 char *output_buffer;
672 const size_t output_buffer_size = pixel_count * 16;
673 test_pixels = SAFE_CALLOC_SIMD(pixel_count, sizeof(rgb_pixel_t), rgb_pixel_t *);
674 output_buffer = SAFE_MALLOC(output_buffer_size, char *);
675
676 if (source_image && source_image->pixels) {
677 printf("Using provided image data (%dx%d) for testing\n", source_image->w, source_image->h);
678
679 // Resize source image to test dimensions if needed
680 if (source_image->w == width && source_image->h == height) {
681 // Direct copy
682 for (size_t i = 0; i < pixel_count; i++) {
683 test_pixels[i].r = source_image->pixels[i].r;
684 test_pixels[i].g = source_image->pixels[i].g;
685 test_pixels[i].b = source_image->pixels[i].b;
686 }
687 } else {
688 // Simple nearest-neighbor resize
689 for (int y = 0; y < height; y++) {
690 for (int x = 0; x < width; x++) {
691 int src_x = (x * source_image->w) / width;
692 int src_y = (y * source_image->h) / height;
693 // Use size_t for index calculations to prevent integer overflow
694 size_t src_idx = (size_t)src_y * (size_t)source_image->w + (size_t)src_x;
695 size_t dst_idx = (size_t)y * (size_t)width + (size_t)x;
696
697 if (src_idx < (size_t)source_image->w * (size_t)source_image->h) {
698 test_pixels[dst_idx].r = source_image->pixels[src_idx].r;
699 test_pixels[dst_idx].g = source_image->pixels[src_idx].g;
700 test_pixels[dst_idx].b = source_image->pixels[src_idx].b;
701 }
702 }
703 }
704 printf("Resized image data from %dx%d to %dx%d\n", source_image->w, source_image->h, width, height);
705 }
706 } else {
707 // Fall back to synthetic gradient data
708 printf("No source image provided, using synthetic gradient data\n");
709 srand(12345); // NOLINT(cert-msc32-c,cert-msc51-cpp,bugprone-random-generator-seed)
710 for (size_t i = 0; i < pixel_count; i++) {
711 int x = i % width;
712 int y = i / width;
713 int base_r = (x * 255 / width);
714 int base_g = (y * 255 / height);
715 int base_b = ((x + y) * 127 / (width + height));
716
717 int temp_r = base_r + (rand() % 16 - 8); // NOLINT(cert-msc30-c,cert-msc50-cpp)
718 int temp_g = base_g + (rand() % 16 - 8); // NOLINT(cert-msc30-c,cert-msc50-cpp)
719 int temp_b = base_b + (rand() % 16 - 8); // NOLINT(cert-msc30-c,cert-msc50-cpp)
720
721 test_pixels[i].r = clamp_rgb(temp_r);
722 test_pixels[i].g = clamp_rgb(temp_g);
723 test_pixels[i].b = clamp_rgb(temp_b);
724 }
725 }
726
727 // Calculate adaptive iterations for reliable timing
728 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
729 printf("Benchmarking %dx%d (%zu pixels) using %d adaptive iterations (ignoring passed iterations)...\n", width,
730 height, pixel_count, adaptive_iterations);
731
732 // Benchmark all available SIMD variants using unified image-based API
733 image_t *frame = image_new(width, height);
734 memcpy(frame->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
735
736 // Benchmark scalar using color conversion
737 ensure_default_palette_ready();
738 double start_scalar = time_ns_to_s(time_get_ns());
739 for (int i = 0; i < iterations; i++) {
740 char *result_str = image_print_color(frame, DEFAULT_ASCII_PALETTE);
741 if (result_str)
742 SAFE_FREE(result_str);
743 }
744 result.scalar_time = (time_ns_to_s(time_get_ns()) - start_scalar) / iterations;
745
746#if SIMD_SUPPORT_SSE2
747 // Benchmark SSE2 using unified optimized renderer
748 // Benchmark SSE2 color rendering
749 ensure_default_palette_ready();
750 double start_sse2_color = time_ns_to_s(time_get_ns());
751 for (int i = 0; i < iterations; i++) {
752 char *result_str = render_ascii_sse2_unified_optimized(frame, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
753 if (result_str)
754 SAFE_FREE(result_str);
755 }
756 result.sse2_time = (time_ns_to_s(time_get_ns()) - start_sse2_color) / iterations;
757#endif
758
759#if SIMD_SUPPORT_SSSE3
760 // Benchmark SSSE3 using unified optimized renderer
761 // Benchmark SSSE3 color rendering
762 ensure_default_palette_ready();
763 double start_ssse3_color = time_ns_to_s(time_get_ns());
764 for (int i = 0; i < iterations; i++) {
765 char *result_str =
766 render_ascii_ssse3_unified_optimized(frame, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
767 if (result_str)
768 SAFE_FREE(result_str);
769 }
770 result.ssse3_time = (time_ns_to_s(time_get_ns()) - start_ssse3_color) / iterations;
771#endif
772
773#if SIMD_SUPPORT_AVX2
774 // Benchmark AVX2 using unified optimized renderer
775 // Benchmark AVX2 color rendering
776 ensure_default_palette_ready();
777 double start_avx2_color = time_ns_to_s(time_get_ns());
778 for (int i = 0; i < iterations; i++) {
779 char *result_str = render_ascii_avx2_unified_optimized(frame, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
780 if (result_str)
781 SAFE_FREE(result_str);
782 }
783 result.avx2_time = (time_ns_to_s(time_get_ns()) - start_avx2_color) / iterations;
784#endif
785
786#if SIMD_SUPPORT_NEON
787 // Benchmark NEON using unified optimized renderer
788 // Benchmark NEON color rendering
789 ensure_default_palette_ready();
790 double start_neon_color = time_ns_to_s(time_get_ns());
791 for (int i = 0; i < iterations; i++) {
792 char *result_str = render_ascii_neon_unified_optimized(frame, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
793 if (result_str)
794 SAFE_FREE(result_str);
795 }
796 result.neon_time = (time_ns_to_s(time_get_ns()) - start_neon_color) / iterations;
797#endif
798
799#if SIMD_SUPPORT_SVE
800 // Benchmark SVE using unified optimized renderer
801 // Benchmark SVE color rendering
802 ensure_default_palette_ready();
803 double start_sve_color = time_ns_to_s(time_get_ns());
804 for (int i = 0; i < iterations; i++) {
805 char *result_str = render_ascii_sve_unified_optimized(frame, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
806 if (result_str)
807 SAFE_FREE(result_str);
808 }
809 result.sve_time = (time_ns_to_s(time_get_ns()) - start_sve_color) / iterations;
810#endif
811
812 // Find best method
813 double best_time = result.scalar_time;
814 result.best_method = "scalar";
815
816#if SIMD_SUPPORT_SSE2
817 if (result.sse2_time > 0 && result.sse2_time < best_time) {
818 best_time = result.sse2_time;
819 result.best_method = "SSE2";
820 }
821#endif
822
823#if SIMD_SUPPORT_SSSE3
824 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
825 best_time = result.ssse3_time;
826 result.best_method = "SSSE3";
827 }
828#endif
829
830#if SIMD_SUPPORT_AVX2
831 if (result.avx2_time > 0 && result.avx2_time < best_time) {
832 best_time = result.avx2_time;
833 result.best_method = "AVX2";
834 }
835#endif
836
837#if SIMD_SUPPORT_NEON
838 if (result.neon_time > 0 && result.neon_time < best_time) {
839 best_time = result.neon_time;
840 result.best_method = "NEON";
841 }
842#endif
843
844 result.speedup_best = result.scalar_time / best_time;
845
846#if SIMD_SUPPORT_SVE
847 if (result.sve_time > 0 && result.sve_time < best_time) {
848 best_time = result.sve_time;
849 result.best_method = "SVE";
850 }
851#endif
852
853 image_destroy(frame);
854 SAFE_FREE(test_pixels);
855 SAFE_FREE(output_buffer);
856
857 return result;
858}
859
860// Enhanced color benchmark function with image source support
861simd_benchmark_t benchmark_simd_color_conversion_with_source(int width, int height, int iterations,
862 bool background_mode, const image_t *source_image,
863 bool use_256color) {
864 (void)iterations; // Intentionally unused - uses adaptive iteration counts instead
865 simd_benchmark_t result = {0};
866 (void)use_256color; // Suppress unused parameter warning
867
868 // Check for integer overflow in pixel count calculation
869 size_t pixel_count;
870 if (checked_size_mul((size_t)width, (size_t)height, &pixel_count) != ASCIICHAT_OK) {
871 log_error("Image dimensions %d x %d too large (overflow)", width, height);
872 return result;
873 }
874
875 size_t output_buffer_size = pixel_count * 30 + (size_t)width * 10;
876
877 // Allocate buffers for benchmarking
878 rgb_pixel_t *test_pixels;
879 char *output_buffer;
880 test_pixels = SAFE_CALLOC_SIMD(pixel_count, sizeof(rgb_pixel_t), rgb_pixel_t *);
881 output_buffer = SAFE_MALLOC(output_buffer_size, char *);
882
883 // Calculate adaptive iterations for color benchmarking (ignore passed iterations)
884 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
885
886 const char *mode_str = background_mode ? "background" : "foreground";
887
888 // Variables for webcam capture cleanup
889
890 if (source_image) {
891 printf("Using provided source image data for COLOR %s %dx%d benchmarking with %d iterations...\n", mode_str, width,
892 height, adaptive_iterations);
893
894 // Use provided source image - resize if needed
895 if (source_image->w == width && source_image->h == height) {
896 // Direct copy
897 for (size_t i = 0; i < pixel_count; i++) {
898 test_pixels[i].r = source_image->pixels[i].r;
899 test_pixels[i].g = source_image->pixels[i].g;
900 test_pixels[i].b = source_image->pixels[i].b;
901 }
902 } else {
903 // Resize source image to target dimensions
904 float x_ratio = (float)source_image->w / width;
905 float y_ratio = (float)source_image->h / height;
906
907 for (int y = 0; y < height; y++) {
908 for (int x = 0; x < width; x++) {
909 int src_x = (int)(x * x_ratio);
910 int src_y = (int)(y * y_ratio);
911
912 // Bounds check
913 if (src_x >= source_image->w)
914 src_x = source_image->w - 1;
915 if (src_y >= source_image->h)
916 src_y = source_image->h - 1;
917
918 // Use size_t for index calculations to prevent integer overflow
919 size_t src_idx = (size_t)src_y * (size_t)source_image->w + (size_t)src_x;
920 size_t dst_idx = (size_t)y * (size_t)width + (size_t)x;
921
922 test_pixels[dst_idx].r = source_image->pixels[src_idx].r;
923 test_pixels[dst_idx].g = source_image->pixels[src_idx].g;
924 test_pixels[dst_idx].b = source_image->pixels[src_idx].b;
925 }
926 }
927 }
928 } else {
929 // No source image provided: use synthetic gradient data for consistent testing
930 printf("Using synthetic gradient data for COLOR %s %dx%d benchmarking with %d iterations...\n", mode_str, width,
931 height, adaptive_iterations);
932
933 srand(12345); // Consistent results across runs
934 for (size_t i = 0; i < pixel_count; i++) {
935 int x = i % width;
936 int y = i / width;
937 int base_r = (x * 255) / width;
938 int base_g = (y * 255) / height;
939 int base_b = ((x + y) * 127) / (width + height);
940
941 int temp_r = base_r + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
942 int temp_g = base_g + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
943 int temp_b = base_b + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
944
945 test_pixels[i].r = clamp_rgb(temp_r);
946 test_pixels[i].g = clamp_rgb(temp_g);
947 test_pixels[i].b = clamp_rgb(temp_b);
948 }
949 }
950
951 printf("Benchmarking COLOR %s conversion using %d iterations...\n", mode_str, adaptive_iterations);
952
953 // FIX #5: Prewarm 256-color caches to avoid first-frame penalty (~1.5-2MB cache build)
954 prewarm_sgr256_fg_cache(); // Warmup 256-entry FG cache
955 prewarm_sgr256_cache(); // Warmup 65,536-entry FG+BG cache
956
957 // Benchmark scalar color conversion (pure conversion, no I/O)
958 double start = time_ns_to_s(time_get_ns());
959 for (int i = 0; i < adaptive_iterations; i++) {
960 image_t *test_image = image_new(width, height);
961 if (test_image == NULL) {
962 SAFE_FREE(test_pixels);
963 SAFE_FREE(output_buffer);
964 FATAL(ERROR_MEMORY, "Failed to allocate test_image in benchmark iteration %d", i);
965 }
966 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
967 char *result_ascii = ascii_convert(test_image, width, height, false, false, false, DEFAULT_ASCII_PALETTE,
969 if (result_ascii)
970 SAFE_FREE(result_ascii);
971 image_destroy(test_image);
972 }
973 result.scalar_time = time_ns_to_s(time_get_ns()) - start;
974
975 // Find best method -- default to scalar and let simd beat it.
976 double best_time = result.scalar_time;
977 result.best_method = "scalar";
978
979#if SIMD_SUPPORT_SSE2
980 start = time_ns_to_s(time_get_ns());
981 for (int i = 0; i < adaptive_iterations; i++) {
982 image_t *test_image = image_new(width, height);
983 if (test_image) {
984 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
985 char *result_str =
986 render_ascii_sse2_unified_optimized(test_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
987 if (result_str)
988 SAFE_FREE(result_str);
989 image_destroy(test_image);
990 }
991 }
992 result.sse2_time = time_ns_to_s(time_get_ns()) - start;
993#endif
994
995#if SIMD_SUPPORT_SSSE3
996 start = time_ns_to_s(time_get_ns());
997 for (int i = 0; i < adaptive_iterations; i++) {
998 image_t *test_image = image_new(width, height);
999 if (test_image) {
1000 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
1001 char *result_str =
1002 render_ascii_ssse3_unified_optimized(test_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1003 if (result_str)
1004 SAFE_FREE(result_str);
1005 image_destroy(test_image);
1006 }
1007 }
1008 result.ssse3_time = time_ns_to_s(time_get_ns()) - start;
1009#endif
1010
1011#if SIMD_SUPPORT_AVX2
1012 start = time_ns_to_s(time_get_ns());
1013 for (int i = 0; i < adaptive_iterations; i++) {
1014 image_t *test_image = image_new(width, height);
1015 if (test_image) {
1016 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
1017 char *result_str =
1018 render_ascii_avx2_unified_optimized(test_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1019 if (result_str)
1020 SAFE_FREE(result_str);
1021 image_destroy(test_image);
1022 }
1023 }
1024 result.avx2_time = time_ns_to_s(time_get_ns()) - start;
1025#endif
1026
1027#if SIMD_SUPPORT_NEON
1028 start = time_ns_to_s(time_get_ns());
1029 for (int i = 0; i < adaptive_iterations; i++) {
1030 // Create temporary image for unified function
1031 image_t temp_image = {.pixels = test_pixels, .w = width, .h = height, .alloc_method = IMAGE_ALLOC_SIMD};
1032 char *result =
1033 render_ascii_neon_unified_optimized(&temp_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1034 if (result)
1035 SAFE_FREE(result);
1036 }
1037 result.neon_time = time_ns_to_s(time_get_ns()) - start;
1038#endif
1039
1040#if SIMD_SUPPORT_SVE
1041 start = time_ns_to_s(time_get_ns());
1042 for (int i = 0; i < adaptive_iterations; i++) {
1043 // Create temporary image for unified function
1044 image_t temp_image = {.pixels = test_pixels, .w = width, .h = height, .alloc_method = IMAGE_ALLOC_SIMD};
1045 char *result =
1046 render_ascii_sve_unified_optimized(&temp_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1047 if (result)
1048 SAFE_FREE(result);
1049 }
1050 result.sve_time = time_ns_to_s(time_get_ns()) - start;
1051#endif
1052
1053#if SIMD_SUPPORT_SSE2
1054 if (result.sse2_time > 0 && result.sse2_time < best_time) {
1055 best_time = result.sse2_time;
1056 result.best_method = "SSE2";
1057 }
1058#endif
1059
1060#if SIMD_SUPPORT_SSSE3
1061 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
1062 best_time = result.ssse3_time;
1063 result.best_method = "SSSE3";
1064 }
1065#endif
1066
1067#if SIMD_SUPPORT_AVX2
1068 if (result.avx2_time > 0 && result.avx2_time < best_time) {
1069 best_time = result.avx2_time;
1070 result.best_method = "AVX2";
1071 }
1072#endif
1073
1074#if SIMD_SUPPORT_NEON
1075 if (result.neon_time > 0 && result.neon_time < best_time) {
1076 best_time = result.neon_time;
1077 result.best_method = "NEON";
1078 }
1079#endif
1080
1081 // Normalize timing results by iteration count to get per-frame times
1082 result.scalar_time /= adaptive_iterations;
1083 if (result.sse2_time > 0)
1084 result.sse2_time /= adaptive_iterations;
1085 if (result.ssse3_time > 0)
1086 result.ssse3_time /= adaptive_iterations;
1087 if (result.avx2_time > 0)
1088 result.avx2_time /= adaptive_iterations;
1089 if (result.neon_time > 0)
1090 result.neon_time /= adaptive_iterations;
1091 // Recalculate best time after normalization
1092 best_time = result.scalar_time;
1093
1094#if SIMD_SUPPORT_SSE2
1095 if (result.sse2_time > 0 && result.sse2_time < best_time)
1096 best_time = result.sse2_time;
1097#endif
1098#if SIMD_SUPPORT_SSSE3
1099 if (result.ssse3_time > 0 && result.ssse3_time < best_time)
1100 best_time = result.ssse3_time;
1101#endif
1102#if SIMD_SUPPORT_AVX2
1103 if (result.avx2_time > 0 && result.avx2_time < best_time)
1104 best_time = result.avx2_time;
1105#endif
1106#if SIMD_SUPPORT_NEON
1107 if (result.neon_time > 0 && result.neon_time < best_time)
1108 best_time = result.neon_time;
1109#endif
1110#if SIMD_SUPPORT_SVE
1111 if (result.sve_time > 0 && result.sve_time < best_time)
1112 best_time = result.sve_time;
1113#endif
1114
1115 result.speedup_best = result.scalar_time / best_time;
1116
1117 printf("------------\n");
1118 printf("scalar: %f\n", result.scalar_time);
1119 if (result.sse2_time > 0)
1120 printf("SSE2: %f\n", result.sse2_time);
1121 if (result.ssse3_time > 0)
1122 printf("SSSE3: %f\n", result.ssse3_time);
1123 if (result.avx2_time > 0)
1124 printf("avx2: %f\n", result.avx2_time);
1125 if (result.neon_time > 0)
1126 printf("neon: %f\n", result.neon_time);
1127 if (result.sve_time > 0)
1128 printf("sve: %f\n", result.sve_time);
1129 printf("Best method: %s, time: %f (%.2fx speedup (<1.0 = bad))\n", result.best_method, best_time,
1130 result.speedup_best);
1131 printf("------------\n");
1132
1133 // Frame data already cleaned up in webcam capture section
1134 SAFE_FREE(test_pixels);
1135 SAFE_FREE(output_buffer);
1136
1137 return result;
1138}
char * ascii_convert(image_t *original, const ssize_t width, const ssize_t height, const bool color, const bool _aspect_ratio, const bool stretch, const char *palette_chars, const char luminance_palette[256])
Definition ascii.c:69
simd_benchmark_t benchmark_simd_color_conversion(int width, int height, int iterations, bool background_mode)
Definition ascii_simd.c:501
global_dec3_cache_t g_dec3_cache
Definition ascii_simd.c:25
void str_destroy(Str *s)
Definition ascii_simd.c:122
simd_benchmark_t benchmark_simd_conversion(int width, int height, int iterations)
Definition ascii_simd.c:334
char * convert_pixels_scalar_with_newlines(image_t *image, const char luminance_palette[256])
Definition ascii_simd.c:190
void init_dec3(void)
Definition ascii_simd.c:66
void str_reserve(Str *s, size_t need)
Definition ascii_simd.c:128
void str_printf(Str *s, const char *fmt,...)
Definition ascii_simd.c:149
void str_append_c(Str *s, char c)
Definition ascii_simd.c:144
void str_init(Str *s)
Definition ascii_simd.c:116
size_t write_rgb_triplet(uint8_t value, char *dst)
Definition ascii_simd.c:28
simd_benchmark_t benchmark_simd_color_conversion_with_source(int width, int height, int iterations, bool background_mode, const image_t *source_image, bool use_256color)
Definition ascii_simd.c:861
void print_simd_capabilities(void)
Definition ascii_simd.c:284
simd_benchmark_t benchmark_simd_conversion_with_source(int width, int height, int iterations, bool background_mode, const image_t *source_image, bool use_256color)
Definition ascii_simd.c:656
void convert_pixels_scalar(const rgb_pixel_t *pixels, char *ascii_chars, int count, const char luminance_palette[256])
Definition ascii_simd.c:175
char g_default_luminance_palette[256]
Definition ascii_simd.c:35
ImageRGB alloc_image(int w, int h)
Definition ascii_simd.c:106
char * image_print_simd(image_t *image, const char *ascii_chars)
Definition ascii_simd.c:260
void init_default_luminance_palette(void)
Definition ascii_simd.c:40
void str_append_bytes(Str *s, const void *src, size_t n)
Definition ascii_simd.c:138
void ascii_simd_init(void)
Definition ascii_simd.c:98
void prewarm_sgr256_cache(void)
void prewarm_sgr256_fg_cache(void)
void ob_term(outbuf_t *ob)
void ob_putc(outbuf_t *ob, char c)
bool rep_is_profitable(uint32_t runlen)
void emit_rep(outbuf_t *ob, uint32_t extra)
const size_t DEFAULT_ASCII_PALETTE_LEN
Definition palette.c:26
const char DEFAULT_ASCII_PALETTE[]
Definition palette.c:25
int safe_vsnprintf(char *buffer, size_t buffer_size, const char *format, va_list ap)
Safe formatted string printing with va_list.
Definition system.c:507
uint64_t time_get_ns(void)
Definition util/time.c:48
char * image_print(const image_t *p, const char *palette)
char * image_print_color(const image_t *p, const char *palette)
void image_destroy(image_t *p)
Definition video/image.c:85
image_t * image_new(size_t width, size_t height)
Definition video/image.c:36