28 memcpy(dst, d->
s, d->
len);
34static bool g_default_palette_initialized =
false;
38 if (g_default_palette_initialized)
43 for (
int i = 0; i < 256; i++) {
44 size_t palette_index = (i * (len - 1) + 127) / 255;
45 if (palette_index >= len) {
46 palette_index = len - 1;
50 g_default_palette_initialized =
true;
54static void ensure_default_palette_ready(
void) {
61 for (
int v = 0; v < 256; ++v) {
102 size_t n = (size_t)w * (
size_t)h * 3u;
123 size_t ncap = s->
cap ? s->
cap : 4096;
125 ncap = (ncap * 3) / 2 + 64;
132 memcpy(s->
data + s->
len, src, n);
145 int n = vsnprintf(stackbuf,
sizeof(stackbuf), fmt, ap);
149 if ((
size_t)n <
sizeof(stackbuf)) {
156 (void)vsnprintf(heap, (
size_t)n + 1, fmt, ap);
167void convert_pixels_scalar(
const rgb_pixel_t *pixels,
char *ascii_chars,
int count,
const char luminance_palette[256]) {
168 for (
int i = 0; i < count; i++) {
169 const rgb_pixel_t *p = &pixels[i];
178 ascii_chars[i] = luminance_palette[luminance];
183 const int h = image->
h;
184 const int w = image->
w;
192 const size_t max_char_bytes = 4;
193 ob.
cap = (size_t)h * ((
size_t)w * max_char_bytes + 1);
196 log_error(
"Failed to allocate output buffer for scalar rendering");
201 for (
int y = 0; y < h; y++) {
202 const rgb_pixel_t *row_pixels = (
const rgb_pixel_t *)&image->
pixels[y * w];
204 for (
int x = 0; x < w;) {
205 const rgb_pixel_t *p = &row_pixels[x];
212 char current_char = luminance_palette[luminance];
217 const rgb_pixel_t *next_p = &row_pixels[j];
219 if (next_luminance > 255)
220 next_luminance = 255;
221 char next_char = luminance_palette[next_luminance];
222 if (next_char != current_char)
233 for (
uint32_t k = 1; k < run; k++) {
254 return render_ascii_image_monochrome_avx2(image, ascii_chars);
255#elif SIMD_SUPPORT_SSSE3
256 return render_ascii_image_monochrome_ssse3(image, ascii_chars);
257#elif SIMD_SUPPORT_SSE2
258 return render_ascii_image_monochrome_sse2(image, ascii_chars);
259#elif SIMD_SUPPORT_NEON
260 return render_ascii_image_monochrome_neon(image, ascii_chars);
277 printf(
"SIMD Support:\n");
279 printf(
" ✓ AVX2 (32 pixels/cycle)\n");
282 printf(
" ✓ ARM NEON (16 pixels/cycle)\n");
285 printf(
" ✓ ARM SVE (scalable pixels/cycle)\n");
287#if SIMD_SUPPORT_SSSE3
288 printf(
" ✓ SSSE3 (16 pixels/cycle)\n");
291 printf(
" ✓ SSE2 (16 pixels/cycle)\n");
293 printf(
" ✓ Scalar fallback (1 pixel/cycle)\n");
301static double get_time_seconds(
void) {
303 if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
305 return (
double)clock() / CLOCKS_PER_SEC;
307 return ts.tv_sec + ts.tv_nsec / 1e9;
312static int calculate_adaptive_iterations(
int pixel_count,
double __attribute__((unused)) target_duration_ms) {
314 int base_iterations = 100;
317 if (pixel_count < 5000) {
318 base_iterations = 100;
319 }
else if (pixel_count < 50000) {
320 base_iterations = 50;
321 }
else if (pixel_count < 200000) {
322 base_iterations = 20;
323 }
else if (pixel_count < 500000) {
324 base_iterations = 10;
330 const int minimum_iterations = 10;
331 return (base_iterations > minimum_iterations) ? base_iterations : minimum_iterations;
339 if (checked_size_mul((
size_t)width, (
size_t)height, &pixel_count) !=
ASCIICHAT_OK) {
340 log_error(
"Image dimensions %d x %d too large (overflow)", width, height);
345 rgb_pixel_t *test_pixels;
347 test_pixels =
SAFE_CALLOC_SIMD(pixel_count,
sizeof(rgb_pixel_t), rgb_pixel_t *);
359 printf(
"Using synthetic gradient data for consistent benchmarking\n");
361 for (
size_t i = 0; i < pixel_count; i++) {
365 int base_r = (x * 255) / width;
366 int base_g = (y * 255) / height;
367 int base_b = ((x + y) * 127) / (width + height);
370 int temp_r = base_r + (rand() % 32 - 16);
371 int temp_g = base_g + (rand() % 32 - 16);
372 int temp_b = base_b + (rand() % 32 - 16);
374 test_pixels[i].r = clamp_rgb(temp_r);
375 test_pixels[i].g = clamp_rgb(temp_g);
376 test_pixels[i].b = clamp_rgb(temp_b);
380 memcpy(test_image->
pixels, test_pixels, pixel_count *
sizeof(rgb_pixel_t));
383 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
384 printf(
"Benchmarking MONO %dx%d (%zu pixels) using %d adaptive iterations (ignoring passed iterations)...\n", width,
385 height, pixel_count, adaptive_iterations);
388 ensure_default_palette_ready();
389 double start_mono = get_time_seconds();
390 for (
int i = 0; i < adaptive_iterations; i++) {
395 result.
scalar_time = (get_time_seconds() - start_mono) / adaptive_iterations;
400 double start_sse2 = get_time_seconds();
401 for (
int i = 0; i < adaptive_iterations; i++) {
406 result.
sse2_time = (get_time_seconds() - start_sse2) / adaptive_iterations;
409#if SIMD_SUPPORT_SSSE3
412 double start_ssse3 = get_time_seconds();
413 for (
int i = 0; i < adaptive_iterations; i++) {
418 result.
ssse3_time = (get_time_seconds() - start_ssse3) / adaptive_iterations;
424 double start_avx2 = get_time_seconds();
425 for (
int i = 0; i < adaptive_iterations; i++) {
430 result.
avx2_time = (get_time_seconds() - start_avx2) / adaptive_iterations;
437 double start_neon = get_time_seconds();
438 for (
int i = 0; i < adaptive_iterations; i++) {
443 result.
neon_time = (get_time_seconds() - start_neon) / adaptive_iterations;
462#if SIMD_SUPPORT_SSSE3
505 if (checked_size_mul((
size_t)width, (
size_t)height, &pixel_count) !=
ASCIICHAT_OK) {
506 log_error(
"Image dimensions %d x %d too large (overflow)", width, height);
512 size_t output_buffer_size = pixel_count * 30 + (size_t)width * 10;
515 rgb_pixel_t *test_pixels;
517 test_pixels =
SAFE_CALLOC_SIMD(pixel_count,
sizeof(rgb_pixel_t), rgb_pixel_t *);
518 output_buffer =
SAFE_MALLOC(output_buffer_size,
char *);
529 printf(
"Using coherent gradient data for realistic color testing\n");
532 for (
size_t i = 0; i < pixel_count; i++) {
536 int base_r = (x * 255) / width;
537 int base_g = (y * 255) / height;
538 int base_b = ((x + y) * 127) / (width + height);
541 int temp_r = base_r + (rand() % 32 - 16);
542 int temp_g = base_g + (rand() % 32 - 16);
543 int temp_b = base_b + (rand() % 32 - 16);
545 test_pixels[i].r = clamp_rgb(temp_r);
546 test_pixels[i].g = clamp_rgb(temp_g);
547 test_pixels[i].b = clamp_rgb(temp_b);
551 frame->
pixels = test_pixels;
553 const char *mode_str = background_mode ?
"background" :
"foreground";
554 printf(
"Benchmarking COLOR %s %dx%d (%zu pixels) x %d iterations...\n", mode_str, width, height, pixel_count,
558 double start = get_time_seconds();
559 for (
int i = 0; i < iterations; i++) {
568 start = get_time_seconds();
569 for (
int i = 0; i < iterations; i++) {
570 char *ascii_output = render_ascii_sse2_unified_optimized(frame, background_mode,
true,
DEFAULT_ASCII_PALETTE);
574 result.
sse2_time = get_time_seconds() - start;
577#if SIMD_SUPPORT_SSSE3
579 start = get_time_seconds();
580 for (
int i = 0; i < iterations; i++) {
581 char *ascii_output = render_ascii_ssse3_unified_optimized(frame, background_mode,
true,
DEFAULT_ASCII_PALETTE);
585 result.
ssse3_time = get_time_seconds() - start;
590 start = get_time_seconds();
591 for (
int i = 0; i < iterations; i++) {
592 char *ascii_output = render_ascii_avx2_unified_optimized(frame, background_mode,
true,
DEFAULT_ASCII_PALETTE);
596 result.
avx2_time = get_time_seconds() - start;
601 start = get_time_seconds();
602 for (
int i = 0; i < iterations; i++) {
605 char *ascii_output = render_ascii_neon_unified_optimized(&temp_image, background_mode,
true,
DEFAULT_ASCII_PALETTE);
609 result.
neon_time = get_time_seconds() - start;
623#if SIMD_SUPPORT_SSSE3
657 const image_t *source_image,
bool use_256color) {
659 (void)background_mode;
664 if (checked_size_mul((
size_t)width, (size_t)height, &pixel_count) !=
ASCIICHAT_OK) {
665 log_error(
"Image dimensions %d x %d too large (overflow)", width, height);
670 rgb_pixel_t *test_pixels;
672 const size_t output_buffer_size = pixel_count * 16;
673 test_pixels =
SAFE_CALLOC_SIMD(pixel_count,
sizeof(rgb_pixel_t), rgb_pixel_t *);
674 output_buffer =
SAFE_MALLOC(output_buffer_size,
char *);
676 if (source_image && source_image->
pixels) {
677 printf(
"Using provided image data (%dx%d) for testing\n", source_image->
w, source_image->
h);
680 if (source_image->
w == width && source_image->
h == height) {
682 for (
size_t i = 0; i < pixel_count; i++) {
683 test_pixels[i].r = source_image->
pixels[i].r;
684 test_pixels[i].g = source_image->
pixels[i].g;
685 test_pixels[i].b = source_image->
pixels[i].b;
689 for (
int y = 0; y < height; y++) {
690 for (
int x = 0; x < width; x++) {
691 int src_x = (x * source_image->
w) / width;
692 int src_y = (y * source_image->
h) / height;
694 size_t src_idx = (size_t)src_y * (
size_t)source_image->
w + (size_t)src_x;
695 size_t dst_idx = (size_t)y * (
size_t)width + (size_t)x;
697 if (src_idx < (
size_t)source_image->
w * (size_t)source_image->
h) {
698 test_pixels[dst_idx].r = source_image->
pixels[src_idx].r;
699 test_pixels[dst_idx].g = source_image->
pixels[src_idx].g;
700 test_pixels[dst_idx].b = source_image->
pixels[src_idx].b;
704 printf(
"Resized image data from %dx%d to %dx%d\n", source_image->
w, source_image->
h, width, height);
708 printf(
"No source image provided, using synthetic gradient data\n");
710 for (
size_t i = 0; i < pixel_count; i++) {
713 int base_r = (x * 255 / width);
714 int base_g = (y * 255 / height);
715 int base_b = ((x + y) * 127 / (width + height));
717 int temp_r = base_r + (rand() % 16 - 8);
718 int temp_g = base_g + (rand() % 16 - 8);
719 int temp_b = base_b + (rand() % 16 - 8);
721 test_pixels[i].r = clamp_rgb(temp_r);
722 test_pixels[i].g = clamp_rgb(temp_g);
723 test_pixels[i].b = clamp_rgb(temp_b);
728 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
729 printf(
"Benchmarking %dx%d (%zu pixels) using %d adaptive iterations (ignoring passed iterations)...\n", width,
730 height, pixel_count, adaptive_iterations);
734 memcpy(frame->
pixels, test_pixels, pixel_count *
sizeof(rgb_pixel_t));
737 ensure_default_palette_ready();
738 double start_scalar = get_time_seconds();
739 for (
int i = 0; i < iterations; i++) {
744 result.
scalar_time = (get_time_seconds() - start_scalar) / iterations;
749 ensure_default_palette_ready();
750 double start_sse2_color = get_time_seconds();
751 for (
int i = 0; i < iterations; i++) {
752 char *result_str = render_ascii_sse2_unified_optimized(frame, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
756 result.
sse2_time = (get_time_seconds() - start_sse2_color) / iterations;
759#if SIMD_SUPPORT_SSSE3
762 ensure_default_palette_ready();
763 double start_ssse3_color = get_time_seconds();
764 for (
int i = 0; i < iterations; i++) {
770 result.
ssse3_time = (get_time_seconds() - start_ssse3_color) / iterations;
776 ensure_default_palette_ready();
777 double start_avx2_color = get_time_seconds();
778 for (
int i = 0; i < iterations; i++) {
779 char *result_str = render_ascii_avx2_unified_optimized(frame, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
783 result.
avx2_time = (get_time_seconds() - start_avx2_color) / iterations;
789 ensure_default_palette_ready();
790 double start_neon_color = get_time_seconds();
791 for (
int i = 0; i < iterations; i++) {
792 char *result_str = render_ascii_neon_unified_optimized(frame, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
796 result.
neon_time = (get_time_seconds() - start_neon_color) / iterations;
802 ensure_default_palette_ready();
803 double start_sve_color = get_time_seconds();
804 for (
int i = 0; i < iterations; i++) {
805 char *result_str = render_ascii_sve_unified_optimized(frame, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
809 result.
sve_time = (get_time_seconds() - start_sve_color) / iterations;
823#if SIMD_SUPPORT_SSSE3
863 bool background_mode,
const image_t *source_image,
870 if (checked_size_mul((
size_t)width, (
size_t)height, &pixel_count) !=
ASCIICHAT_OK) {
871 log_error(
"Image dimensions %d x %d too large (overflow)", width, height);
875 size_t output_buffer_size = pixel_count * 30 + (size_t)width * 10;
878 rgb_pixel_t *test_pixels;
880 test_pixels =
SAFE_CALLOC_SIMD(pixel_count,
sizeof(rgb_pixel_t), rgb_pixel_t *);
881 output_buffer =
SAFE_MALLOC(output_buffer_size,
char *);
884 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
886 const char *mode_str = background_mode ?
"background" :
"foreground";
891 printf(
"Using provided source image data for COLOR %s %dx%d benchmarking with %d iterations...\n", mode_str, width,
892 height, adaptive_iterations);
895 if (source_image->
w == width && source_image->
h == height) {
897 for (
size_t i = 0; i < pixel_count; i++) {
898 test_pixels[i].r = source_image->
pixels[i].r;
899 test_pixels[i].g = source_image->
pixels[i].g;
900 test_pixels[i].b = source_image->
pixels[i].b;
904 float x_ratio = (float)source_image->
w / width;
905 float y_ratio = (float)source_image->
h / height;
907 for (
int y = 0; y < height; y++) {
908 for (
int x = 0; x < width; x++) {
909 int src_x = (int)(x * x_ratio);
910 int src_y = (int)(y * y_ratio);
913 if (src_x >= source_image->
w)
914 src_x = source_image->
w - 1;
915 if (src_y >= source_image->
h)
916 src_y = source_image->
h - 1;
919 size_t src_idx = (size_t)src_y * (
size_t)source_image->
w + (size_t)src_x;
920 size_t dst_idx = (size_t)y * (
size_t)width + (size_t)x;
922 test_pixels[dst_idx].r = source_image->
pixels[src_idx].r;
923 test_pixels[dst_idx].g = source_image->
pixels[src_idx].g;
924 test_pixels[dst_idx].b = source_image->
pixels[src_idx].b;
930 printf(
"Using synthetic gradient data for COLOR %s %dx%d benchmarking with %d iterations...\n", mode_str, width,
931 height, adaptive_iterations);
935 for (
size_t i = 0; i < pixel_count; i++) {
938 int base_r = (x * 255) / width;
939 int base_g = (y * 255) / height;
940 int base_b = ((x + y) * 127) / (width + height);
942 int temp_r = base_r + (rand() % 32 - 16);
943 int temp_g = base_g + (rand() % 32 - 16);
944 int temp_b = base_b + (rand() % 32 - 16);
946 test_pixels[i].r = clamp_rgb(temp_r);
947 test_pixels[i].g = clamp_rgb(temp_g);
948 test_pixels[i].b = clamp_rgb(temp_b);
952 printf(
"Benchmarking COLOR %s conversion using %d iterations...\n", mode_str, adaptive_iterations);
959 double start = get_time_seconds();
960 for (
int i = 0; i < adaptive_iterations; i++) {
962 if (test_image == NULL) {
965 FATAL(
ERROR_MEMORY,
"Failed to allocate test_image in benchmark iteration %d", i);
967 memcpy(test_image->
pixels, test_pixels, pixel_count *
sizeof(rgb_pixel_t));
981 start = get_time_seconds();
982 for (
int i = 0; i < adaptive_iterations; i++) {
985 memcpy(test_image->
pixels, test_pixels, pixel_count *
sizeof(rgb_pixel_t));
987 render_ascii_sse2_unified_optimized(test_image, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
993 result.
sse2_time = get_time_seconds() - start;
996#if SIMD_SUPPORT_SSSE3
997 start = get_time_seconds();
998 for (
int i = 0; i < adaptive_iterations; i++) {
1001 memcpy(test_image->
pixels, test_pixels, pixel_count *
sizeof(rgb_pixel_t));
1003 render_ascii_ssse3_unified_optimized(test_image, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
1009 result.
ssse3_time = get_time_seconds() - start;
1012#if SIMD_SUPPORT_AVX2
1013 start = get_time_seconds();
1014 for (
int i = 0; i < adaptive_iterations; i++) {
1017 memcpy(test_image->
pixels, test_pixels, pixel_count *
sizeof(rgb_pixel_t));
1019 render_ascii_avx2_unified_optimized(test_image, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
1025 result.
avx2_time = get_time_seconds() - start;
1028#if SIMD_SUPPORT_NEON
1029 start = get_time_seconds();
1030 for (
int i = 0; i < adaptive_iterations; i++) {
1034 render_ascii_neon_unified_optimized(&temp_image, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
1038 result.
neon_time = get_time_seconds() - start;
1042 start = get_time_seconds();
1043 for (
int i = 0; i < adaptive_iterations; i++) {
1047 render_ascii_sve_unified_optimized(&temp_image, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
1051 result.
sve_time = get_time_seconds() - start;
1054#if SIMD_SUPPORT_SSE2
1061#if SIMD_SUPPORT_SSSE3
1068#if SIMD_SUPPORT_AVX2
1075#if SIMD_SUPPORT_NEON
1085 result.
sse2_time /= adaptive_iterations;
1089 result.
avx2_time /= adaptive_iterations;
1091 result.
neon_time /= adaptive_iterations;
1095#if SIMD_SUPPORT_SSE2
1099#if SIMD_SUPPORT_SSSE3
1103#if SIMD_SUPPORT_AVX2
1107#if SIMD_SUPPORT_NEON
1118 printf(
"------------\n");
1129 printf(
"sve: %f\n", result.
sve_time);
1130 printf(
"Best method: %s, time: %f (%.2fx speedup (<1.0 = bad))\n", result.
best_method, best_time,
1132 printf(
"------------\n");
🔌 Cross-platform abstraction layer umbrella header for ascii-chat
simd_benchmark_t benchmark_simd_conversion(int width, int height, int __attribute__((unused)) iterations)
simd_benchmark_t benchmark_simd_color_conversion_with_source(int width, int height, int __attribute__((unused)) iterations, bool background_mode, const image_t *source_image, bool use_256color)
SIMD-optimized ASCII conversion interface.
AVX2-optimized ASCII rendering functions.
#define SAFE_REALLOC(ptr, size, cast)
#define SAFE_MALLOC(size, cast)
#define SAFE_CALLOC_SIMD(count, size, cast)
#define FATAL(code,...)
Exit with error code and custom message, with stack trace in debug builds.
#define log_error(...)
Log an ERROR message.
const size_t DEFAULT_ASCII_PALETTE_LEN
Length of default ASCII palette.
const char DEFAULT_ASCII_PALETTE[]
Default ASCII palette for legacy functions.
simd_benchmark_t benchmark_simd_color_conversion(int width, int height, int iterations, bool background_mode)
Benchmark SIMD color conversion methods.
global_dec3_cache_t g_dec3_cache
Global decimal cache instance.
#define LUMA_BLUE
Luminance blue coefficient (0.114 * 256 = 29)
#define LUMA_GREEN
Luminance green coefficient (0.587 * 256 = 150)
char * image_print(const image_t *p, const char *palette)
Print image as ASCII art (monochrome)
char * convert_pixels_scalar_with_newlines(image_t *image, const char luminance_palette[256])
Convert image to ASCII with newlines (scalar fallback)
void init_dec3(void)
Initialize decimal lookup table.
void str_reserve(Str *s, size_t need)
Reserve space in string buffer.
void str_printf(Str *s, const char *fmt,...)
Append formatted string to buffer.
void str_free(Str *s)
Free string buffer.
void str_append_c(Str *s, char c)
Append character to string buffer.
void str_init(Str *s)
Initialize string buffer.
void ob_term(outbuf_t *ob)
Append null terminator to buffer.
char * image_print_color(const image_t *p, const char *palette)
Print image as ASCII art with color.
size_t write_rgb_triplet(uint8_t value, char *dst)
Write decimal RGB triplet using dec3 cache.
void prewarm_sgr256_cache(void)
Prewarm 256-color foreground/background cache for benchmarks.
void ob_putc(outbuf_t *ob, char c)
Append a character to buffer.
void print_simd_capabilities(void)
Print detected SIMD capabilities.
simd_benchmark_t benchmark_simd_conversion_with_source(int width, int height, int iterations, bool background_mode, const image_t *source_image, bool use_256color)
Benchmark SIMD conversion with source image.
bool rep_is_profitable(uint32_t runlen)
Check if run-length encoding is profitable.
void convert_pixels_scalar(const rgb_pixel_t *pixels, char *ascii_chars, int count, const char luminance_palette[256])
Convert pixels to ASCII (scalar fallback)
char g_default_luminance_palette[256]
Default luminance palette (256 characters)
void emit_rep(outbuf_t *ob, uint32_t extra)
Emit run-length encoded sequence.
void prewarm_sgr256_fg_cache(void)
Prewarm 256-color foreground cache for benchmarks.
ImageRGB alloc_image(int w, int h)
Allocate a new ImageRGB (RGB8 format)
char * image_print_simd(image_t *image, const char *ascii_chars)
Print image as ASCII using SIMD (monochrome)
void init_default_luminance_palette(void)
Initialize default luminance palette.
char * ascii_convert(image_t *original, const ssize_t width, const ssize_t height, const bool color, const bool _aspect_ratio, const bool stretch, const char *palette_chars, const char luminance_palette[256])
Convert image to ASCII art.
void str_append_bytes(Str *s, const void *src, size_t n)
Append bytes to string buffer.
void ascii_simd_init(void)
Initialize SIMD subsystem.
void image_destroy(image_t *p)
Destroy an image allocated with image_new()
#define LUMA_RED
Luminance red coefficient (0.299 * 256 = 77)
image_t * image_new(size_t width, size_t height)
Create a new image with standard allocation.
@ IMAGE_ALLOC_SIMD
Pixels allocated with SAFE_MALLOC_SIMD()
🔢 Mathematical Utility Functions
Dynamic Output Buffer with ANSI Sequence Support.
✅ Safe Integer Arithmetic and Overflow Detection
ASCII Palette Management for Video-to-ASCII Conversion.
ImageRGB structure for NEON renderers.
Dynamic string buffer structure.
Decimal conversion cache structure (1-3 digits)
Global decimal cache for digit conversion.
int w
Image width in pixels (must be > 0)
int h
Image height in pixels (must be > 0)
rgb_pixel_t * pixels
Pixel data array (width * height RGB pixels, row-major order)
Dynamic output buffer (auto-expanding)
size_t cap
Buffer capacity in bytes (maximum length before reallocation)
char * buf
Buffer pointer (allocated, owned by caller, must be freed)
SIMD benchmark results structure.
⏱️ High-precision timing utilities using sokol_time.h and uthash
Common SIMD utilities and structures.