ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
ascii_simd.c File Reference

⚡ Main SIMD ASCII rendering dispatcher with architecture detection and fallback handling More...

Go to the source code of this file.

Functions

size_t write_rgb_triplet (uint8_t value, char *dst)
 Write decimal RGB triplet using dec3 cache.
 
void init_default_luminance_palette (void)
 Initialize default luminance palette.
 
void init_dec3 (void)
 Initialize decimal lookup table.
 
void ascii_simd_init (void)
 Initialize SIMD subsystem.
 
ImageRGB alloc_image (int w, int h)
 Allocate a new ImageRGB (RGB8 format)
 
void str_init (Str *s)
 Initialize string buffer.
 
void str_free (Str *s)
 Free string buffer.
 
void str_reserve (Str *s, size_t need)
 Reserve space in string buffer.
 
void str_append_bytes (Str *s, const void *src, size_t n)
 Append bytes to string buffer.
 
void str_append_c (Str *s, char c)
 Append character to string buffer.
 
void str_printf (Str *s, const char *fmt,...)
 Append formatted string to buffer.
 
void convert_pixels_scalar (const rgb_pixel_t *pixels, char *ascii_chars, int count, const char luminance_palette[256])
 Convert pixels to ASCII (scalar fallback)
 
char * convert_pixels_scalar_with_newlines (image_t *image, const char luminance_palette[256])
 Convert image to ASCII with newlines (scalar fallback)
 
char * image_print_simd (image_t *image, const char *ascii_chars)
 Print image as ASCII using SIMD (monochrome)
 
void print_simd_capabilities (void)
 Print detected SIMD capabilities.
 
simd_benchmark_t benchmark_simd_conversion (int width, int height, int __attribute__((unused)) iterations)
 Benchmark monochrome SIMD conversion methods.
simd_benchmark_t benchmark_simd_color_conversion (int width, int height, int iterations, bool background_mode)
 Benchmark SIMD color conversion methods.
 
simd_benchmark_t benchmark_simd_conversion_with_source (int width, int height, int iterations, bool background_mode, const image_t *source_image, bool use_256color)
 Benchmark SIMD conversion with source image.
 
simd_benchmark_t benchmark_simd_color_conversion_with_source (int width, int height, int __attribute__((unused)) iterations, bool background_mode, const image_t *source_image, bool use_256color)
 Benchmark SIMD color conversion with source image.

Variables

global_dec3_cache_t g_dec3_cache = {.dec3_initialized = false}
 Global decimal cache instance.
 
char g_default_luminance_palette [256]
 Default luminance palette (256 characters)
 

Detailed Description

⚡ Main SIMD ASCII rendering dispatcher with architecture detection and fallback handling

Definition in file ascii_simd.c.

Function Documentation

◆ benchmark_simd_color_conversion_with_source()

simd_benchmark_t benchmark_simd_color_conversion_with_source ( int  width,
int  height,
int __attribute__((unused))  iterations,
bool  background_mode,
const image_t * source_image,
bool  use_256color 
)

Definition at line 861 of file ascii_simd.c.

864 { // Benchmarks the scalar and every compiled-in SIMD color renderer on one width x height frame and returns the timings
865 simd_benchmark_t result = {0}; // all timing fields start at 0; the "> 0" checks below treat 0 as "not measured"
866 (void)use_256color; // Suppress unused-parameter warning in builds where no SIMD_SUPPORT_* block below (the only users of use_256color) is compiled
867
868 // Check for integer overflow in pixel count calculation; on overflow the zero-initialized result is returned
869 size_t pixel_count;
870 if (checked_size_mul((size_t)width, (size_t)height, &pixel_count) != ASCIICHAT_OK) {
871 log_error("Image dimensions %d x %d too large (overflow)", width, height);
872 return result;
873 }
874
875 size_t output_buffer_size = pixel_count * 30 + (size_t)width * 10; // NOTE(review): not overflow-checked, unlike pixel_count above
876
877 // Allocate buffers for benchmarking
878 rgb_pixel_t *test_pixels;
879 char *output_buffer;
880 test_pixels = SAFE_CALLOC_SIMD(pixel_count, sizeof(rgb_pixel_t), rgb_pixel_t *);
881 output_buffer = SAFE_MALLOC(output_buffer_size, char *); // NOTE(review): only freed at the end; no visible line reads or writes it
882
883 // Calculate adaptive iterations for color benchmarking (ignore passed iterations)
884 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
885
886 const char *mode_str = background_mode ? "background" : "foreground";
887
888 // Fill test_pixels from source_image when one is given, otherwise from a seeded synthetic gradient
889
890 if (source_image) {
891 printf("Using provided source image data for COLOR %s %dx%d benchmarking with %d iterations...\n", mode_str, width,
892 height, adaptive_iterations);
893
894 // Use provided source image - resize if needed
895 if (source_image->w == width && source_image->h == height) {
896 // Same dimensions: copy the pixels channel by channel
897 for (size_t i = 0; i < pixel_count; i++) {
898 test_pixels[i].r = source_image->pixels[i].r;
899 test_pixels[i].g = source_image->pixels[i].g;
900 test_pixels[i].b = source_image->pixels[i].b;
901 }
902 } else {
903 // Resize: each target pixel takes the source pixel at the truncated scaled coordinate
904 float x_ratio = (float)source_image->w / width;
905 float y_ratio = (float)source_image->h / height;
906
907 for (int y = 0; y < height; y++) {
908 for (int x = 0; x < width; x++) {
909 int src_x = (int)(x * x_ratio);
910 int src_y = (int)(y * y_ratio);
911
912 // Clamp to the last source column/row
913 if (src_x >= source_image->w)
914 src_x = source_image->w - 1;
915 if (src_y >= source_image->h)
916 src_y = source_image->h - 1;
917
918 // Use size_t for index calculations to prevent integer overflow
919 size_t src_idx = (size_t)src_y * (size_t)source_image->w + (size_t)src_x;
920 size_t dst_idx = (size_t)y * (size_t)width + (size_t)x;
921
922 test_pixels[dst_idx].r = source_image->pixels[src_idx].r;
923 test_pixels[dst_idx].g = source_image->pixels[src_idx].g;
924 test_pixels[dst_idx].b = source_image->pixels[src_idx].b;
925 }
926 }
927 }
928 } else {
929 // No source image provided: use synthetic gradient data for consistent testing
930 printf("Using synthetic gradient data for COLOR %s %dx%d benchmarking with %d iterations...\n", mode_str, width,
931 height, adaptive_iterations);
932
933 // NOLINTNEXTLINE(bugprone-random-generator-seed)
934 srand(12345); // Fixed seed: consistent results across runs
935 for (size_t i = 0; i < pixel_count; i++) {
936 int x = i % width;
937 int y = i / width;
938 int base_r = (x * 255) / width;
939 int base_g = (y * 255) / height;
940 int base_b = ((x + y) * 127) / (width + height);
941
942 int temp_r = base_r + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
943 int temp_g = base_g + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
944 int temp_b = base_b + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
945
946 test_pixels[i].r = clamp_rgb(temp_r);
947 test_pixels[i].g = clamp_rgb(temp_g);
948 test_pixels[i].b = clamp_rgb(temp_b);
949 }
950 }
951
952 printf("Benchmarking COLOR %s conversion using %d iterations...\n", mode_str, adaptive_iterations);
953
954 // FIX #5: Prewarm 256-color caches to avoid first-frame penalty (~1.5-2MB cache build)
955 prewarm_sgr256_fg_cache(); // Warmup 256-entry FG cache
956 prewarm_sgr256_cache(); // Warmup 65,536-entry FG+BG cache
957
958 // Benchmark scalar color conversion (pure conversion, no I/O); NOTE(review): the ascii_convert call below passes color=false
959 double start = get_time_seconds(); // timed region also covers image_new/memcpy/image_destroy on every iteration
960 for (int i = 0; i < adaptive_iterations; i++) {
961 image_t *test_image = image_new(width, height);
962 if (test_image == NULL) {
963 SAFE_FREE(test_pixels);
964 SAFE_FREE(output_buffer);
965 FATAL(ERROR_MEMORY, "Failed to allocate test_image in benchmark iteration %d", i);
966 }
967 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
968 char *result_ascii = ascii_convert(test_image, width, height, false, false, false, DEFAULT_ASCII_PALETTE, // NOTE(review): listing line 969 (rest of this call) is missing from this excerpt
970 if (result_ascii)
971 SAFE_FREE(result_ascii);
972 image_destroy(test_image);
973 }
974 result.scalar_time = get_time_seconds() - start;
975
976 // Find best method -- default to scalar and let simd beat it.
977 double best_time = result.scalar_time;
978 result.best_method = "scalar";
979
980#if SIMD_SUPPORT_SSE2
981 start = get_time_seconds();
982 for (int i = 0; i < adaptive_iterations; i++) {
983 image_t *test_image = image_new(width, height);
984 if (test_image) { // a failed allocation skips the render, but the iteration still counts toward the timing (same in the SSSE3/AVX2 loops)
985 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
986 char *result_str =
987 render_ascii_sse2_unified_optimized(test_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
988 if (result_str)
989 SAFE_FREE(result_str);
990 image_destroy(test_image);
991 }
992 }
993 result.sse2_time = get_time_seconds() - start;
994#endif
995
996#if SIMD_SUPPORT_SSSE3
997 start = get_time_seconds();
998 for (int i = 0; i < adaptive_iterations; i++) {
999 image_t *test_image = image_new(width, height);
1000 if (test_image) {
1001 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
1002 char *result_str =
1003 render_ascii_ssse3_unified_optimized(test_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1004 if (result_str)
1005 SAFE_FREE(result_str);
1006 image_destroy(test_image);
1007 }
1008 }
1009 result.ssse3_time = get_time_seconds() - start;
1010#endif
1011
1012#if SIMD_SUPPORT_AVX2
1013 start = get_time_seconds();
1014 for (int i = 0; i < adaptive_iterations; i++) {
1015 image_t *test_image = image_new(width, height);
1016 if (test_image) {
1017 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
1018 char *result_str =
1019 render_ascii_avx2_unified_optimized(test_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1020 if (result_str)
1021 SAFE_FREE(result_str);
1022 image_destroy(test_image);
1023 }
1024 }
1025 result.avx2_time = get_time_seconds() - start;
1026#endif
1027
1028#if SIMD_SUPPORT_NEON
1029 start = get_time_seconds();
1030 for (int i = 0; i < adaptive_iterations; i++) {
1031 // Temporary stack image_t that aliases test_pixels (no per-iteration image_new/memcpy); the inner `result` shadows the outer one
1032 image_t temp_image = {.pixels = test_pixels, .w = width, .h = height, .alloc_method = IMAGE_ALLOC_SIMD};
1033 char *result =
1034 render_ascii_neon_unified_optimized(&temp_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1035 if (result)
1036 SAFE_FREE(result);
1037 }
1038 result.neon_time = get_time_seconds() - start;
1039#endif
1040
1041#if SIMD_SUPPORT_SVE
1042 start = get_time_seconds();
1043 for (int i = 0; i < adaptive_iterations; i++) {
1044 // Temporary stack image_t that aliases test_pixels (no per-iteration image_new/memcpy); the inner `result` shadows the outer one
1045 image_t temp_image = {.pixels = test_pixels, .w = width, .h = height, .alloc_method = IMAGE_ALLOC_SIMD};
1046 char *result =
1047 render_ascii_sve_unified_optimized(&temp_image, background_mode, use_256color, DEFAULT_ASCII_PALETTE);
1048 if (result)
1049 SAFE_FREE(result);
1050 }
1051 result.sve_time = get_time_seconds() - start;
1052#endif
1053
1054#if SIMD_SUPPORT_SSE2
1055 if (result.sse2_time > 0 && result.sse2_time < best_time) {
1056 best_time = result.sse2_time;
1057 result.best_method = "SSE2";
1058 }
1059#endif
1060
1061#if SIMD_SUPPORT_SSSE3
1062 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
1063 best_time = result.ssse3_time;
1064 result.best_method = "SSSE3";
1065 }
1066#endif
1067
1068#if SIMD_SUPPORT_AVX2
1069 if (result.avx2_time > 0 && result.avx2_time < best_time) {
1070 best_time = result.avx2_time;
1071 result.best_method = "AVX2";
1072 }
1073#endif
1074
1075#if SIMD_SUPPORT_NEON
1076 if (result.neon_time > 0 && result.neon_time < best_time) {
1077 best_time = result.neon_time;
1078 result.best_method = "NEON";
1079 }
1080#endif
1081
1082 // Normalize timing results by iteration count to get per-frame times; NOTE(review): sve_time is not divided here
1083 result.scalar_time /= adaptive_iterations;
1084 if (result.sse2_time > 0)
1085 result.sse2_time /= adaptive_iterations;
1086 if (result.ssse3_time > 0)
1087 result.ssse3_time /= adaptive_iterations;
1088 if (result.avx2_time > 0)
1089 result.avx2_time /= adaptive_iterations;
1090 if (result.neon_time > 0)
1091 result.neon_time /= adaptive_iterations;
1092 // Recalculate best time after normalization; NOTE(review): best_method is never set to "SVE" in this function, even if sve_time wins below
1093 best_time = result.scalar_time;
1094
1095#if SIMD_SUPPORT_SSE2
1096 if (result.sse2_time > 0 && result.sse2_time < best_time)
1097 best_time = result.sse2_time;
1098#endif
1099#if SIMD_SUPPORT_SSSE3
1100 if (result.ssse3_time > 0 && result.ssse3_time < best_time)
1101 best_time = result.ssse3_time;
1102#endif
1103#if SIMD_SUPPORT_AVX2
1104 if (result.avx2_time > 0 && result.avx2_time < best_time)
1105 best_time = result.avx2_time;
1106#endif
1107#if SIMD_SUPPORT_NEON
1108 if (result.neon_time > 0 && result.neon_time < best_time)
1109 best_time = result.neon_time;
1110#endif
1111#if SIMD_SUPPORT_SVE
1112 if (result.sve_time > 0 && result.sve_time < best_time)
1113 best_time = result.sve_time;
1114#endif
1115
1116 result.speedup_best = result.scalar_time / best_time; // NOTE(review): no guard against best_time == 0
1117
1118 printf("------------\n");
1119 printf("scalar: %f\n", result.scalar_time);
1120 if (result.sse2_time > 0)
1121 printf("SSE2: %f\n", result.sse2_time);
1122 if (result.ssse3_time > 0)
1123 printf("SSSE3: %f\n", result.ssse3_time);
1124 if (result.avx2_time > 0)
1125 printf("avx2: %f\n", result.avx2_time);
1126 if (result.neon_time > 0)
1127 printf("neon: %f\n", result.neon_time);
1128 if (result.sve_time > 0)
1129 printf("sve: %f\n", result.sve_time);
1130 printf("Best method: %s, time: %f (%.2fx speedup (<1.0 = bad))\n", result.best_method, best_time,
1131 result.speedup_best);
1132 printf("------------\n");
1133
1134 // Release the two benchmark buffers allocated at the top of the function
1135 SAFE_FREE(test_pixels);
1136 SAFE_FREE(output_buffer);
1137
1138 return result;
1139}
#define SAFE_FREE(ptr)
Definition common.h:320
#define SAFE_MALLOC(size, cast)
Definition common.h:208
#define SAFE_CALLOC_SIMD(count, size, cast)
Definition common.h:311
#define FATAL(code,...)
Exit with error code and custom message, with stack trace in debug builds.
Definition common.h:151
@ ERROR_MEMORY
Definition error_codes.h:53
@ ASCIICHAT_OK
Definition error_codes.h:48
#define log_error(...)
Log an ERROR message.
const char DEFAULT_ASCII_PALETTE[]
Default ASCII palette for legacy functions.
Definition palette.c:48
const char * best_method
Definition ascii_simd.h:279
void prewarm_sgr256_cache(void)
Prewarm 256-color foreground/background cache for benchmarks.
char g_default_luminance_palette[256]
Default luminance palette (256 characters)
Definition ascii_simd.c:33
void prewarm_sgr256_fg_cache(void)
Prewarm 256-color foreground cache for benchmarks.
char * ascii_convert(image_t *original, const ssize_t width, const ssize_t height, const bool color, const bool _aspect_ratio, const bool stretch, const char *palette_chars, const char luminance_palette[256])
Convert image to ASCII art.
Definition ascii.c:67
void image_destroy(image_t *p)
Destroy an image allocated with image_new()
Definition video/image.c:85
image_t * image_new(size_t width, size_t height)
Create a new image with standard allocation.
Definition video/image.c:36
@ IMAGE_ALLOC_SIMD
Pixels allocated with SAFE_MALLOC_SIMD()
Image structure.
int w
Image width in pixels (must be > 0)
int h
Image height in pixels (must be > 0)
rgb_pixel_t * pixels
Pixel data array (width * height RGB pixels, row-major order)
SIMD benchmark results structure.
Definition ascii_simd.h:271

References ascii_convert(), ASCIICHAT_OK, simd_benchmark_t::avx2_time, simd_benchmark_t::best_method, DEFAULT_ASCII_PALETTE, ERROR_MEMORY, FATAL, g_default_luminance_palette, image_t::h, IMAGE_ALLOC_SIMD, image_destroy(), image_new(), log_error, simd_benchmark_t::neon_time, image_t::pixels, prewarm_sgr256_cache(), prewarm_sgr256_fg_cache(), SAFE_CALLOC_SIMD, SAFE_FREE, SAFE_MALLOC, simd_benchmark_t::scalar_time, simd_benchmark_t::speedup_best, simd_benchmark_t::sse2_time, simd_benchmark_t::ssse3_time, simd_benchmark_t::sve_time, and image_t::w.

◆ benchmark_simd_conversion()

simd_benchmark_t benchmark_simd_conversion ( int  width,
int  height,
int __attribute__((unused))  iterations 
)

Definition at line 334 of file ascii_simd.c.

334 { // Benchmarks monochrome rendering: scalar image_print vs. each compiled-in SIMD renderer; returns mean per-frame times
335 simd_benchmark_t result = {0}; // all timing fields start at 0; the "> 0" checks below treat 0 as "not measured"
336
337 // Check for integer overflow in pixel count calculation; on overflow the zero-initialized result is returned
338 size_t pixel_count;
339 if (checked_size_mul((size_t)width, (size_t)height, &pixel_count) != ASCIICHAT_OK) {
340 log_error("Image dimensions %d x %d too large (overflow)", width, height);
341 return result;
342 }
343
344 // Generate test data and test image
345 rgb_pixel_t *test_pixels;
346 char *output_buffer;
347 test_pixels = SAFE_CALLOC_SIMD(pixel_count, sizeof(rgb_pixel_t), rgb_pixel_t *);
348 output_buffer = SAFE_MALLOC(pixel_count, char *); // NOTE(review): only freed below; no visible line reads or writes it
349
350 // Create test image for new image-based functions; on failure free both buffers and return the zeroed result
351 image_t *test_image = image_new(width, height);
352 if (!test_image) {
353 SAFE_FREE(test_pixels);
354 SAFE_FREE(output_buffer);
355 return result;
356 }
357
358 // Use synthetic data for consistent cross-platform testing
359 printf("Using synthetic gradient data for consistent benchmarking\n");
360 srand(12345); // Consistent results across runs // NOLINT(cert-msc32-c,cert-msc51-cpp,bugprone-random-generator-seed)
361 for (size_t i = 0; i < pixel_count; i++) {
362 int x = i % width;
363 int y = i / width;
364 // Create realistic gradient pattern with some variation
365 int base_r = (x * 255) / width;
366 int base_g = (y * 255) / height;
367 int base_b = ((x + y) * 127) / (width + height);
368
369 // Add small random variation (rand() % 32 - 16, i.e. -16..15) to make it realistic
370 int temp_r = base_r + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
371 int temp_g = base_g + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
372 int temp_b = base_b + (rand() % 32 - 16); // NOLINT(cert-msc30-c,cert-msc50-cpp)
373
374 test_pixels[i].r = clamp_rgb(temp_r);
375 test_pixels[i].g = clamp_rgb(temp_g);
376 test_pixels[i].b = clamp_rgb(temp_b);
377 }
378
379 // Copy test data to test image pixels
380 memcpy(test_image->pixels, test_pixels, pixel_count * sizeof(rgb_pixel_t));
381
382 // Calculate adaptive iterations for reliable timing
383 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
384 printf("Benchmarking MONO %dx%d (%zu pixels) using %d adaptive iterations (ignoring passed iterations)...\n", width,
385 height, pixel_count, adaptive_iterations);
386
387 // Benchmark scalar using image-based API
388 ensure_default_palette_ready();
389 double start_mono = get_time_seconds();
390 for (int i = 0; i < adaptive_iterations; i++) {
391 char *result_str = image_print(test_image, DEFAULT_ASCII_PALETTE);
392 if (result_str)
393 SAFE_FREE(result_str);
394 }
395 result.scalar_time = (get_time_seconds() - start_mono) / adaptive_iterations; // mean seconds per frame
396
397#if SIMD_SUPPORT_SSE2
398 // SSE2: same loop shape as the scalar case, re-rendering the shared test_image
399 // Benchmark SSE2 monochrome rendering
400 double start_sse2 = get_time_seconds();
401 for (int i = 0; i < adaptive_iterations; i++) {
402 char *result_str = render_ascii_image_monochrome_sse2(test_image, DEFAULT_ASCII_PALETTE);
403 if (result_str)
404 SAFE_FREE(result_str);
405 }
406 result.sse2_time = (get_time_seconds() - start_sse2) / adaptive_iterations;
407#endif
408
409#if SIMD_SUPPORT_SSSE3
410 // SSSE3: same loop shape as the scalar case, re-rendering the shared test_image
411 // Benchmark SSSE3 monochrome rendering
412 double start_ssse3 = get_time_seconds();
413 for (int i = 0; i < adaptive_iterations; i++) {
414 char *result_str = render_ascii_image_monochrome_ssse3(test_image, DEFAULT_ASCII_PALETTE);
415 if (result_str)
416 SAFE_FREE(result_str);
417 }
418 result.ssse3_time = (get_time_seconds() - start_ssse3) / adaptive_iterations;
419#endif
420
421#if SIMD_SUPPORT_AVX2
422 // AVX2: same loop shape as the scalar case, re-rendering the shared test_image
423 // Benchmark AVX2 monochrome rendering
424 double start_avx2 = get_time_seconds();
425 for (int i = 0; i < adaptive_iterations; i++) {
426 char *result_str = render_ascii_image_monochrome_avx2(test_image, DEFAULT_ASCII_PALETTE);
427 if (result_str)
428 SAFE_FREE(result_str);
429 }
430 result.avx2_time = (get_time_seconds() - start_avx2) / adaptive_iterations;
431#endif
432
433#if SIMD_SUPPORT_NEON
434 // NEON: same loop shape as the scalar case, re-rendering the shared test_image
435 // TODO: Update benchmark to use custom palette testing
436 // Benchmark NEON monochrome rendering
437 double start_neon = get_time_seconds();
438 for (int i = 0; i < adaptive_iterations; i++) {
439 char *result_str = render_ascii_image_monochrome_neon(test_image, DEFAULT_ASCII_PALETTE);
440 if (result_str)
441 SAFE_FREE(result_str);
442 }
443 result.neon_time = (get_time_seconds() - start_neon) / adaptive_iterations;
444#endif
445
446#if SIMD_SUPPORT_SVE
447 // SVE benchmarking disabled - function removed
448 result.sve_time = 0.0;
449#endif
450
451 // Find best method: start from scalar and let any faster measured SIMD time replace it
452 double best_time = result.scalar_time;
453 result.best_method = "scalar";
454
455#if SIMD_SUPPORT_SSE2
456 if (result.sse2_time > 0 && result.sse2_time < best_time) {
457 best_time = result.sse2_time;
458 result.best_method = "SSE2";
459 }
460#endif
461
462#if SIMD_SUPPORT_SSSE3
463 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
464 best_time = result.ssse3_time;
465 result.best_method = "SSSE3";
466 }
467#endif
468
469#if SIMD_SUPPORT_AVX2
470 if (result.avx2_time > 0 && result.avx2_time < best_time) {
471 best_time = result.avx2_time;
472 result.best_method = "AVX2";
473 }
474#endif
475
476#if SIMD_SUPPORT_NEON
477 if (result.neon_time > 0 && result.neon_time < best_time) {
478 best_time = result.neon_time;
479 result.best_method = "NEON";
480 }
481#endif
482
483 result.speedup_best = result.scalar_time / best_time; // NOTE(review): computed before the SVE check below, which cannot fire while sve_time is fixed at 0.0 above
484
485#if SIMD_SUPPORT_SVE
486 if (result.sve_time > 0 && result.sve_time < best_time) {
487 best_time = result.sve_time;
488 result.best_method = "SVE";
489 }
490#endif
491
492 // Cleanup: destroy the shared test image and free both buffers
493 image_destroy(test_image);
494 SAFE_FREE(test_pixels);
495 SAFE_FREE(output_buffer);
496
497 return result;
498}
char * image_print(const image_t *p, const char *palette)
Print image as ASCII art (monochrome)

References ASCIICHAT_OK, simd_benchmark_t::avx2_time, simd_benchmark_t::best_method, DEFAULT_ASCII_PALETTE, image_destroy(), image_new(), image_print(), log_error, simd_benchmark_t::neon_time, image_t::pixels, SAFE_CALLOC_SIMD, SAFE_FREE, SAFE_MALLOC, simd_benchmark_t::scalar_time, simd_benchmark_t::speedup_best, simd_benchmark_t::sse2_time, simd_benchmark_t::ssse3_time, and simd_benchmark_t::sve_time.