864 {
866 (void)use_256color;
867
868
869 size_t pixel_count;
870 if (checked_size_mul((
size_t)width, (
size_t)height, &pixel_count) !=
ASCIICHAT_OK) {
871 log_error(
"Image dimensions %d x %d too large (overflow)", width, height);
872 return result;
873 }
874
875 size_t output_buffer_size = pixel_count * 30 + (size_t)width * 10;
876
877
878 rgb_pixel_t *test_pixels;
879 char *output_buffer;
880 test_pixels =
SAFE_CALLOC_SIMD(pixel_count,
sizeof(rgb_pixel_t), rgb_pixel_t *);
881 output_buffer =
SAFE_MALLOC(output_buffer_size,
char *);
882
883
884 int adaptive_iterations = calculate_adaptive_iterations(pixel_count, 10.0);
885
886 const char *mode_str = background_mode ? "background" : "foreground";
887
888
889
890 if (source_image) {
891 printf("Using provided source image data for COLOR %s %dx%d benchmarking with %d iterations...\n", mode_str, width,
892 height, adaptive_iterations);
893
894
895 if (source_image->
w == width && source_image->
h == height) {
896
897 for (size_t i = 0; i < pixel_count; i++) {
898 test_pixels[i].r = source_image->
pixels[i].r;
899 test_pixels[i].g = source_image->
pixels[i].g;
900 test_pixels[i].b = source_image->
pixels[i].b;
901 }
902 } else {
903
904 float x_ratio = (float)source_image->
w / width;
905 float y_ratio = (float)source_image->
h / height;
906
907 for (int y = 0; y < height; y++) {
908 for (int x = 0; x < width; x++) {
909 int src_x = (int)(x * x_ratio);
910 int src_y = (int)(y * y_ratio);
911
912
913 if (src_x >= source_image->
w)
914 src_x = source_image->
w - 1;
915 if (src_y >= source_image->
h)
916 src_y = source_image->
h - 1;
917
918
919 size_t src_idx = (size_t)src_y * (
size_t)source_image->
w + (size_t)src_x;
920 size_t dst_idx = (size_t)y * (size_t)width + (size_t)x;
921
922 test_pixels[dst_idx].r = source_image->
pixels[src_idx].r;
923 test_pixels[dst_idx].g = source_image->
pixels[src_idx].g;
924 test_pixels[dst_idx].b = source_image->
pixels[src_idx].b;
925 }
926 }
927 }
928 } else {
929
930 printf("Using synthetic gradient data for COLOR %s %dx%d benchmarking with %d iterations...\n", mode_str, width,
931 height, adaptive_iterations);
932
933
934 srand(12345);
935 for (size_t i = 0; i < pixel_count; i++) {
936 int x = i % width;
937 int y = i / width;
938 int base_r = (x * 255) / width;
939 int base_g = (y * 255) / height;
940 int base_b = ((x + y) * 127) / (width + height);
941
942 int temp_r = base_r + (rand() % 32 - 16);
943 int temp_g = base_g + (rand() % 32 - 16);
944 int temp_b = base_b + (rand() % 32 - 16);
945
946 test_pixels[i].r = clamp_rgb(temp_r);
947 test_pixels[i].g = clamp_rgb(temp_g);
948 test_pixels[i].b = clamp_rgb(temp_b);
949 }
950 }
951
952 printf("Benchmarking COLOR %s conversion using %d iterations...\n", mode_str, adaptive_iterations);
953
954
957
958
959 double start = get_time_seconds();
960 for (int i = 0; i < adaptive_iterations; i++) {
962 if (test_image == NULL) {
965 FATAL(
ERROR_MEMORY,
"Failed to allocate test_image in benchmark iteration %d", i);
966 }
967 memcpy(test_image->
pixels, test_pixels, pixel_count *
sizeof(rgb_pixel_t));
970 if (result_ascii)
973 }
975
976
979
980#if SIMD_SUPPORT_SSE2
981 start = get_time_seconds();
982 for (int i = 0; i < adaptive_iterations; i++) {
984 if (test_image) {
985 memcpy(test_image->
pixels, test_pixels, pixel_count *
sizeof(rgb_pixel_t));
986 char *result_str =
987 render_ascii_sse2_unified_optimized(test_image, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
988 if (result_str)
991 }
992 }
993 result.
sse2_time = get_time_seconds() - start;
994#endif
995
996#if SIMD_SUPPORT_SSSE3
997 start = get_time_seconds();
998 for (int i = 0; i < adaptive_iterations; i++) {
1000 if (test_image) {
1001 memcpy(test_image->
pixels, test_pixels, pixel_count *
sizeof(rgb_pixel_t));
1002 char *result_str =
1003 render_ascii_ssse3_unified_optimized(test_image, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
1004 if (result_str)
1007 }
1008 }
1009 result.
ssse3_time = get_time_seconds() - start;
1010#endif
1011
1012#if SIMD_SUPPORT_AVX2
1013 start = get_time_seconds();
1014 for (int i = 0; i < adaptive_iterations; i++) {
1016 if (test_image) {
1017 memcpy(test_image->
pixels, test_pixels, pixel_count *
sizeof(rgb_pixel_t));
1018 char *result_str =
1019 render_ascii_avx2_unified_optimized(test_image, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
1020 if (result_str)
1023 }
1024 }
1025 result.
avx2_time = get_time_seconds() - start;
1026#endif
1027
1028#if SIMD_SUPPORT_NEON
1029 start = get_time_seconds();
1030 for (int i = 0; i < adaptive_iterations; i++) {
1031
1033 char *result =
1034 render_ascii_neon_unified_optimized(&temp_image, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
1035 if (result)
1037 }
1038 result.neon_time = get_time_seconds() - start;
1039#endif
1040
1041#if SIMD_SUPPORT_SVE
1042 start = get_time_seconds();
1043 for (int i = 0; i < adaptive_iterations; i++) {
1044
1046 char *result =
1047 render_ascii_sve_unified_optimized(&temp_image, background_mode, use_256color,
DEFAULT_ASCII_PALETTE);
1048 if (result)
1050 }
1051 result.sve_time = get_time_seconds() - start;
1052#endif
1053
1054#if SIMD_SUPPORT_SSE2
1055 if (result.sse2_time > 0 && result.sse2_time < best_time) {
1056 best_time = result.sse2_time;
1057 result.best_method = "SSE2";
1058 }
1059#endif
1060
1061#if SIMD_SUPPORT_SSSE3
1062 if (result.ssse3_time > 0 && result.ssse3_time < best_time) {
1063 best_time = result.ssse3_time;
1064 result.best_method = "SSSE3";
1065 }
1066#endif
1067
1068#if SIMD_SUPPORT_AVX2
1069 if (result.avx2_time > 0 && result.avx2_time < best_time) {
1070 best_time = result.avx2_time;
1071 result.best_method = "AVX2";
1072 }
1073#endif
1074
1075#if SIMD_SUPPORT_NEON
1076 if (result.neon_time > 0 && result.neon_time < best_time) {
1077 best_time = result.neon_time;
1078 result.best_method = "NEON";
1079 }
1080#endif
1081
1082
1083 result.scalar_time /= adaptive_iterations;
1084 if (result.sse2_time > 0)
1085 result.sse2_time /= adaptive_iterations;
1086 if (result.ssse3_time > 0)
1087 result.ssse3_time /= adaptive_iterations;
1088 if (result.avx2_time > 0)
1089 result.avx2_time /= adaptive_iterations;
1090 if (result.neon_time > 0)
1091 result.neon_time /= adaptive_iterations;
1092
1093 best_time = result.scalar_time;
1094
1095#if SIMD_SUPPORT_SSE2
1096 if (result.sse2_time > 0 && result.sse2_time < best_time)
1097 best_time = result.sse2_time;
1098#endif
1099#if SIMD_SUPPORT_SSSE3
1100 if (result.ssse3_time > 0 && result.ssse3_time < best_time)
1101 best_time = result.ssse3_time;
1102#endif
1103#if SIMD_SUPPORT_AVX2
1104 if (result.avx2_time > 0 && result.avx2_time < best_time)
1105 best_time = result.avx2_time;
1106#endif
1107#if SIMD_SUPPORT_NEON
1108 if (result.neon_time > 0 && result.neon_time < best_time)
1109 best_time = result.neon_time;
1110#endif
1111#if SIMD_SUPPORT_SVE
1112 if (result.sve_time > 0 && result.sve_time < best_time)
1113 best_time = result.sve_time;
1114#endif
1115
1116 result.speedup_best = result.scalar_time / best_time;
1117
1118 printf("------------\n");
1119 printf("scalar: %f\n", result.scalar_time);
1120 if (result.sse2_time > 0)
1121 printf("SSE2: %f\n", result.sse2_time);
1122 if (result.ssse3_time > 0)
1123 printf("SSSE3: %f\n", result.ssse3_time);
1124 if (result.avx2_time > 0)
1125 printf("avx2: %f\n", result.avx2_time);
1126 if (result.neon_time > 0)
1127 printf("neon: %f\n", result.neon_time);
1128 if (result.sve_time > 0)
1129 printf("sve: %f\n", result.sve_time);
1130 printf("Best method: %s, time: %f (%.2fx speedup (<1.0 = bad))\n", result.best_method, best_time,
1131 result.speedup_best);
1132 printf("------------\n");
1133
1134
1137
1138 return result;
1139}
#define SAFE_MALLOC(size, cast)
#define SAFE_CALLOC_SIMD(count, size, cast)
#define FATAL(code,...)
Exit with error code and custom message, with stack trace in debug builds.
#define log_error(...)
Log an ERROR message.
const char DEFAULT_ASCII_PALETTE[]
Default ASCII palette for legacy functions.
void prewarm_sgr256_cache(void)
Prewarm 256-color foreground/background cache for benchmarks.
char g_default_luminance_palette[256]
Default luminance palette (256 characters)
void prewarm_sgr256_fg_cache(void)
Prewarm 256-color foreground cache for benchmarks.
char * ascii_convert(image_t *original, const ssize_t width, const ssize_t height, const bool color, const bool _aspect_ratio, const bool stretch, const char *palette_chars, const char luminance_palette[256])
Convert image to ASCII art.
void image_destroy(image_t *p)
Destroy an image allocated with image_new()
image_t * image_new(size_t width, size_t height)
Create a new image with standard allocation.
IMAGE_ALLOC_SIMD
Pixels allocated with SAFE_MALLOC_SIMD()
int w
Image width in pixels (must be > 0)
int h
Image height in pixels (must be > 0)
rgb_pixel_t * pixels
Pixel data array (width * height RGB pixels, row-major order)
SIMD benchmark results structure.