ascii-chat 0.8.38
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
ascii_simd_color.c
Go to the documentation of this file.
1
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10#include <time.h>
11#include <assert.h>
12#include <ascii-chat/platform/abstraction.h>
13#include <stdint.h>
14#include <stdbool.h>
15#include <ascii-chat/video/simd/ascii_simd.h>
16
17#include <ascii-chat/common.h>
18#include <ascii-chat/video/image.h>
19#include <ascii-chat/video/palette.h>
20#include <ascii-chat/util/number.h> // For write_u8
21#include <ascii-chat/util/time.h>
22#include <ascii-chat/log/logging.h>
23
24/* ============================================================================
25 * SIMD-Optimized Colored ASCII Generation
26 *
27 * This extends the basic SIMD luminance conversion to include full
28 * ANSI color code generation for maximum performance.
29 * ============================================================================
30 */
31
// Luminance cut-off for background-ASCII mode (kept identical to the NEON
// version): Y >= 128 selects black text, anything darker selects white text.
// May be overridden by defining the macro before this point.
#if !defined(BGASCII_LUMA_THRESHOLD)
#define BGASCII_LUMA_THRESHOLD 128
#endif

// Overridable default of 10. NOTE(review): not referenced in the visible part
// of this file; presumably the gray-vs-color-cube decision threshold - confirm.
#if !defined(CUBE_GRAY_THRESHOLD)
#define CUBE_GRAY_THRESHOLD 10
#endif
40
41/* ============================================================================
42 * 256-Color ANSI Escape Sequence Generation (cached)
43 * ============================================================================
44 * Pre-generates all 256 color sequences at startup and caches them.
45 * This avoids repeated generation during per-pixel rendering.
46 */
47
48/* write_u8() is now in util/number.h */
49
/**
 * Pre-computed 256-color ANSI SGR sequence.
 *
 * One entry holds the escape sequence for a single palette index so that the
 * renderer can copy it instead of formatting it per pixel.
 */
typedef struct {
  char seq[12]; // ANSI sequence bytes; the longest form "\e[38;5;NNNm" is 11 bytes
  uint8_t len;  // number of bytes of seq that are in use
} sgr256_seq_t;
57
58static sgr256_seq_t sgr256_fg_cache[256];
59static sgr256_seq_t sgr256_bg_cache[256];
60static bool sgr256_cache_initialized = false;
61
62// Build and cache all 256 foreground and background color sequences
63static void init_sgr256_cache(void) {
64 if (sgr256_cache_initialized)
65 return;
66
67 // Cache foreground colors: \e[38;5;NNNm
68 for (int i = 0; i < 256; i++) {
69 char *p = sgr256_fg_cache[i].seq;
70 *p++ = '\033';
71 *p++ = '[';
72 *p++ = '3';
73 *p++ = '8';
74 *p++ = ';';
75 *p++ = '5';
76 *p++ = ';';
77 p = write_u8(p, (uint8_t)i);
78 *p++ = 'm';
79 sgr256_fg_cache[i].len = (uint8_t)(p - sgr256_fg_cache[i].seq);
80 }
81
82 // Cache background colors: \e[48;5;NNNm
83 for (int i = 0; i < 256; i++) {
84 char *p = sgr256_bg_cache[i].seq;
85 *p++ = '\033';
86 *p++ = '[';
87 *p++ = '4';
88 *p++ = '8';
89 *p++ = ';';
90 *p++ = '5';
91 *p++ = ';';
92 p = write_u8(p, (uint8_t)i);
93 *p++ = 'm';
94 sgr256_bg_cache[i].len = (uint8_t)(p - sgr256_bg_cache[i].seq);
95 }
96
97 sgr256_cache_initialized = true;
98}
99
// Format the combined sequence "\e[38;5;<fg>;48;5;<bg>m" into `buf`.
//
// buf     - destination; must have room for the whole sequence (no NUL is written)
// fg, bg  - 256-color palette indices for foreground and background
// len_out - receives the number of bytes written
//
// Returns `buf` (the start of the sequence, not the end).
static inline char *build_sgr256_fgbg(char *buf, uint8_t fg, uint8_t bg, uint8_t *len_out) {
  static const char fg_lead[] = "\033[38;5;";
  static const char bg_lead[] = ";48;5;";
  char *cursor = buf;

  memcpy(cursor, fg_lead, sizeof(fg_lead) - 1);
  cursor += sizeof(fg_lead) - 1;
  cursor = write_u8(cursor, fg);

  memcpy(cursor, bg_lead, sizeof(bg_lead) - 1);
  cursor += sizeof(bg_lead) - 1;
  cursor = write_u8(cursor, bg);

  *cursor++ = 'm';

  *len_out = (uint8_t)(cursor - buf);
  return buf;
}
122
// Public API wrappers

// Populate the 256-color SGR caches ahead of time so the first lookup does not
// pay for building them. The foreground and background tables are built
// together, so this is equivalent to prewarm_sgr256_cache().
void prewarm_sgr256_fg_cache(void) {
  init_sgr256_cache();
}

// Populate the 256-color SGR caches (foreground and background) ahead of time.
// Calling it again after the caches exist is a no-op.
void prewarm_sgr256_cache(void) {
  init_sgr256_cache();
}
131
132// Fast SGR generation for SIMD implementations - uses cached sequences
133char *get_sgr256_fg_string(uint8_t fg, uint8_t *len_out) {
134 if (!sgr256_cache_initialized) {
135 init_sgr256_cache();
136 }
137 *len_out = sgr256_fg_cache[fg].len;
138 return sgr256_fg_cache[fg].seq;
139}
140
141char *get_sgr256_bg_string(uint8_t bg, uint8_t *len_out) {
142 if (!sgr256_cache_initialized) {
143 init_sgr256_cache();
144 }
145 *len_out = sgr256_bg_cache[bg].len;
146 return sgr256_bg_cache[bg].seq;
147}
148
// Builds the combined foreground+background sequence on demand; caching every
// pair would need a 256*256 table.
//
// The result lives in a per-thread static scratch buffer: it is overwritten by
// the next call on the same thread, and only the first *len_out bytes are
// meaningful (bytes past that may be left over from an earlier, longer result).
char *get_sgr256_fg_bg_string(uint8_t fg, uint8_t bg, uint8_t *len_out) {
  static __thread char scratch[32];

  // build_sgr256_fgbg() hands back the buffer it was given.
  (void)build_sgr256_fgbg(scratch, fg, bg, len_out);
  return scratch;
}
154
// Append the SGR reset sequence "\x1b[0m" at `dst`.
//
// dst must have room for 4 bytes; no NUL terminator is written.
// Returns the position just past the bytes written.
//
// Deliberately not declared `inline`: under C99/C11 a plain `inline` definition
// does not by itself provide an external definition, so callers in other
// translation units could fail to link.
char *append_sgr_reset(char *dst) {
  static const char RESET[] = "\033[0m";
  const size_t reset_len = sizeof(RESET) - 1; // exclude the literal's NUL

  memcpy(dst, RESET, reset_len);
  return dst + reset_len;
}
161
162// OPTIMIZATION 9: Direct writes instead of memcpy - \x1b[38;2;R;G;Bm
163inline char *append_sgr_truecolor_fg(char *dst, uint8_t r, uint8_t g, uint8_t b) {
164 // Constructor ensures initialization
165
166 // Direct character writes (compiler will optimize to word operations)
167 *dst++ = '\033';
168 *dst++ = '[';
169 *dst++ = '3';
170 *dst++ = '8';
171 *dst++ = ';';
172 *dst++ = '2';
173 *dst++ = ';';
174
175 // Fast digit copying for 1-3 digit numbers (avoid memcpy overhead)
176 const dec3_t *rd = &g_dec3_cache.dec3_table[r];
177 if (rd->len == 1) {
178 *dst++ = rd->s[0];
179 } else if (rd->len == 2) {
180 dst[0] = rd->s[0];
181 dst[1] = rd->s[1];
182 dst += 2;
183 } else {
184 dst[0] = rd->s[0];
185 dst[1] = rd->s[1];
186 dst[2] = rd->s[2];
187 dst += 3;
188 }
189 *dst++ = ';';
190
191 const dec3_t *gd = &g_dec3_cache.dec3_table[g];
192 if (gd->len == 1) {
193 *dst++ = gd->s[0];
194 } else if (gd->len == 2) {
195 dst[0] = gd->s[0];
196 dst[1] = gd->s[1];
197 dst += 2;
198 } else {
199 dst[0] = gd->s[0];
200 dst[1] = gd->s[1];
201 dst[2] = gd->s[2];
202 dst += 3;
203 }
204 *dst++ = ';';
205
206 const dec3_t *bd = &g_dec3_cache.dec3_table[b];
207 if (bd->len == 1) {
208 *dst++ = bd->s[0];
209 } else if (bd->len == 2) {
210 dst[0] = bd->s[0];
211 dst[1] = bd->s[1];
212 dst += 2;
213 } else {
214 dst[0] = bd->s[0];
215 dst[1] = bd->s[1];
216 dst[2] = bd->s[2];
217 dst += 3;
218 }
219 *dst++ = 'm';
220 return dst;
221}
222
223// OPTIMIZATION 9: Direct writes - \x1b[48;2;R;G;Bm
224inline char *append_sgr_truecolor_bg(char *dst, uint8_t r, uint8_t g, uint8_t b) {
225 // Constructor ensures initialization
226
227 // Direct character writes for "\033[48;2;"
228 *dst++ = '\033';
229 *dst++ = '[';
230 *dst++ = '4';
231 *dst++ = '8';
232 *dst++ = ';';
233 *dst++ = '2';
234 *dst++ = ';';
235
236 // Optimized digit copying
237 const dec3_t *rd = &g_dec3_cache.dec3_table[r];
238 if (rd->len == 1) {
239 *dst++ = rd->s[0];
240 } else if (rd->len == 2) {
241 dst[0] = rd->s[0];
242 dst[1] = rd->s[1];
243 dst += 2;
244 } else {
245 dst[0] = rd->s[0];
246 dst[1] = rd->s[1];
247 dst[2] = rd->s[2];
248 dst += 3;
249 }
250 *dst++ = ';';
251
252 const dec3_t *gd = &g_dec3_cache.dec3_table[g];
253 if (gd->len == 1) {
254 *dst++ = gd->s[0];
255 } else if (gd->len == 2) {
256 dst[0] = gd->s[0];
257 dst[1] = gd->s[1];
258 dst += 2;
259 } else {
260 dst[0] = gd->s[0];
261 dst[1] = gd->s[1];
262 dst[2] = gd->s[2];
263 dst += 3;
264 }
265 *dst++ = ';';
266
267 const dec3_t *bd = &g_dec3_cache.dec3_table[b];
268 if (bd->len == 1) {
269 *dst++ = bd->s[0];
270 } else if (bd->len == 2) {
271 dst[0] = bd->s[0];
272 dst[1] = bd->s[1];
273 dst += 2;
274 } else {
275 dst[0] = bd->s[0];
276 dst[1] = bd->s[1];
277 dst[2] = bd->s[2];
278 dst += 3;
279 }
280 *dst++ = 'm';
281 return dst;
282}
283
284// OPTIMIZATION 9: Optimized FG+BG - \x1b[38;2;R;G;B;48;2;r;g;bm (eliminate all memcpy calls)
285inline char *append_sgr_truecolor_fg_bg(char *dst, uint8_t fr, uint8_t fg, uint8_t fb, uint8_t br, uint8_t bg,
286 uint8_t bb) {
287 // Constructor ensures initialization
288
289 // Write "\033[38;2;" directly (7 chars)
290 *dst++ = '\033';
291 *dst++ = '[';
292 *dst++ = '3';
293 *dst++ = '8';
294 *dst++ = ';';
295 *dst++ = '2';
296 *dst++ = ';';
297
298 // Foreground RGB digits
299 const dec3_t *d = &g_dec3_cache.dec3_table[fr];
300 if (d->len == 1) {
301 *dst++ = d->s[0];
302 } else if (d->len == 2) {
303 dst[0] = d->s[0];
304 dst[1] = d->s[1];
305 dst += 2;
306 } else {
307 dst[0] = d->s[0];
308 dst[1] = d->s[1];
309 dst[2] = d->s[2];
310 dst += 3;
311 }
312 *dst++ = ';';
313
314 d = &g_dec3_cache.dec3_table[fg];
315 if (d->len == 1) {
316 *dst++ = d->s[0];
317 } else if (d->len == 2) {
318 dst[0] = d->s[0];
319 dst[1] = d->s[1];
320 dst += 2;
321 } else {
322 dst[0] = d->s[0];
323 dst[1] = d->s[1];
324 dst[2] = d->s[2];
325 dst += 3;
326 }
327 *dst++ = ';';
328
329 d = &g_dec3_cache.dec3_table[fb];
330 if (d->len == 1) {
331 *dst++ = d->s[0];
332 } else if (d->len == 2) {
333 dst[0] = d->s[0];
334 dst[1] = d->s[1];
335 dst += 2;
336 } else {
337 dst[0] = d->s[0];
338 dst[1] = d->s[1];
339 dst[2] = d->s[2];
340 dst += 3;
341 }
342
343 // Write ";48;2;" directly (6 chars)
344 *dst++ = ';';
345 *dst++ = '4';
346 *dst++ = '8';
347 *dst++ = ';';
348 *dst++ = '2';
349 *dst++ = ';';
350
351 // Background RGB digits
352 d = &g_dec3_cache.dec3_table[br];
353 if (d->len == 1) {
354 *dst++ = d->s[0];
355 } else if (d->len == 2) {
356 dst[0] = d->s[0];
357 dst[1] = d->s[1];
358 dst += 2;
359 } else {
360 dst[0] = d->s[0];
361 dst[1] = d->s[1];
362 dst[2] = d->s[2];
363 dst += 3;
364 }
365 *dst++ = ';';
366
367 d = &g_dec3_cache.dec3_table[bg];
368 if (d->len == 1) {
369 *dst++ = d->s[0];
370 } else if (d->len == 2) {
371 dst[0] = d->s[0];
372 dst[1] = d->s[1];
373 dst += 2;
374 } else {
375 dst[0] = d->s[0];
376 dst[1] = d->s[1];
377 dst[2] = d->s[2];
378 dst += 3;
379 }
380 *dst++ = ';';
381
382 d = &g_dec3_cache.dec3_table[bb];
383 if (d->len == 1) {
384 *dst++ = d->s[0];
385 } else if (d->len == 2) {
386 dst[0] = d->s[0];
387 dst[1] = d->s[1];
388 dst += 2;
389 } else {
390 dst[0] = d->s[0];
391 dst[1] = d->s[1];
392 dst[2] = d->s[2];
393 dst += 3;
394 }
395
396 *dst++ = 'm';
397 return dst;
398}
399
400/* ============================================================================
401 * All platform-specific implementations moved to lib/video/simd/
402 * ============================================================================
403 */
404
405// Row-based scalar function removed - use image_print_color() instead
406
407/* ============================================================================
408 * OPTIMIZATION #4: Fast 256-color implementations (defined after SGR functions)
409 * ============================================================================
410 */
411
412char *image_print_color_simd(image_t *image, bool use_background_mode, bool use_256color, const char *ascii_chars) {
413 log_dev_every(4500 * US_PER_MS_INT, "image_print_color_simd called: width=%d, height=%d, use_256color=%d",
414 image ? image->w : -1, image ? image->h : -1, use_256color);
415
416#if SIMD_SUPPORT_AVX2
417 log_debug_every(10 * US_PER_SEC_INT, "Taking AVX2 path: width=%d, height=%d", image->w, image->h);
418 START_TIMER("render_avx2");
419 char *result = render_ascii_avx2_unified_optimized(image, use_background_mode, use_256color, ascii_chars);
420 STOP_TIMER_AND_LOG_EVERY(dev, 3 * NS_PER_SEC_INT, 5 * NS_PER_MS_INT, "render_avx2", "RENDER_AVX2: Complete");
421 return result;
422#elif SIMD_SUPPORT_SSSE3
423 log_info_every(10 * US_PER_SEC_INT, "WASM: Taking SSSE3 path with use_256color=%d", use_256color);
424 START_TIMER("render_ssse3");
425 char *result = render_ascii_ssse3_unified_optimized(image, use_background_mode, use_256color, ascii_chars);
426 STOP_TIMER_AND_LOG_EVERY(dev, 3 * NS_PER_SEC_INT, 5 * NS_PER_MS_INT, "render_ssse3", "RENDER_SSSE3: Complete");
427 return result;
428#elif SIMD_SUPPORT_SSE2
429 log_info_every(10 * US_PER_SEC_INT, "WASM: Taking SSE2 path with use_256color=%d", use_256color);
430 START_TIMER("render_sse2");
431 char *result = render_ascii_sse2_unified_optimized(image, use_background_mode, use_256color, ascii_chars);
432 STOP_TIMER_AND_LOG_EVERY(dev, 3 * NS_PER_SEC_INT, 5 * NS_PER_MS_INT, "render_sse2", "RENDER_SSE2: Complete");
433 return result;
434#elif SIMD_SUPPORT_NEON
435 log_info_every(10 * US_PER_SEC_INT, "WASM: Taking NEON path with use_256color=%d", use_256color);
436 START_TIMER("render_neon");
437 char *result = render_ascii_neon_unified_optimized(image, use_background_mode, use_256color, ascii_chars);
438 STOP_TIMER_AND_LOG_EVERY(dev, 3 * NS_PER_SEC_INT, 5 * NS_PER_MS_INT, "render_neon", "RENDER_NEON: Complete");
439 return result;
440#else
441 log_info_every(10 * US_PER_SEC_INT, "WASM: Taking FALLBACK path (no SIMD), use_256color=%d is IGNORED", use_256color);
442 // Fallback implementation for non-SIMD platforms
443 // Use scalar image function for fallback path - no SIMD allocation needed
444 (void)use_256color; // Suppress unused parameter warning
445 (void)use_background_mode; // Suppress unused parameter warning
446 START_TIMER("render_color_fallback");
447 char *result = image_print_color(image, ascii_chars);
448 STOP_TIMER_AND_LOG_EVERY(dev, 3 * NS_PER_SEC_INT, 5 * NS_PER_MS_INT, "render_color_fallback",
449 "RENDER_COLOR_FALLBACK: Complete");
450 return result;
451#endif
452}
global_dec3_cache_t g_dec3_cache
Definition ascii_simd.c:25
char * append_sgr_reset(char *dst)
char * append_sgr_truecolor_fg_bg(char *dst, uint8_t fr, uint8_t fg, uint8_t fb, uint8_t br, uint8_t bg, uint8_t bb)
char * get_sgr256_fg_bg_string(uint8_t fg, uint8_t bg, uint8_t *len_out)
void prewarm_sgr256_cache(void)
void prewarm_sgr256_fg_cache(void)
char * append_sgr_truecolor_bg(char *dst, uint8_t r, uint8_t g, uint8_t b)
char * get_sgr256_bg_string(uint8_t bg, uint8_t *len_out)
char * get_sgr256_fg_string(uint8_t fg, uint8_t *len_out)
char * append_sgr_truecolor_fg(char *dst, uint8_t r, uint8_t g, uint8_t b)
char * image_print_color_simd(image_t *image, bool use_background_mode, bool use_256color, const char *ascii_chars)
Pre-computed 256-color ANSI SGR sequence.
char seq[12]
ANSI sequence string (max 11 bytes for "\e[38;5;NNNm")
uint8_t len
Length of sequence string.
char * image_print_color(const image_t *p, const char *palette)