ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
ascii_simd_color.c
Go to the documentation of this file.
1
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10#include <time.h>
11#include <assert.h>
13#include <stdint.h>
14#include <stdbool.h>
15#include "ascii_simd.h"
16
17#include "common.h"
18#include "../image.h"
19#include "video/palette.h"
20#include "util/number.h" // For write_u8
21
22/* ============================================================================
23 * SIMD-Optimized Colored ASCII Generation
24 *
25 * This extends the basic SIMD luminance conversion to include full
26 * ANSI color code generation for maximum performance.
27 * ============================================================================
28 */
29
30// Background ASCII luminance threshold - same as NEON version
31#ifndef BGASCII_LUMA_THRESHOLD
32#define BGASCII_LUMA_THRESHOLD 128 // Y >= 128 -> black text; else white text
33#endif
34
35#ifndef CUBE_GRAY_THRESHOLD
36#define CUBE_GRAY_THRESHOLD 10
37#endif
38
39/* ============================================================================
40 * 256-Color ANSI Escape Sequence Generation (inline, no cache)
41 * ============================================================================
42 * Generates ANSI sequences on-demand. Modern CPUs execute this in ~10-20ns,
43 * which is negligible compared to terminal I/O (microseconds).
44 */
45
46/* write_u8() is now in util/number.h */
47
/**
 * Write the 256-color foreground SGR sequence "\033[38;5;<fg>m" into buf.
 *
 * No NUL terminator is written; callers must use the length reported through
 * len_out.
 *
 * NOTE(review): assumes write_u8() (util/number.h) emits the decimal digits of
 * its argument and returns the position just past them, which would make the
 * longest sequence 11 bytes - confirm against util/number.h.
 *
 * @param buf      Destination buffer, written starting at its first byte.
 * @param fg       256-color palette index for the foreground.
 * @param len_out  Receives the number of bytes written to buf.
 * @return buf, i.e. the start of the sequence (not its end).
 */
static inline char *build_sgr256_fg(char *buf, uint8_t fg, uint8_t *len_out) {
  static const char fg_prefix[] = "\033[38;5;";
  char *cursor = buf;

  memcpy(cursor, fg_prefix, sizeof(fg_prefix) - 1);
  cursor += sizeof(fg_prefix) - 1;

  cursor = write_u8(cursor, fg);
  *cursor++ = 'm';

  *len_out = (uint8_t)(cursor - buf);
  return buf;
}
63
/**
 * Write the combined 256-color SGR sequence "\033[38;5;<fg>;48;5;<bg>m" into buf.
 *
 * No NUL terminator is written; callers must use the length reported through
 * len_out.
 *
 * NOTE(review): assumes write_u8() (util/number.h) emits the decimal digits of
 * its argument and returns the position just past them, which would make the
 * longest sequence 20 bytes - confirm against util/number.h.
 *
 * @param buf      Destination buffer, written starting at its first byte.
 * @param fg       256-color palette index for the foreground.
 * @param bg       256-color palette index for the background.
 * @param len_out  Receives the number of bytes written to buf.
 * @return buf, i.e. the start of the sequence (not its end).
 */
static inline char *build_sgr256_fgbg(char *buf, uint8_t fg, uint8_t bg, uint8_t *len_out) {
  static const char fg_prefix[] = "\033[38;5;";
  static const char bg_infix[] = ";48;5;";
  char *cursor = buf;

  memcpy(cursor, fg_prefix, sizeof(fg_prefix) - 1);
  cursor += sizeof(fg_prefix) - 1;
  cursor = write_u8(cursor, fg);

  memcpy(cursor, bg_infix, sizeof(bg_infix) - 1);
  cursor += sizeof(bg_infix) - 1;
  cursor = write_u8(cursor, bg);

  *cursor++ = 'm';

  *len_out = (uint8_t)(cursor - buf);
  return buf;
}
86
// Public API wrappers.
//
// Both prewarm entry points are kept only so existing callers keep linking:
// the SGR sequences are generated on demand, so there is nothing to warm.

/** Prewarm the 256-color foreground cache. No-op: the cache was removed. */
void prewarm_sgr256_fg_cache(void) {
  // No-op: cache removed
}

/** Prewarm the 256-color foreground/background cache. No-op: the cache was removed. */
void prewarm_sgr256_cache(void) {
  // No-op: cache removed
}
95
// Fast SGR generation for SIMD implementations

/**
 * Get the 256-color foreground sequence for fg as a pointer/length pair.
 *
 * The sequence is built into a thread-local static buffer, so the returned
 * pointer is overwritten by the next call made on the same thread. The buffer
 * is not NUL-terminated by the builder; use *len_out for the length.
 *
 * @param fg       256-color palette index for the foreground.
 * @param len_out  Receives the length in bytes of the returned sequence.
 * @return Pointer to the thread-local buffer holding the sequence.
 */
char *get_sgr256_fg_string(uint8_t fg, uint8_t *len_out) {
  static __thread char buf[16]; // Thread-local buffer
  return build_sgr256_fg(buf, fg, len_out);
}
101
/**
 * Get the combined 256-color foreground/background sequence as a
 * pointer/length pair.
 *
 * The sequence is built into a thread-local static buffer, so the returned
 * pointer is overwritten by the next call made on the same thread. The buffer
 * is not NUL-terminated by the builder; use *len_out for the length.
 *
 * @param fg       256-color palette index for the foreground.
 * @param bg       256-color palette index for the background.
 * @param len_out  Receives the length in bytes of the returned sequence.
 * @return Pointer to the thread-local buffer holding the sequence.
 */
char *get_sgr256_fg_bg_string(uint8_t fg, uint8_t bg, uint8_t *len_out) {
  static __thread char buf[32]; // Thread-local buffer
  return build_sgr256_fgbg(buf, fg, bg, len_out);
}
106
/**
 * Append the ANSI attribute-reset sequence "\033[0m" at dst.
 *
 * Exactly four bytes are written and no NUL terminator is added.
 *
 * Defined without `inline`: a plain `inline` definition only produces an
 * external symbol when a non-inline declaration is also visible, so dropping
 * the keyword guarantees other translation units can always link against it.
 *
 * @param dst  Write position; must have room for 4 bytes.
 * @return Pointer just past the last byte written (dst + 4).
 */
char *append_sgr_reset(char *dst) {
  static const char reset_seq[] = "\033[0m";
  const size_t reset_len = sizeof(reset_seq) - 1; // exclude the literal's NUL

  memcpy(dst, reset_seq, reset_len);
  return dst + reset_len;
}
113
114// OPTIMIZATION 9: Direct writes instead of memcpy - \x1b[38;2;R;G;Bm
115inline char *append_sgr_truecolor_fg(char *dst, uint8_t r, uint8_t g, uint8_t b) {
116 // Constructor ensures initialization
117
118 // Direct character writes (compiler will optimize to word operations)
119 *dst++ = '\033';
120 *dst++ = '[';
121 *dst++ = '3';
122 *dst++ = '8';
123 *dst++ = ';';
124 *dst++ = '2';
125 *dst++ = ';';
126
127 // Fast digit copying for 1-3 digit numbers (avoid memcpy overhead)
128 const dec3_t *rd = &g_dec3_cache.dec3_table[r];
129 if (rd->len == 1) {
130 *dst++ = rd->s[0];
131 } else if (rd->len == 2) {
132 dst[0] = rd->s[0];
133 dst[1] = rd->s[1];
134 dst += 2;
135 } else {
136 dst[0] = rd->s[0];
137 dst[1] = rd->s[1];
138 dst[2] = rd->s[2];
139 dst += 3;
140 }
141 *dst++ = ';';
142
143 const dec3_t *gd = &g_dec3_cache.dec3_table[g];
144 if (gd->len == 1) {
145 *dst++ = gd->s[0];
146 } else if (gd->len == 2) {
147 dst[0] = gd->s[0];
148 dst[1] = gd->s[1];
149 dst += 2;
150 } else {
151 dst[0] = gd->s[0];
152 dst[1] = gd->s[1];
153 dst[2] = gd->s[2];
154 dst += 3;
155 }
156 *dst++ = ';';
157
158 const dec3_t *bd = &g_dec3_cache.dec3_table[b];
159 if (bd->len == 1) {
160 *dst++ = bd->s[0];
161 } else if (bd->len == 2) {
162 dst[0] = bd->s[0];
163 dst[1] = bd->s[1];
164 dst += 2;
165 } else {
166 dst[0] = bd->s[0];
167 dst[1] = bd->s[1];
168 dst[2] = bd->s[2];
169 dst += 3;
170 }
171 *dst++ = 'm';
172 return dst;
173}
174
175// OPTIMIZATION 9: Direct writes - \x1b[48;2;R;G;Bm
176inline char *append_sgr_truecolor_bg(char *dst, uint8_t r, uint8_t g, uint8_t b) {
177 // Constructor ensures initialization
178
179 // Direct character writes for "\033[48;2;"
180 *dst++ = '\033';
181 *dst++ = '[';
182 *dst++ = '4';
183 *dst++ = '8';
184 *dst++ = ';';
185 *dst++ = '2';
186 *dst++ = ';';
187
188 // Optimized digit copying
189 const dec3_t *rd = &g_dec3_cache.dec3_table[r];
190 if (rd->len == 1) {
191 *dst++ = rd->s[0];
192 } else if (rd->len == 2) {
193 dst[0] = rd->s[0];
194 dst[1] = rd->s[1];
195 dst += 2;
196 } else {
197 dst[0] = rd->s[0];
198 dst[1] = rd->s[1];
199 dst[2] = rd->s[2];
200 dst += 3;
201 }
202 *dst++ = ';';
203
204 const dec3_t *gd = &g_dec3_cache.dec3_table[g];
205 if (gd->len == 1) {
206 *dst++ = gd->s[0];
207 } else if (gd->len == 2) {
208 dst[0] = gd->s[0];
209 dst[1] = gd->s[1];
210 dst += 2;
211 } else {
212 dst[0] = gd->s[0];
213 dst[1] = gd->s[1];
214 dst[2] = gd->s[2];
215 dst += 3;
216 }
217 *dst++ = ';';
218
219 const dec3_t *bd = &g_dec3_cache.dec3_table[b];
220 if (bd->len == 1) {
221 *dst++ = bd->s[0];
222 } else if (bd->len == 2) {
223 dst[0] = bd->s[0];
224 dst[1] = bd->s[1];
225 dst += 2;
226 } else {
227 dst[0] = bd->s[0];
228 dst[1] = bd->s[1];
229 dst[2] = bd->s[2];
230 dst += 3;
231 }
232 *dst++ = 'm';
233 return dst;
234}
235
236// OPTIMIZATION 9: Optimized FG+BG - \x1b[38;2;R;G;B;48;2;r;g;bm (eliminate all memcpy calls)
237inline char *append_sgr_truecolor_fg_bg(char *dst, uint8_t fr, uint8_t fg, uint8_t fb, uint8_t br, uint8_t bg,
238 uint8_t bb) {
239 // Constructor ensures initialization
240
241 // Write "\033[38;2;" directly (7 chars)
242 *dst++ = '\033';
243 *dst++ = '[';
244 *dst++ = '3';
245 *dst++ = '8';
246 *dst++ = ';';
247 *dst++ = '2';
248 *dst++ = ';';
249
250 // Foreground RGB digits
251 const dec3_t *d = &g_dec3_cache.dec3_table[fr];
252 if (d->len == 1) {
253 *dst++ = d->s[0];
254 } else if (d->len == 2) {
255 dst[0] = d->s[0];
256 dst[1] = d->s[1];
257 dst += 2;
258 } else {
259 dst[0] = d->s[0];
260 dst[1] = d->s[1];
261 dst[2] = d->s[2];
262 dst += 3;
263 }
264 *dst++ = ';';
265
266 d = &g_dec3_cache.dec3_table[fg];
267 if (d->len == 1) {
268 *dst++ = d->s[0];
269 } else if (d->len == 2) {
270 dst[0] = d->s[0];
271 dst[1] = d->s[1];
272 dst += 2;
273 } else {
274 dst[0] = d->s[0];
275 dst[1] = d->s[1];
276 dst[2] = d->s[2];
277 dst += 3;
278 }
279 *dst++ = ';';
280
281 d = &g_dec3_cache.dec3_table[fb];
282 if (d->len == 1) {
283 *dst++ = d->s[0];
284 } else if (d->len == 2) {
285 dst[0] = d->s[0];
286 dst[1] = d->s[1];
287 dst += 2;
288 } else {
289 dst[0] = d->s[0];
290 dst[1] = d->s[1];
291 dst[2] = d->s[2];
292 dst += 3;
293 }
294
295 // Write ";48;2;" directly (6 chars)
296 *dst++ = ';';
297 *dst++ = '4';
298 *dst++ = '8';
299 *dst++ = ';';
300 *dst++ = '2';
301 *dst++ = ';';
302
303 // Background RGB digits
304 d = &g_dec3_cache.dec3_table[br];
305 if (d->len == 1) {
306 *dst++ = d->s[0];
307 } else if (d->len == 2) {
308 dst[0] = d->s[0];
309 dst[1] = d->s[1];
310 dst += 2;
311 } else {
312 dst[0] = d->s[0];
313 dst[1] = d->s[1];
314 dst[2] = d->s[2];
315 dst += 3;
316 }
317 *dst++ = ';';
318
319 d = &g_dec3_cache.dec3_table[bg];
320 if (d->len == 1) {
321 *dst++ = d->s[0];
322 } else if (d->len == 2) {
323 dst[0] = d->s[0];
324 dst[1] = d->s[1];
325 dst += 2;
326 } else {
327 dst[0] = d->s[0];
328 dst[1] = d->s[1];
329 dst[2] = d->s[2];
330 dst += 3;
331 }
332 *dst++ = ';';
333
334 d = &g_dec3_cache.dec3_table[bb];
335 if (d->len == 1) {
336 *dst++ = d->s[0];
337 } else if (d->len == 2) {
338 dst[0] = d->s[0];
339 dst[1] = d->s[1];
340 dst += 2;
341 } else {
342 dst[0] = d->s[0];
343 dst[1] = d->s[1];
344 dst[2] = d->s[2];
345 dst += 3;
346 }
347
348 *dst++ = 'm';
349 return dst;
350}
351
// Legacy wrapper functions for backward compatibility

/**
 * Legacy wrapper: write the truecolor foreground sequence for (r, g, b) at dst.
 *
 * Marked unused so builds that never call it do not warn.
 *
 * @param r    Red component.
 * @param g    Green component.
 * @param b    Blue component.
 * @param dst  Write position; no NUL terminator is added by the callee.
 * @return Number of bytes written at dst.
 */
static inline int __attribute__((unused)) generate_ansi_fg(uint8_t r, uint8_t g, uint8_t b, char *dst) {
  const char *end = append_sgr_truecolor_fg(dst, r, g, b);
  return (int)(end - dst);
}
357
/**
 * Legacy wrapper: write the truecolor background sequence for (r, g, b) at dst.
 *
 * Marked unused so builds that never call it do not warn.
 *
 * @param r    Red component.
 * @param g    Green component.
 * @param b    Blue component.
 * @param dst  Write position; no NUL terminator is added by the callee.
 * @return Number of bytes written at dst.
 */
static inline int __attribute__((unused)) generate_ansi_bg(uint8_t r, uint8_t g, uint8_t b, char *dst) {
  const char *end = append_sgr_truecolor_bg(dst, r, g, b);
  return (int)(end - dst);
}
362
363/* ============================================================================
364 * All platform-specific implementations moved to lib/video/simd/
365 * ============================================================================
366 */
367
368// Row-based scalar function removed - use image_print_color() instead
369
370/* ============================================================================
371 * OPTIMIZATION #4: Fast 256-color implementations (defined after SGR functions)
372 * ============================================================================
373 */
374
375char *image_print_color_simd(image_t *image, bool use_background_mode, bool use_256color, const char *ascii_chars) {
376 (void)use_256color; // Suppress unused parameter warning when SIMD not available
377
378#if SIMD_SUPPORT_AVX2
379 (void)use_background_mode; // Suppress unused parameter warning when SIMD not available
380 // FIXME: my AVX2 implementation is dim and has vertical stripe artifacts. Use scalar until we fix it.
381 return image_print_color(image, ascii_chars);
382 // return render_ascii_avx2_unified_optimized(image, use_background_mode, use_256color, ascii_chars);
383#elif SIMD_SUPPORT_SSSE3
384 return render_ascii_ssse3_unified_optimized(image, use_background_mode, use_256color, ascii_chars);
385#elif SIMD_SUPPORT_SSE2
386 return render_ascii_sse2_unified_optimized(image, use_background_mode, use_256color, ascii_chars);
387#elif SIMD_SUPPORT_NEON
388 return render_ascii_neon_unified_optimized(image, use_background_mode, use_256color, ascii_chars);
389#else
390 // Fallback implementation for non-NEON platforms
391 // Use scalar image function for fallback path - no SIMD allocation needed
392 (void)use_background_mode; // Suppress unused parameter warning
393 return image_print_color(image, ascii_chars);
394#endif
395}
🔌 Cross-platform abstraction layer umbrella header for ascii-chat
SIMD-optimized ASCII conversion interface.
unsigned char uint8_t
Definition common.h:56
global_dec3_cache_t g_dec3_cache
Global decimal cache instance.
Definition ascii_simd.c:23
dec3_t dec3_table[256]
Definition ascii_simd.h:98
char * append_sgr_reset(char *dst)
Append ANSI reset sequence.
char * append_sgr_truecolor_fg_bg(char *dst, uint8_t fr, uint8_t fg, uint8_t fb, uint8_t br, uint8_t bg, uint8_t bb)
Append truecolor foreground and background SGR sequence.
char * get_sgr256_fg_bg_string(uint8_t fg, uint8_t bg, uint8_t *len_out)
Get 256-color foreground/background ANSI sequence string.
uint8_t len
Definition ascii_simd.h:88
char * image_print_color(const image_t *p, const char *palette)
Print image as ASCII art with color.
void prewarm_sgr256_cache(void)
Prewarm 256-color foreground/background cache for benchmarks.
void prewarm_sgr256_fg_cache(void)
Prewarm 256-color foreground cache for benchmarks.
char * append_sgr_truecolor_bg(char *dst, uint8_t r, uint8_t g, uint8_t b)
Append truecolor background SGR sequence.
char * get_sgr256_fg_string(uint8_t fg, uint8_t *len_out)
Get 256-color foreground ANSI sequence string.
char s[3]
Definition ascii_simd.h:89
char * append_sgr_truecolor_fg(char *dst, uint8_t r, uint8_t g, uint8_t b)
Append truecolor foreground SGR sequence.
char * image_print_color_simd(image_t *image, bool use_background_mode, bool use_256color, const char *ascii_chars)
Print image as ASCII with color using SIMD.
🔢 Number Formatting and Conversion Utilities
ASCII Palette Management for Video-to-ASCII Conversion.
RGB pixel structure.
Definition video/image.h:80
Decimal conversion cache structure (1-3 digits)
Definition ascii_simd.h:87
Image structure.
⏱️ High-precision timing utilities using sokol_time.h and uthash
Common SIMD utilities and structures.