ascii-chat 0.8.38
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
discovery/strings.c
Go to the documentation of this file.
1
12#include <ascii-chat/discovery/strings.h>
13#include <ascii-chat/discovery/adjectives.h>
14#include <ascii-chat/discovery/nouns.h>
15#include <ascii-chat/log/logging.h>
16#include <ascii-chat/util/utf8.h>
17// NOTE: Use explicit path to avoid Windows include resolution picking up options/common.h
18#include <ascii-chat/common.h>
19#include <ascii-chat/platform/init.h>
20#include <ascii-chat/uthash/uthash.h>
21#include <sodium.h>
22#include <string.h>
23#include <ctype.h>
24#include <stdlib.h>
25#include <ascii-chat/util/pcre2.h>
26#include <pcre2.h>
27
28// ============================================================================
29// Word Cache Implementation (Hashtable for O(1) validation)
30// ============================================================================
31
35typedef struct {
36 char *word;
37 UT_hash_handle hh;
39
43static word_cache_entry_t *g_adjectives_cache = NULL;
44static word_cache_entry_t *g_nouns_cache = NULL;
45static bool g_cache_initialized = false;
46// Mutex to protect lazy initialization of word validation caches
47static static_mutex_t g_cache_init_mutex = STATIC_MUTEX_INIT;
48
55 if (!g_cache_initialized) {
56 return;
57 }
58
59 // Cleanup adjectives cache
60 word_cache_entry_t *adj_entry, *adj_tmp;
61 HASH_ITER(hh, g_adjectives_cache, adj_entry, adj_tmp) {
62 HASH_DEL(g_adjectives_cache, adj_entry);
63 SAFE_FREE(adj_entry->word);
64 SAFE_FREE(adj_entry);
65 }
66 g_adjectives_cache = NULL;
67
68 // Cleanup nouns cache
69 word_cache_entry_t *noun_entry, *noun_tmp;
70 HASH_ITER(hh, g_nouns_cache, noun_entry, noun_tmp) {
71 HASH_DEL(g_nouns_cache, noun_entry);
72 SAFE_FREE(noun_entry->word);
73 SAFE_FREE(noun_entry);
74 }
75 g_nouns_cache = NULL;
76
77 g_cache_initialized = false;
78 log_dev("Session string word cache cleaned up");
79}
80
88static asciichat_error_t build_validation_caches(void) {
89 static_mutex_lock(&g_cache_init_mutex);
90
91 // Double-check under lock: another thread may have already built while we waited
92 if (g_cache_initialized) {
93 static_mutex_unlock(&g_cache_init_mutex);
94 return ASCIICHAT_OK;
95 }
96
97 // Build adjectives cache
98 for (size_t i = 0; i < adjectives_count; i++) {
99 word_cache_entry_t *entry = SAFE_MALLOC(sizeof(word_cache_entry_t), word_cache_entry_t *);
100 if (!entry) {
101 static_mutex_unlock(&g_cache_init_mutex);
103 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate adjectives cache entry");
104 }
105
106 size_t word_len = strlen(adjectives[i]) + 1;
107 entry->word = SAFE_MALLOC(word_len, char *);
108 if (!entry->word) {
109 SAFE_FREE(entry);
110 static_mutex_unlock(&g_cache_init_mutex);
112 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate memory for adjective word");
113 }
114 memcpy(entry->word, adjectives[i], word_len);
115
116 HASH_ADD_KEYPTR(hh, g_adjectives_cache, entry->word, strlen(entry->word), entry);
117 }
118
119 // Build nouns cache
120 for (size_t i = 0; i < nouns_count; i++) {
121 word_cache_entry_t *entry = SAFE_MALLOC(sizeof(word_cache_entry_t), word_cache_entry_t *);
122 if (!entry) {
123 static_mutex_unlock(&g_cache_init_mutex);
125 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate nouns cache entry");
126 }
127
128 size_t word_len = strlen(nouns[i]) + 1;
129 entry->word = SAFE_MALLOC(word_len, char *);
130 if (!entry->word) {
131 SAFE_FREE(entry);
132 static_mutex_unlock(&g_cache_init_mutex);
134 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate memory for noun word");
135 }
136 memcpy(entry->word, nouns[i], word_len);
137
138 HASH_ADD_KEYPTR(hh, g_nouns_cache, entry->word, strlen(entry->word), entry);
139 }
140
141 g_cache_initialized = true;
142
143 // NOTE: Cleanup is now handled by asciichat_shared_destroy() called from application code.
144 // Library code does not call atexit() - that's the application's responsibility.
145
146 static_mutex_unlock(&g_cache_init_mutex);
147 log_dev("Session string word cache initialized (%zu adjectives, %zu nouns)", adjectives_count, nouns_count);
148 return ASCIICHAT_OK;
149}
150
151// ============================================================================
152// PCRE2 Session String Format Validator
153// ============================================================================
154
168static const char *SESSION_STRING_FORMAT_PATTERN = "^(?<adj>[a-z]{2,12})-(?<noun1>[a-z]{2,12})-(?<noun2>[a-z]{2,12})$";
169
170static pcre2_singleton_t *g_session_format_regex = NULL;
171
176static pcre2_code *session_format_regex_get(void) {
177 if (g_session_format_regex == NULL) {
178 g_session_format_regex = asciichat_pcre2_singleton_compile(SESSION_STRING_FORMAT_PATTERN, PCRE2_CASELESS);
179 }
180 return asciichat_pcre2_singleton_get_code(g_session_format_regex);
181}
182
183asciichat_error_t acds_string_init(void) {
184 // Fast initialization - only init libsodium
185 // Hashtable building is deferred until actually needed for validation
186 if (sodium_init() < 0) {
187 return SET_ERRNO(ERROR_CRYPTO_INIT, "Failed to initialize libsodium");
188 }
189 return ASCIICHAT_OK;
190}
191
192asciichat_error_t acds_string_generate(char *output, size_t output_size) {
193 if (!output || output_size < SESSION_STRING_BUFFER_SIZE) {
194 return SET_ERRNO(ERROR_INVALID_PARAM, "output buffer must be at least %zu bytes",
195 (size_t)SESSION_STRING_BUFFER_SIZE);
196 }
197
198 // libsodium is guaranteed to be initialized by acds_string_init() before this is called
199 // No need to call sodium_init() again (redundant initialization removed)
200
201 // Pick random adjective
202 uint32_t adj_idx = randombytes_uniform((uint32_t)adjectives_count);
203 const char *adj = adjectives[adj_idx];
204
205 // Pick two random nouns
206 uint32_t noun1_idx = randombytes_uniform((uint32_t)nouns_count);
207 uint32_t noun2_idx = randombytes_uniform((uint32_t)nouns_count);
208 const char *noun1 = nouns[noun1_idx];
209 const char *noun2 = nouns[noun2_idx];
210
211 // Format: adjective-noun-noun
212 int written = safe_snprintf(output, output_size, "%s-%s-%s", adj, noun1, noun2);
213 if (written < 0 || (size_t)written >= output_size) {
214 return SET_ERRNO(ERROR_BUFFER_OVERFLOW, "Session string too long for buffer");
215 }
216
217 log_dev("Generated session string: %s", output);
218 return ASCIICHAT_OK;
219}
220
221bool is_session_string(const char *str) {
222 if (!str || str[0] == '\0') {
223 SET_ERRNO(ERROR_INVALID_PARAM, "Session string is NULL or empty");
224 return false;
225 }
226
227 size_t len = strlen(str);
228
229 // Check length bounds
230 if (len < 5 || len > 47) {
231 SET_ERRNO(ERROR_INVALID_PARAM, "Session string length %zu outside valid range 5-47", len);
232 return false;
233 }
234
235 // Session strings must be ASCII-only (homograph attack prevention)
236 if (!utf8_is_ascii_only(str)) {
237 SET_ERRNO(ERROR_INVALID_PARAM, "Session string contains non-ASCII characters");
238 return false;
239 }
240
241 // Get compiled regex (lazy initialization)
242 pcre2_code *regex = session_format_regex_get();
243 if (!regex) {
244 SET_ERRNO(ERROR_INTERNAL, "Session string PCRE2 regex failed to initialize");
245 return false;
246 }
247
248 // Validate format using PCRE2 regex
249 pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(regex, NULL);
250 if (!match_data) {
251 log_error("Failed to allocate match data for session string regex");
252 SET_ERRNO(ERROR_MEMORY, "Failed to allocate match data");
253 return false;
254 }
255
256 int match_result = pcre2_jit_match(regex, (PCRE2_SPTR8)str, len, 0, 0, match_data, NULL);
257
258 pcre2_match_data_free(match_data);
259
260 if (match_result < 0) {
261 // Format validation failed
262 SET_ERRNO(ERROR_INVALID_PARAM, "Session string format does not match pattern");
263 return false;
264 }
265
266 // Extract the three words by finding hyphens
267 // Format is guaranteed to be: word-word-word where each word is 2-12 lowercase letters
268 const char *hyphen1 = strchr(str, '-');
269 if (!hyphen1) {
270 SET_ERRNO(ERROR_INVALID_PARAM, "Session string missing first hyphen");
271 return false;
272 }
273
274 const char *hyphen2 = strchr(hyphen1 + 1, '-');
275 if (!hyphen2) {
276 SET_ERRNO(ERROR_INVALID_PARAM, "Session string missing second hyphen");
277 return false;
278 }
279
280 // Extract three words
281 size_t adj_len = hyphen1 - str;
282 size_t noun1_len = hyphen2 - hyphen1 - 1;
283 size_t noun2_len = len - (hyphen2 - str) - 1;
284
285 if (adj_len >= 32 || noun1_len >= 32 || noun2_len >= 32) {
286 SET_ERRNO(ERROR_INVALID_PARAM, "Session string word length out of bounds");
287 return false;
288 }
289
290 char adj[32], noun1[32], noun2[32];
291 memcpy(adj, str, adj_len);
292 adj[adj_len] = '\0';
293 memcpy(noun1, hyphen1 + 1, noun1_len);
294 noun1[noun1_len] = '\0';
295 memcpy(noun2, hyphen2 + 1, noun2_len);
296 noun2[noun2_len] = '\0';
297
298 // Lazy initialization: build validation caches on first use
299 // Note: build_validation_caches() handles synchronization internally
300 if (!g_cache_initialized) {
301 asciichat_error_t cache_err = build_validation_caches();
302 if (cache_err != ASCIICHAT_OK) {
303 log_warn("Failed to initialize session string cache; accepting format-valid string");
304 log_dev("Valid session string format (cache unavailable): %s", str);
305 return true; // Format is valid, cache is unavailable, accept anyway
306 }
307 }
308
309 // Validate first word is an adjective
310 word_cache_entry_t *adj_entry = NULL;
311 HASH_FIND_STR(g_adjectives_cache, adj, adj_entry);
312 if (!adj_entry) {
313 SET_ERRNO(ERROR_INVALID_PARAM, "Session string first word '%s' is not a valid adjective", adj);
314 return false;
315 }
316
317 // Validate second and third words are nouns
318 word_cache_entry_t *noun_entry1 = NULL;
319 HASH_FIND_STR(g_nouns_cache, noun1, noun_entry1);
320 if (!noun_entry1) {
321 SET_ERRNO(ERROR_INVALID_PARAM, "Session string second word '%s' is not a valid noun", noun1);
322 return false;
323 }
324
325 word_cache_entry_t *noun_entry2 = NULL;
326 HASH_FIND_STR(g_nouns_cache, noun2, noun_entry2);
327 if (!noun_entry2) {
328 SET_ERRNO(ERROR_INVALID_PARAM, "Session string third word '%s' is not a valid noun", noun2);
329 return false;
330 }
331
332 log_dev("Valid session string: %s", str);
333 return true;
334}
const char * adjectives[]
Definition adjectives.c:7
const size_t adjectives_count
Definition adjectives.c:367
asciichat_error_t acds_string_init(void)
bool is_session_string(const char *str)
void acds_strings_destroy(void)
Cleanup function for session string cache. Called by asciichat_shared_destroy() during library cleanup...
asciichat_error_t acds_string_generate(char *output, size_t output_size)
const char * nouns[]
Definition nouns.c:7
const size_t nouns_count
Definition nouns.c:724
pcre2_code * asciichat_pcre2_singleton_get_code(pcre2_singleton_t *singleton)
Get the compiled pcre2_code from a singleton handle.
Definition pcre2.c:95
Represents a thread-safe compiled PCRE2 regex singleton.
Definition pcre2.c:21
Hashtable entry for cached word lookups.
UT_hash_handle hh
uthash handle
char * word
The word string (key)
int safe_snprintf(char *buffer, size_t buffer_size, const char *format,...)
Safe formatted string printing to buffer.
Definition system.c:456
bool utf8_is_ascii_only(const char *str)
Definition utf8.c:167