ascii-chat 0.8.38
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
path.c
Go to the documentation of this file.
1
7#include <ascii-chat/util/path.h>
8#include <ascii-chat/common.h>
9#include <ascii-chat/common/error_codes.h>
10#include <ascii-chat/platform/system.h>
11#include <ascii-chat/platform/filesystem.h>
12#include <ascii-chat/platform/util.h>
13#include <ascii-chat/util/pcre2.h>
14#include <ascii-chat/paths.h>
15#include <string.h>
16#include <stdbool.h>
17#include <ctype.h>
18#include <stdlib.h>
19#include <stdio.h>
20#include <sys/stat.h>
21#include <pcre2.h>
22
30static const char *PATH_SEPARATOR_PATTERN = "[/\\\\]+";
31static const char *PATH_DOT_COMPONENT_PATTERN = "^\\.\\.*$";
32
33static pcre2_singleton_t *g_path_separator_regex = NULL;
34static pcre2_singleton_t *g_path_dot_component_regex = NULL;
35
40static pcre2_code *path_separator_regex_get(void) {
41 if (g_path_separator_regex == NULL) {
42 g_path_separator_regex = asciichat_pcre2_singleton_compile(PATH_SEPARATOR_PATTERN, 0);
43 }
44 return asciichat_pcre2_singleton_get_code(g_path_separator_regex);
45}
46
51static pcre2_code *path_dot_component_regex_get(void) {
52 if (g_path_dot_component_regex == NULL) {
53 g_path_dot_component_regex = asciichat_pcre2_singleton_compile(PATH_DOT_COMPONENT_PATTERN, 0);
54 }
55 return asciichat_pcre2_singleton_get_code(g_path_dot_component_regex);
56}
57
58/* Normalize a path by resolving .. and . components
59 * Handles both Windows (\‍) and Unix (/) separators
60 * Returns a pointer to a static buffer (not thread-safe, but sufficient for __FILE__ normalization)
61 */
62static const char *normalize_path(const char *path) {
63 if (!path) {
64 SET_ERRNO(ERROR_INVALID_PARAM, "path is null");
65 return "unknown";
66 }
67
68 static char normalized[PLATFORM_MAX_PATH_LENGTH];
69 static char components[PLATFORM_MAX_PATH_LENGTH][256];
70 int component_count = 0;
71 size_t path_len = strlen(path);
72
73 if (path_len >= PLATFORM_MAX_PATH_LENGTH) {
74 return path; /* Can't normalize, return as-is */
75 }
76
77 const char *pos = path;
78 bool absolute = path_is_absolute(path);
79
80 /* Skip past the absolute path prefix if present */
81 if (absolute) {
82#ifdef _WIN32
83 if (path_len >= 3 && isalpha((unsigned char)path[0]) && path[1] == ':' && path[2] == PATH_DELIM) {
84 pos += 3; /* Skip the drive letter and colon and separator (e.g., "C:\") */
85 }
86#else
87 if (path_len >= 1 && path[0] == PATH_DELIM) {
88 pos += 1; /* Skip the root separator */
89 }
90#endif
91 }
92
93 /* Parse path into components using PCRE2 regex for separator/dot detection */
94 pcre2_code *separator_regex = path_separator_regex_get();
95 pcre2_code *dot_regex = path_dot_component_regex_get();
96
97 /* If regex not available, fall back to manual parsing */
98 if (!separator_regex || !dot_regex) {
99 const char *parse_pos = pos;
100 while (*parse_pos) {
101 while (*parse_pos == '/' || *parse_pos == '\\') {
102 parse_pos++;
103 }
104 if (!*parse_pos)
105 break;
106 const char *component_start = parse_pos;
107 while (*parse_pos && *parse_pos != '/' && *parse_pos != '\\') {
108 parse_pos++;
109 }
110 size_t component_len = (size_t)(parse_pos - component_start);
111 if (component_len == 0)
112 continue;
113 if (component_len >= sizeof(components[0])) {
114 component_len = sizeof(components[0]) - 1;
115 }
116 if (component_len == 1 && component_start[0] == PATH_COMPONENT_DOT) {
117 continue;
118 }
119 if (component_len == 2 && component_start[0] == PATH_COMPONENT_DOT && component_start[1] == PATH_COMPONENT_DOT) {
120 if (component_count > 0) {
121 component_count--;
122 continue;
123 }
124 if (!absolute) {
125 memcpy(components[component_count], component_start, component_len);
126 components[component_count][component_len] = '\0';
127 component_count++;
128 }
129 continue;
130 }
131 memcpy(components[component_count], component_start, component_len);
132 components[component_count][component_len] = '\0';
133 component_count++;
134 }
135 } else {
136 /* Use PCRE2 to split by separators and detect dot components */
137 pcre2_match_data *sep_match = pcre2_match_data_create_from_pattern(separator_regex, NULL);
138 pcre2_match_data *dot_match = pcre2_match_data_create_from_pattern(dot_regex, NULL);
139
140 if (!sep_match || !dot_match) {
141 if (sep_match)
142 pcre2_match_data_free(sep_match);
143 if (dot_match)
144 pcre2_match_data_free(dot_match);
145
146 /* Fall back to manual parsing */
147 const char *parse_pos = pos;
148 while (*parse_pos) {
149 while (*parse_pos == '/' || *parse_pos == '\\') {
150 parse_pos++;
151 }
152 if (!*parse_pos)
153 break;
154 const char *component_start = parse_pos;
155 while (*parse_pos && *parse_pos != '/' && *parse_pos != '\\') {
156 parse_pos++;
157 }
158 size_t component_len = (size_t)(parse_pos - component_start);
159 if (component_len == 0)
160 continue;
161 if (component_len >= sizeof(components[0])) {
162 component_len = sizeof(components[0]) - 1;
163 }
164 if (component_len == 1 && component_start[0] == PATH_COMPONENT_DOT) {
165 continue;
166 }
167 if (component_len == 2 && component_start[0] == PATH_COMPONENT_DOT &&
168 component_start[1] == PATH_COMPONENT_DOT) {
169 if (component_count > 0) {
170 component_count--;
171 continue;
172 }
173 if (!absolute) {
174 memcpy(components[component_count], component_start, component_len);
175 components[component_count][component_len] = '\0';
176 component_count++;
177 }
178 continue;
179 }
180 memcpy(components[component_count], component_start, component_len);
181 components[component_count][component_len] = '\0';
182 component_count++;
183 }
184 goto build_normalized; /* Skip to path building */
185 }
186
187 size_t offset = 0;
188 size_t remaining_len = strlen(pos);
189 size_t last_component_end = 0;
190
191 while (offset <= remaining_len) {
192 /* Find next separator */
193 int rc = pcre2_jit_match(separator_regex, (PCRE2_SPTR8)pos, remaining_len, offset, 0, sep_match, NULL);
194 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(sep_match);
195 PCRE2_SIZE sep_start, sep_end;
196
197 if (rc >= 0) {
198 sep_start = ovector[0];
199 sep_end = ovector[1];
200 } else {
201 /* No more separators - handle last component if any */
202 sep_start = remaining_len;
203 sep_end = remaining_len;
204 }
205
206 /* Extract component between last_component_end and sep_start */
207 size_t component_len = sep_start - last_component_end;
208 if (component_len > 0) {
209 if (component_len >= sizeof(components[0])) {
210 component_len = sizeof(components[0]) - 1;
211 }
212
213 char temp_component[256];
214 memcpy(temp_component, pos + last_component_end, component_len);
215 temp_component[component_len] = '\0';
216
217 /* Check if component matches dot pattern (^\.\.*$) */
218 int is_dot = pcre2_jit_match(dot_regex, (PCRE2_SPTR8)temp_component, component_len, 0, 0, dot_match, NULL);
219
220 if (is_dot >= 0) {
221 /* It's ".", "..", or "..." */
222 if (component_len == 1) {
223 /* Skip "." */
224 } else if (component_len == 2) {
225 /* Handle ".." */
226 if (component_count > 0) {
227 component_count--;
228 } else if (!absolute) {
229 memcpy(components[component_count], temp_component, component_len);
230 components[component_count][component_len] = '\0';
231 component_count++;
232 }
233 } else {
234 /* "..." or more - treat as normal component */
235 memcpy(components[component_count], temp_component, component_len);
236 components[component_count][component_len] = '\0';
237 component_count++;
238 }
239 } else {
240 /* Normal component */
241 memcpy(components[component_count], temp_component, component_len);
242 components[component_count][component_len] = '\0';
243 component_count++;
244 }
245 }
246
247 if (rc < 0) {
248 break; /* No more separators */
249 }
250
251 offset = sep_end;
252 last_component_end = sep_end;
253 }
254
255 pcre2_match_data_free(sep_match);
256 pcre2_match_data_free(dot_match);
257 }
258
259build_normalized:;
260 /* Build normalized path */
261 size_t out_pos = 0;
262#ifdef _WIN32
263 if (absolute && path_len >= 3 && isalpha((unsigned char)path[0]) && path[1] == ':') {
264 normalized[out_pos++] = path[0];
265 normalized[out_pos++] = ':';
266 normalized[out_pos++] = PATH_DELIM;
267 }
268#else
269 if (absolute) {
270 normalized[out_pos++] = PATH_DELIM;
271 }
272#endif
273
274 for (int i = 0; i < component_count; i++) {
275 if (i > 0) {
276 normalized[out_pos++] = PATH_DELIM;
277 }
278 size_t comp_len = strlen(components[i]);
279 if (out_pos + comp_len >= PLATFORM_MAX_PATH_LENGTH) {
280 break;
281 }
282 memcpy(normalized + out_pos, components[i], comp_len);
283 out_pos += comp_len;
284 }
285
286 normalized[out_pos] = '\0';
287 return normalized;
288}
289
305static char *find_project_root(void) {
306 // Cache the result so we don't repeatedly call stat() which can hang during shutdown
307 static char cached_root[PLATFORM_MAX_PATH_LENGTH] = {0};
308 static bool search_done = false;
309
310 if (search_done) {
311 if (cached_root[0] != '\0') {
312 // Return a new allocation using malloc (not SAFE_MALLOC) to avoid debug_malloc
313 // recursion when called during memory report while holding g_mem.mutex
314 char *copy = malloc(strlen(cached_root) + 1);
315 if (copy) {
316 strcpy(copy, cached_root);
317 }
318 return copy;
319 }
320 return NULL; // Not found
321 }
322
323 char cwd_buf[PLATFORM_MAX_PATH_LENGTH];
324 if (!platform_get_cwd(cwd_buf, sizeof(cwd_buf))) {
325 search_done = true;
326 return NULL;
327 }
328
329 char search_path[PLATFORM_MAX_PATH_LENGTH];
330 SAFE_STRNCPY(search_path, cwd_buf, sizeof(search_path));
331
332 // Search up to 128 levels deep for repository root
333 for (int i = 0; i < 128; i++) {
334 // Check for .git directory
335 char git_check[PLATFORM_MAX_PATH_LENGTH];
336 safe_snprintf(git_check, sizeof(git_check), "%s%s.git", search_path, PATH_SEPARATOR_STR);
337 struct stat git_stat;
338 bool git_found = stat(git_check, &git_stat) == 0 && S_ISDIR(git_stat.st_mode);
339 if (git_found) {
340 // Found .git directory - this is the repo root
341 safe_snprintf(cached_root, sizeof(cached_root), "%s", search_path);
342 search_done = true;
343 // Return a new allocation using malloc (not SAFE_MALLOC) to avoid debug_malloc
344 // recursion when called during memory report while holding g_mem.mutex
345 char *result = malloc(strlen(search_path) + 1);
346 if (result) {
347 strcpy(result, search_path);
348 }
349 return result;
350 }
351
352 // We might be built from a tarball and not a git repo. But I still want this to work.
353 // So check for LICENSE.txt and CMakeLists.txt (ascii-chat's license is in the repo root with the main
354 // CMakeLists.txt).
355 char license_check[PLATFORM_MAX_PATH_LENGTH];
356 safe_snprintf(license_check, sizeof(license_check), "%s%sLICENSE.txt", search_path, PATH_SEPARATOR_STR);
357 char cmake_check[PLATFORM_MAX_PATH_LENGTH];
358 safe_snprintf(cmake_check, sizeof(cmake_check), "%s%sCMakeLists.txt", search_path, PATH_SEPARATOR_STR);
359 struct stat license_stat;
360 bool license_found = stat(license_check, &license_stat) == 0 && S_ISREG(license_stat.st_mode);
361 struct stat cmake_stat;
362 bool cmake_found = stat(cmake_check, &cmake_stat) == 0 && S_ISREG(cmake_stat.st_mode);
363 if (license_found && cmake_found) {
364 // Found LICENSE.txt
365 safe_snprintf(cached_root, sizeof(cached_root), "%s", search_path);
366 search_done = true;
367 // Return a new allocation using malloc (not SAFE_MALLOC) to avoid debug_malloc
368 // recursion when called during memory report while holding g_mem.mutex
369 char *result = malloc(strlen(search_path) + 1);
370 if (result) {
371 strcpy(result, search_path);
372 }
373 return result;
374 }
375
376 // Go up one directory
377 size_t len = strlen(search_path);
378 if (len == 0 || (len == 1 && search_path[0] == '/')) {
379 // Hit root directory without finding .git or CMakeLists.txt
380 break;
381 }
382
383 // Remove trailing separator if present
384 if (search_path[len - 1] == '/' || search_path[len - 1] == '\\') {
385 search_path[len - 1] = '\0';
386 len--;
387 }
388
389 // Find last separator
390 int last_sep = -1;
391 for (int j = (int)len - 1; j >= 0; j--) {
392 if (search_path[j] == '/' || search_path[j] == '\\') {
393 last_sep = j;
394 break;
395 }
396 }
397
398 if (last_sep <= 0) {
399 // Can't go up further
400 break;
401 }
402
403 search_path[last_sep] = '\0';
404 }
405
406 search_done = true;
407 return NULL;
408}
409
410const char *extract_project_relative_path(const char *file) {
411 if (!file) {
412 SET_ERRNO(ERROR_INVALID_PARAM, "file is null");
413 return "unknown";
414 }
415
416#ifdef __EMSCRIPTEN__
417 // WASM builds: Return raw path to avoid recursion in logging system
418 // extract_project_relative_path is called from format_log_header, which is part of logging.
419 // If we call normalize_path or other functions that might use SET_ERRNO or log functions,
420 // we create infinite recursion: log_msg -> format_log_header -> extract_project_relative_path -> SET_ERRNO ->
421 // log_error -> log_msg For WASM, just return the filename without path processing.
422 const char *sep = strrchr(file, '/');
423 if (!sep) {
424 sep = strrchr(file, '\\');
425 }
426 return sep ? sep + 1 : file;
427#endif
428
429 /* First normalize the path to resolve .. and . components */
430 const char *normalized = normalize_path(file);
431
432 /* Try to find and strip the project root from the absolute path */
433 char *project_root = find_project_root();
434 if (project_root) {
435 size_t root_len = strlen(project_root);
436 size_t norm_len = strlen(normalized);
437
438 /* Check if normalized path starts with project root */
439 if (norm_len > root_len && strncmp(normalized, project_root, root_len) == 0) {
440 const char *remainder = normalized + root_len;
441
442 /* Skip the separator if present */
443 if (*remainder == PATH_DELIM || *remainder == '/' || *remainder == '\\') {
444 remainder++;
445 }
446
447 free(project_root);
448 return remainder;
449 }
450 free(project_root);
451 }
452
453 /* Fallback: Extract relative path by looking for common separators */
454 const char *last_sep = strrchr(normalized, PATH_DELIM);
455 if (!last_sep) {
456 last_sep = strrchr(normalized, '/');
457 }
458 if (!last_sep) {
459 last_sep = strrchr(normalized, '\\');
460 }
461
462 /* If we found a separator, return the part after it */
463 if (last_sep) {
464 return last_sep + 1;
465 }
466
467 /* Last resort: return just the filename (don't return absolute path) */
468 return normalized;
469}
470
471char *expand_path(const char *path) {
472 if (path[0] == PATH_TILDE) {
473 const char *home = platform_get_home_dir();
474 if (!home) {
475 return NULL;
476 }
477
478 char *expanded;
479 size_t total_len = strlen(home) + strlen(path) + 1; // path includes the tilde
480 expanded = SAFE_MALLOC(total_len, char *);
481 if (!expanded) {
482 return NULL;
483 }
484 safe_snprintf(expanded, total_len, "%s%s", home, path + 1);
485
487
488 return expanded;
489 }
490 return platform_strdup(path);
491}
492
/* Return the configuration directory as reported by the platform layer.
 * Callers in this file release the result with SAFE_FREE(). */
char *get_config_dir(void) {
  /* Delegate to platform abstraction layer. The function previously ended
   * without returning a value, although callers use the result.
   * NOTE(review): platform_get_config_dir() is assumed to be the counterpart
   * of platform_get_data_dir(); confirm it is declared in the platform headers. */
  return platform_get_config_dir();
}
497
/* Return the data directory exactly as the platform layer reports it.
 * Callers in this file release the result with SAFE_FREE(). */
char *get_data_dir(void) {
  char *data_dir = platform_get_data_dir();
  return data_dir;
}
502
503char *get_log_dir(void) {
504#ifdef __EMSCRIPTEN__
505 // WASM builds: Return NULL to skip SAFE_MALLOC before memory tracking is initialized
506 // The caller will use a fallback path (temp dir + filename)
507 return NULL;
508#endif
509
510#ifdef NDEBUG
511 // Release builds: Use $TMPDIR/ascii-chat/
512 // Get system temp directory
513 char temp_dir[PLATFORM_MAX_PATH_LENGTH];
514 if (!platform_get_temp_dir(temp_dir, sizeof(temp_dir))) {
515 // Fallback: Use current working directory if temp dir unavailable
516 char cwd_buf[PLATFORM_MAX_PATH_LENGTH];
517 if (!platform_get_cwd(cwd_buf, sizeof(cwd_buf))) {
518 return NULL;
519 }
520 char *result = SAFE_MALLOC(strlen(cwd_buf) + 1, char *);
521 if (!result) {
522 return NULL;
523 }
524 safe_snprintf(result, strlen(cwd_buf) + 1, "%s", cwd_buf);
525 return result;
526 }
527
528 // Build path to ascii-chat subdirectory
529 size_t log_dir_len = strlen(temp_dir) + strlen(PATH_SEPARATOR_STR) + strlen("ascii-chat") + 1;
530 char *log_dir = SAFE_MALLOC(log_dir_len, char *);
531 if (!log_dir) {
532 return NULL;
533 }
534 safe_snprintf(log_dir, log_dir_len, "%s%sascii-chat", temp_dir, PATH_SEPARATOR_STR);
535
536 // Create the directory if it doesn't exist (with owner-only permissions)
537 asciichat_error_t mkdir_result = platform_mkdir(log_dir, DIR_PERM_PRIVATE);
538 if (mkdir_result != ASCIICHAT_OK) {
539 // Directory creation failed - fall back to temp_dir without subdirectory
540 SAFE_FREE(log_dir);
541 char *result = SAFE_MALLOC(strlen(temp_dir) + 1, char *);
542 if (!result) {
543 return NULL;
544 }
545 safe_snprintf(result, strlen(temp_dir) + 1, "%s", temp_dir);
546 return result;
547 }
548
549 // Verify the directory is writable
550 if (platform_access(log_dir, PLATFORM_ACCESS_WRITE) != 0) {
551 // Directory not writable - fall back to temp_dir
552 SAFE_FREE(log_dir);
553 char *result = SAFE_MALLOC(strlen(temp_dir) + 1, char *);
554 if (!result) {
555 return NULL;
556 }
557 safe_snprintf(result, strlen(temp_dir) + 1, "%s", temp_dir);
558 return result;
559 }
560
561 return log_dir;
562#else
563 // Debug builds: Use repository root for logs
564 char *repo_root = find_project_root();
565 if (repo_root) {
566 return repo_root;
567 }
568
569 // Fallback to current working directory if repo root not found
570 char cwd_buf[PLATFORM_MAX_PATH_LENGTH];
571 if (!platform_get_cwd(cwd_buf, sizeof(cwd_buf))) {
572 return NULL;
573 }
574
575 char *result = SAFE_MALLOC(strlen(cwd_buf) + 1, char *);
576 safe_snprintf(result, strlen(cwd_buf) + 1, "%s", cwd_buf);
577 return result;
578#endif
579}
580
582#ifdef _WIN32
583 // Windows: Try %PROGRAMDATA%\ascii-chat\ first, then fall back to user directories
584 const char *program_data = platform_getenv("PROGRAMDATA");
585 if (program_data && program_data[0] != '\0') {
586 size_t len = strlen(program_data) + strlen("\\ascii-chat\\") + 1;
587 char *system_dir = SAFE_MALLOC(len, char *);
588 if (system_dir) {
589 safe_snprintf(system_dir, len, "%s\\ascii-chat\\", program_data);
590
591 // Try to create directory recursively with public permissions (755 equivalent)
592 asciichat_error_t mkdir_result = platform_mkdir_recursive(system_dir, 0755);
593 if (mkdir_result == ASCIICHAT_OK) {
594 // Directory exists or was created - check if writable
595 if (platform_access(system_dir, PLATFORM_ACCESS_WRITE) == 0) {
596 return system_dir; // System-wide location is writable
597 }
598 }
599 SAFE_FREE(system_dir);
600 }
601 }
602
603 // Fall back to user data directory
604 char *data_dir = get_data_dir();
605 if (data_dir) {
606 // Try to create the directory recursively
607 asciichat_error_t mkdir_result = platform_mkdir_recursive(data_dir, DIR_PERM_PRIVATE);
608 if (mkdir_result == ASCIICHAT_OK) {
609 if (platform_access(data_dir, PLATFORM_ACCESS_WRITE) == 0) {
610 return data_dir;
611 }
612 }
613 SAFE_FREE(data_dir);
614 }
615
616 return NULL;
617#else
618 // Unix: Try ${INSTALL_PREFIX}/var/ascii-chat/ first (system-wide, Homebrew-aware)
619 // This uses the baked-in install prefix from paths.h (e.g., /opt/homebrew or /usr/local)
620 const char *prefix = ASCIICHAT_INSTALL_PREFIX;
621 size_t system_len = strlen(prefix) + strlen("/var/ascii-chat/") + 1;
622 char *system_dir = SAFE_MALLOC(system_len, char *);
623 if (system_dir) {
624 safe_snprintf(system_dir, system_len, "%s/var/ascii-chat/", prefix);
625
626 // Try to create directory recursively with public permissions (755) for system-wide use
627 // platform_mkdir_recursive creates parent directories as needed
628 asciichat_error_t mkdir_result = platform_mkdir_recursive(system_dir, 0755);
629 if (mkdir_result == ASCIICHAT_OK) {
630 // Directory exists or was created - check if writable
631 if (platform_access(system_dir, PLATFORM_ACCESS_WRITE) == 0) {
632 return system_dir; // System-wide location is writable
633 }
634 }
635 SAFE_FREE(system_dir);
636 }
637
638#ifdef __APPLE__
639 // On macOS, /usr/local is typically user-writable (set up by Homebrew)
640 // Try it if the install prefix is different (e.g., /opt/homebrew on Apple Silicon)
641 if (strcmp(prefix, "/usr/local") != 0) {
642 const char *usr_local_path = "/usr/local/var/ascii-chat/";
643 size_t usr_local_len = strlen(usr_local_path) + 1;
644 char *usr_local_dir = SAFE_MALLOC(usr_local_len, char *);
645 if (usr_local_dir) {
646 safe_snprintf(usr_local_dir, usr_local_len, "%s", usr_local_path);
647
648 asciichat_error_t mkdir_result = platform_mkdir_recursive(usr_local_dir, 0755);
649 if (mkdir_result == ASCIICHAT_OK) {
650 if (platform_access(usr_local_dir, PLATFORM_ACCESS_WRITE) == 0) {
651 return usr_local_dir; // /usr/local is writable
652 }
653 }
654 SAFE_FREE(usr_local_dir);
655 }
656 }
657#endif
658
659 // Fall back to user data directory (XDG_DATA_HOME or ~/.local/share/ascii-chat/)
660 char *data_dir = get_data_dir();
661 if (data_dir) {
662 // Try to create the directory recursively
663 asciichat_error_t mkdir_result = platform_mkdir_recursive(data_dir, DIR_PERM_PRIVATE);
664 if (mkdir_result == ASCIICHAT_OK) {
665 if (platform_access(data_dir, PLATFORM_ACCESS_WRITE) == 0) {
666 return data_dir;
667 }
668 }
669 SAFE_FREE(data_dir);
670 }
671
672 return NULL;
673#endif
674}
675
676bool path_normalize_copy(const char *path, char *out, size_t out_len) {
677 if (!path || !out || out_len == 0) {
678 SET_ERRNO(ERROR_INVALID_PARAM, "null path or out or out_len is 0");
679 return false;
680 }
681
682 const char *normalized = normalize_path(path);
683 if (!normalized) {
684 return false;
685 }
686
687 size_t len = strlen(normalized);
688 if (len + 1 > out_len) {
689 return false;
690 }
691
692 memcpy(out, normalized, len + 1);
693 return true;
694}
695
696bool path_is_absolute(const char *path) {
697 if (!path || !*path) {
698 return false;
699 }
700
701#ifdef _WIN32
702 if ((path[0] == '\\' && path[1] == '\\')) {
703 return true; // UNC path
704 }
705 if (isalpha((unsigned char)path[0]) && path[1] == PATH_DRIVE_SEPARATOR && path[2] == PATH_DELIM) {
706 return true;
707 }
708 return false;
709#else
710 return path[0] == PATH_DELIM;
711#endif
712}
713
714bool path_is_within_base(const char *path, const char *base) {
715 if (!path || !base) {
716 return false;
717 }
718
719 if (!path_is_absolute(path) || !path_is_absolute(base)) {
720 return false;
721 }
722
723 char normalized_path[PLATFORM_MAX_PATH_LENGTH];
724 char normalized_base[PLATFORM_MAX_PATH_LENGTH];
725
726 if (!path_normalize_copy(path, normalized_path, sizeof(normalized_path))) {
727 return false;
728 }
729 if (!path_normalize_copy(base, normalized_base, sizeof(normalized_base))) {
730 return false;
731 }
732
733 size_t base_len = strlen(normalized_base);
734 if (base_len == 0) {
735 return false;
736 }
737
738 if (platform_path_strcasecmp(normalized_path, normalized_base, base_len) != 0) {
739 return false;
740 }
741 char next = normalized_path[base_len];
742 if (next == '\0') {
743 return true;
744 }
745 return next == PATH_DELIM;
746}
747
/* Report whether `path` is within at least one of the `base_count` entries of
 * `bases`, as decided by path_is_within_base(). NULL entries are skipped.
 * A NULL path, a NULL array or a zero count yields false. */
bool path_is_within_any_base(const char *path, const char *const *bases, size_t base_count) {
  if (path == NULL || bases == NULL || base_count == 0) {
    return false;
  }

  size_t index = 0;
  while (index < base_count) {
    const char *candidate = bases[index++];
    if (candidate != NULL && path_is_within_base(path, candidate)) {
      return true;
    }
  }

  return false;
}
765
766bool path_looks_like_path(const char *value) {
767 if (!value || *value == '\0') {
768 return false;
769 }
770
771 if (value[0] == PATH_DELIM || value[0] == PATH_COMPONENT_DOT || value[0] == PATH_TILDE) {
772 return true;
773 }
774
775 if (strchr(value, PATH_DELIM)) {
776 return true;
777 }
778
779#ifdef _WIN32
780 if (isalpha((unsigned char)value[0]) && value[1] == ':' && value[2] == PATH_DELIM) {
781 return true;
782 }
783#endif
784
785 return false;
786}
787
788static asciichat_error_t map_role_to_error(path_role_t role) {
789 switch (role) {
790 case PATH_ROLE_CONFIG_FILE:
791 return ERROR_CONFIG;
792 case PATH_ROLE_LOG_FILE:
793 return ERROR_LOGGING_INIT;
794 case PATH_ROLE_KEY_PRIVATE:
795 case PATH_ROLE_KEY_PUBLIC:
796 case PATH_ROLE_CLIENT_KEYS:
797 return ERROR_CRYPTO_KEY;
798 }
799 return ERROR_GENERAL;
800}
801
802static void append_base_if_valid(const char *candidate, const char **bases, size_t *count) {
803 if (!candidate || *candidate == '\0' || *count >= MAX_PATH_BASES) {
804 return;
805 }
806 if (!path_is_absolute(candidate)) {
807 return;
808 }
809 bases[*count] = candidate;
810 (*count)++;
811}
812
813static void build_ascii_chat_path(const char *base, const char *suffix, char *out, size_t out_len) {
814 if (!base || !suffix || out_len == 0) {
815 out[0] = '\0';
816 return;
817 }
818
819 size_t base_len = strlen(base);
820 bool needs_sep = base_len > 0 && base[base_len - 1] != PATH_DELIM;
821
822 safe_snprintf(out, out_len, "%s%s%s", base, needs_sep ? PATH_SEPARATOR_STR : "", suffix);
823}
824
832static bool is_sensitive_system_path(const char *path) {
833 if (!path) {
834 return false;
835 }
836
837#ifdef _WIN32
838 // Windows system directories
839 const char *sensitive_paths[] = {"C:\\Windows", // System directory
840 "C:\\Program Files", // Program files
841 "C:\\Program Files (x86)", // 32-bit programs
842 "C:\\ProgramData", // All users data
843 "C:\\System Volume Information", // System recovery
844 "C:\\PerfLogs", // Performance logs
845 NULL};
846#else
847 // Unix/Linux/macOS system directories
848 const char *sensitive_paths[] = {"/etc", // System configuration
849 "/bin", // Essential binaries
850 "/sbin", // System binaries
851 "/usr/bin", // User binaries
852 "/usr/sbin", // User system binaries
853 "/usr/lib", // System libraries
854 "/lib", // Libraries
855 "/lib64", // 64-bit libraries
856 "/boot", // Boot files
857 "/sys", // System interface
858 "/proc", // Process interface
859 "/dev", // Devices
860 "/root", // Root home (should not write to)
861 "/var/lib", // Variable library data
862 "/var/cache", // Cache data
863 "/var/spool", // Spool data
864 NULL};
865
866#ifdef __APPLE__
867 // macOS-specific system paths
868 const char *macos_paths[] = {"/System", // Core system
869 "/Library", // System library
870 "/Applications", // Bundled apps
871 "/Developer", // Developer tools
872 "/Volumes", // Mounted volumes
873 NULL};
874#endif
875#endif
876
877 // Check each sensitive path
878 for (int i = 0; sensitive_paths[i] != NULL; i++) {
879 const char *base = sensitive_paths[i];
880 size_t base_len = strlen(base);
881
882 // Match if path equals base or starts with base + separator
883 if (strcmp(path, base) == 0) {
884 return true; // Exact match is sensitive
885 }
886 if (strncmp(path, base, base_len) == 0) {
887 // Make sure it's followed by a path separator, not a partial match
888 if (path[base_len] == PATH_DELIM || path[base_len] == '/' || path[base_len] == '\\') {
889 return true;
890 }
891 }
892 }
893
894#ifdef __APPLE__
895 // Check macOS paths
896 for (int i = 0; macos_paths[i] != NULL; i++) {
897 const char *base = macos_paths[i];
898 size_t base_len = strlen(base);
899
900 if (strcmp(path, base) == 0) {
901 return true;
902 }
903 if (strncmp(path, base, base_len) == 0) {
904 if (path[base_len] == PATH_DELIM || path[base_len] == '/' || path[base_len] == '\\') {
905 return true;
906 }
907 }
908 }
909#endif
910
911 return false;
912}
913
921static bool is_file_empty(const char *path) {
922 if (!path) {
923 SET_ERRNO(ERROR_INVALID_PARAM, "is_file_empty: invalid parameter");
924 return false;
925 }
926
927 if (!platform_is_regular_file(path)) {
928 return false;
929 }
930
931 FILE *f = platform_fopen(path, "r");
932 if (!f) {
933 return false;
934 }
935
936 int c = fgetc(f);
937 fclose(f);
938 return c == EOF;
939}
940
/* Report whether `path` is an existing regular file whose first line starts
 * like an ascii-chat log entry: '[' followed by two digits and ':' (the start
 * of a "[HH:MM:SS.microseconds]" timestamp).
 *
 * Returns false for NULL, for anything platform_is_regular_file() rejects,
 * for unreadable files and for files with no first line.
 */
static bool is_existing_ascii_chat_log(const char *path) {
  if (path == NULL) {
    return false;
  }
  if (!platform_is_regular_file(path)) {
    return false;
  }

  FILE *stream = platform_fopen(path, "r");
  if (stream == NULL) {
    return false;
  }

  char first_line[256];
  bool has_signature = false;
  if (fgets(first_line, sizeof(first_line), stream) != NULL) {
    /* Short-circuiting stops at the NUL of a short line, so no index reads past it. */
    has_signature = first_line[0] == '[' && isdigit((unsigned char)first_line[1]) &&
                    isdigit((unsigned char)first_line[2]) && first_line[3] == ':';
  }

  fclose(stream);
  return has_signature;
}
973
974asciichat_error_t path_validate_user_path(const char *input, path_role_t role, char **normalized_out) {
975 if (!normalized_out) {
976 return SET_ERRNO(map_role_to_error(role), "path_validate_user_path requires output pointer");
977 }
978 *normalized_out = NULL;
979
980 if (!input || *input == '\0') {
981 return SET_ERRNO(map_role_to_error(role), "Path is empty for role %d", role);
982 }
983
984 // SECURITY: For log files, if input is a simple filename (no separators or ..), constrain it to a safe directory
985 if (role == PATH_ROLE_LOG_FILE) {
986 // Check if input contains path separators or parent directory references
987 bool is_simple_filename = true;
988 for (const char *p = input; *p; p++) {
989 if (*p == PATH_DELIM || *p == '/' || *p == '\\') {
990 is_simple_filename = false;
991 break;
992 }
993 }
994 // Also reject ".." components (even without separators like "..something")
995 if (strstr(input, "..") != NULL) {
996 is_simple_filename = false;
997 }
998
999 // If it's a simple filename, resolve it to a safe base directory
1000 if (is_simple_filename) {
1001 // Always prefer current working directory for simple log filenames
1002 // This ensures logs go to where the user is running the command from
1003 char safe_base[PLATFORM_MAX_PATH_LENGTH];
1004
1005 if (!platform_get_cwd(safe_base, sizeof(safe_base))) {
1006 // If cwd fails, try config dir as fallback
1007 char *config_dir = get_config_dir();
1008 if (config_dir) {
1009 SAFE_STRNCPY(safe_base, config_dir, sizeof(safe_base));
1010 SAFE_FREE(config_dir);
1011 } else {
1012 return SET_ERRNO(ERROR_LOGGING_INIT, "Failed to determine safe directory for log file");
1013 }
1014 }
1015
1016 // Build the full path: safe_base + separator + input
1017 size_t base_len = strlen(safe_base);
1018 size_t input_len = strlen(input);
1019 bool needs_sep = base_len > 0 && safe_base[base_len - 1] != PATH_DELIM;
1020 size_t total_len = base_len + (needs_sep ? 1 : 0) + input_len + 1;
1021
1022 if (total_len > PLATFORM_MAX_PATH_LENGTH) {
1023 return SET_ERRNO(ERROR_LOGGING_INIT, "Log file path too long: %s/%s", safe_base, input);
1024 }
1025
1026 char resolved_buf[PLATFORM_MAX_PATH_LENGTH];
1027 safe_snprintf(resolved_buf, sizeof(resolved_buf), "%s%s%s", safe_base, needs_sep ? PATH_SEPARATOR_STR : "",
1028 input);
1029
1030 // Normalize the resolved path
1031 char normalized_buf[PLATFORM_MAX_PATH_LENGTH];
1032 if (!path_normalize_copy(resolved_buf, normalized_buf, sizeof(normalized_buf))) {
1033 return SET_ERRNO(ERROR_LOGGING_INIT, "Failed to normalize log file path: %s", resolved_buf);
1034 }
1035
1036 // Allocate and return the result
1037 char *result = SAFE_MALLOC(strlen(normalized_buf) + 1, char *);
1038 if (!result) {
1039 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate normalized path");
1040 }
1041 safe_snprintf(result, strlen(normalized_buf) + 1, "%s", normalized_buf);
1042 *normalized_out = result;
1043 return ASCIICHAT_OK;
1044 }
1045 // If not a simple filename (contains separators), continue with normal validation below
1046 }
1047
1048 // For non-log-files or log files with path separators, validate as usual
1049 if (role != PATH_ROLE_LOG_FILE && !path_looks_like_path(input)) {
1050 return SET_ERRNO(map_role_to_error(role), "Value does not look like a filesystem path: %s", input);
1051 }
1052
1053 char *expanded = expand_path(input);
1054 if (!expanded) {
1055 return SET_ERRNO(map_role_to_error(role), "Failed to expand path: %s", input);
1056 }
1057
1058 char candidate_buf[PLATFORM_MAX_PATH_LENGTH];
1059 const char *candidate_path = expanded;
1060
1061 if (!path_is_absolute(candidate_path)) {
1062 char cwd_buf[PLATFORM_MAX_PATH_LENGTH];
1063 if (!platform_get_cwd(cwd_buf, sizeof(cwd_buf))) {
1064 SAFE_FREE(expanded);
1065 return SET_ERRNO(map_role_to_error(role), "Failed to determine current working directory");
1066 }
1067
1068 size_t total_len = strlen(cwd_buf) + 1 + strlen(candidate_path) + 1;
1069 if (total_len >= sizeof(candidate_buf)) {
1070 SAFE_FREE(expanded);
1071 return SET_ERRNO(map_role_to_error(role), "Resolved path is too long: %s/%s", cwd_buf, candidate_path);
1072 }
1073 if (strlen(candidate_path) > 0 && candidate_path[0] == PATH_DELIM) {
1074 safe_snprintf(candidate_buf, sizeof(candidate_buf), "%s%s", cwd_buf, candidate_path);
1075 } else {
1076 safe_snprintf(candidate_buf, sizeof(candidate_buf), "%s%c%s", cwd_buf, PATH_DELIM, candidate_path);
1077 }
1078 candidate_path = candidate_buf;
1079 }
1080
1081 char normalized_buf[PLATFORM_MAX_PATH_LENGTH];
1082 if (!path_normalize_copy(candidate_path, normalized_buf, sizeof(normalized_buf))) {
1083 SAFE_FREE(expanded);
1084 return SET_ERRNO(map_role_to_error(role), "Failed to normalize path: %s", candidate_path);
1085 }
1086
1087 if (!path_is_absolute(normalized_buf)) {
1088 SAFE_FREE(expanded);
1089 return SET_ERRNO(map_role_to_error(role), "Normalized path is not absolute: %s", normalized_buf);
1090 }
1091
1092 const char *bases[MAX_PATH_BASES] = {0};
1093 size_t base_count = 0;
1094
1095 // Always add current working directory as an allowed base
1096 // This is critical for log files and other paths relative to where the user runs the command
1097 char cwd_base[PLATFORM_MAX_PATH_LENGTH];
1098 if (platform_get_cwd(cwd_base, sizeof(cwd_base))) {
1099 append_base_if_valid(cwd_base, bases, &base_count);
1100 }
1101
1102 char temp_base[PLATFORM_MAX_PATH_LENGTH];
1103 if (platform_get_temp_dir(temp_base, sizeof(temp_base))) {
1104 append_base_if_valid(temp_base, bases, &base_count);
1105 }
1106
1107 char *config_dir = get_config_dir();
1108 if (config_dir) {
1109 append_base_if_valid(config_dir, bases, &base_count);
1110 }
1111
1112 const char *home_env = platform_get_home_dir();
1113 if (home_env) {
1114 append_base_if_valid(home_env, bases, &base_count);
1115 }
1116
1117 char ascii_chat_home[PLATFORM_MAX_PATH_LENGTH];
1118 if (home_env) {
1119 build_ascii_chat_path(home_env, ".ascii-chat", ascii_chat_home, sizeof(ascii_chat_home));
1120 append_base_if_valid(ascii_chat_home, bases, &base_count);
1121 }
1122
1123#ifndef _WIN32
1124 char ascii_chat_home_tmp[PLATFORM_MAX_PATH_LENGTH];
1125 build_ascii_chat_path("/tmp", ".ascii-chat", ascii_chat_home_tmp, sizeof(ascii_chat_home_tmp));
1126 append_base_if_valid(ascii_chat_home_tmp, bases, &base_count);
1127#endif
1128
1129 char ssh_home[PLATFORM_MAX_PATH_LENGTH];
1130 if (home_env) {
1131 build_ascii_chat_path(home_env, ".ssh", ssh_home, sizeof(ssh_home));
1132 append_base_if_valid(ssh_home, bases, &base_count);
1133 }
1134
1135#ifdef _WIN32
1136 char program_data_logs[PLATFORM_MAX_PATH_LENGTH];
1137 const char *program_data = platform_getenv("PROGRAMDATA");
1138 if (program_data) {
1139 build_ascii_chat_path(program_data, "ascii-chat", program_data_logs, sizeof(program_data_logs));
1140 append_base_if_valid(program_data_logs, bases, &base_count);
1141 }
1142#else
1143 // System-wide config, log, and temp directories (for server deployments)
1144 append_base_if_valid("/etc/ascii-chat", bases, &base_count);
1145 append_base_if_valid("/usr/local/etc/ascii-chat", bases, &base_count);
1146 append_base_if_valid("/var/log", bases, &base_count);
1147 append_base_if_valid("/var/tmp", bases, &base_count);
1148 append_base_if_valid("/tmp", bases, &base_count);
1149#ifdef __APPLE__
1150 // On macOS, /tmp is a symlink to /private/tmp
1151 append_base_if_valid("/private/tmp", bases, &base_count);
1152 // On macOS, all user home directories are under /Users
1153 append_base_if_valid("/Users", bases, &base_count);
1154#endif
1155#endif
1156
1157 // Security check: Reject paths that point to sensitive system files
1158 // This applies to all path roles, not just logs
1159 if (is_sensitive_system_path(normalized_buf)) {
1160 SAFE_FREE(expanded);
1161 if (config_dir) {
1162 SAFE_FREE(config_dir);
1163 }
1164 return SET_ERRNO(map_role_to_error(role), "Cannot write to protected system path: %s", normalized_buf);
1165 }
1166
1167 // For log files, apply special validation rules
1168 if (role == PATH_ROLE_LOG_FILE) {
1169 // Check if path is a regular file (not a directory, not non-existent)
1170 // On macOS, fopen() can succeed on directories, so we must use platform_is_regular_file()
1171 bool is_regular_file = platform_is_regular_file(normalized_buf);
1172
1173 // If a regular file exists, it MUST be an ascii-chat log or empty file to be overwritten
1174 if (is_regular_file && !is_existing_ascii_chat_log(normalized_buf) && !is_file_empty(normalized_buf)) {
1175 SAFE_FREE(expanded);
1176 if (config_dir) {
1177 SAFE_FREE(config_dir);
1178 }
1179 return SET_ERRNO(ERROR_LOGGING_INIT,
1180 "Cannot overwrite existing non-ascii-chat file: %s\n"
1181 "For safety, ascii-chat will only overwrite its own log files or empty files",
1182 normalized_buf);
1183 }
1184
1185 // If file doesn't exist (or is a directory), check that path is in safe locations
1186 if (!is_regular_file) {
1187 bool allowed = base_count == 0 ? true : path_is_within_any_base(normalized_buf, bases, base_count);
1188 if (!allowed) {
1189 SAFE_FREE(expanded);
1190 if (config_dir) {
1191 SAFE_FREE(config_dir);
1192 }
1193 return SET_ERRNO(ERROR_LOGGING_INIT,
1194 "Log path %s is outside allowed directories (use -L /tmp/file.log, ~/file.log, or "
1195 "relative/absolute paths in safe locations)",
1196 normalized_buf);
1197 }
1198 }
1199 } else {
1200 // For non-log-file paths, apply standard whitelist validation
1201 bool allowed = base_count == 0 ? true : path_is_within_any_base(normalized_buf, bases, base_count);
1202 if (!allowed) {
1203 SAFE_FREE(expanded);
1204 if (config_dir) {
1205 SAFE_FREE(config_dir);
1206 }
1207 return SET_ERRNO(map_role_to_error(role), "Path %s is outside allowed directories", normalized_buf);
1208 }
1209 }
1210
1211 char *result = SAFE_MALLOC(strlen(normalized_buf) + 1, char *);
1212 if (!result) {
1213 SAFE_FREE(expanded);
1214 if (config_dir) {
1215 SAFE_FREE(config_dir);
1216 }
1217 return SET_ERRNO(ERROR_MEMORY, "Failed to allocate normalized path");
1218 }
1219 safe_snprintf(result, strlen(normalized_buf) + 1, "%s", normalized_buf);
1220 *normalized_out = result;
1221
1222 SAFE_FREE(expanded);
1223 if (config_dir) {
1224 SAFE_FREE(config_dir);
1225 }
1226 return ASCIICHAT_OK;
1227}
bool path_looks_like_path(const char *value)
Definition path.c:766
char * get_log_dir(void)
Definition path.c:503
bool path_is_absolute(const char *path)
Definition path.c:696
asciichat_error_t path_validate_user_path(const char *input, path_role_t role, char **normalized_out)
Definition path.c:974
char * expand_path(const char *path)
Definition path.c:471
bool path_is_within_base(const char *path, const char *base)
Definition path.c:714
char * get_discovery_database_dir(void)
Definition path.c:581
char * get_config_dir(void)
Definition path.c:493
bool path_is_within_any_base(const char *path, const char *const *bases, size_t base_count)
Definition path.c:748
char * get_data_dir(void)
Definition path.c:498
const char * extract_project_relative_path(const char *file)
Definition path.c:410
bool path_normalize_copy(const char *path, char *out, size_t out_len)
Definition path.c:676
pcre2_code * asciichat_pcre2_singleton_get_code(pcre2_singleton_t *singleton)
Get the compiled pcre2_code from a singleton handle.
Definition pcre2.c:95
char * platform_strdup(const char *s)
#define true
Definition stdbool.h:23
pcre2_singleton_t
Represents a thread-safe compiled PCRE2 regex singleton.
Definition pcre2.c:21
int safe_snprintf(char *buffer, size_t buffer_size, const char *format,...)
Safe formatted string printing to buffer.
Definition system.c:456
#define PLATFORM_MAX_PATH_LENGTH
Definition system.c:64
bool platform_get_cwd(char *cwd, size_t path_size)
Definition util.c:108
int platform_path_strcasecmp(const char *a, const char *b, size_t n)
Definition util.c:117
void platform_normalize_path_separators(char *path)
Definition util.c:96
bool platform_get_temp_dir(char *temp_dir, size_t path_size)
Definition util.c:72
int platform_is_regular_file(const char *path)
Definition util.c:122
const char * platform_get_home_dir(void)
Definition util.c:92
char * platform_get_config_dir(void)
Definition util.c:102
FILE * platform_fopen(const char *filename, const char *mode)
int platform_access(const char *pathname, int mode)
char * platform_get_data_dir(void)
asciichat_error_t platform_mkdir_recursive(const char *path, int mode)
const char * platform_getenv(const char *name)
Definition wasm/system.c:13