7#include <ascii-chat/util/path.h>
8#include <ascii-chat/common.h>
9#include <ascii-chat/common/error_codes.h>
10#include <ascii-chat/platform/system.h>
11#include <ascii-chat/platform/filesystem.h>
12#include <ascii-chat/platform/util.h>
13#include <ascii-chat/util/pcre2.h>
14#include <ascii-chat/paths.h>
/* Regex source for a run of path separators. After C string unescaping the
 * pattern reads [/\\]+ : one or more forward slashes and/or backslashes. */
30static const char *PATH_SEPARATOR_PATTERN =
"[/\\\\]+";
/* Regex source for a component made only of dots. After C string unescaping it
 * reads ^\.\.*$ , so besides a single dot and a double dot it also matches
 * longer runs of dots. */
31static const char *PATH_DOT_COMPONENT_PATTERN =
"^\\.\\.*$";
/*
 * Lazily prepares the path-separator regex; declared to return pcre2_code *.
 * Visible logic: while g_path_separator_regex (declared outside this excerpt)
 * is still NULL, it is assigned the result of
 * asciichat_pcre2_singleton_compile(PATH_SEPARATOR_PATTERN, 0).
 * NOTE(review): the return statement and the closing braces are missing from
 * this listing; presumably the compiled code is obtained from the singleton
 * handle - confirm against the full source.
 */
40static pcre2_code *path_separator_regex_get(
void) {
41 if (g_path_separator_regex == NULL) {
42 g_path_separator_regex = asciichat_pcre2_singleton_compile(PATH_SEPARATOR_PATTERN, 0);
/*
 * Lazily prepares the dot-component regex; declared to return pcre2_code *.
 * Visible logic: while g_path_dot_component_regex (declared outside this
 * excerpt) is still NULL, it is assigned the result of
 * asciichat_pcre2_singleton_compile(PATH_DOT_COMPONENT_PATTERN, 0).
 * NOTE(review): the return statement and the closing braces are missing from
 * this listing; presumably the compiled code is obtained from the singleton
 * handle - confirm against the full source.
 */
51static pcre2_code *path_dot_component_regex_get(
void) {
52 if (g_path_dot_component_regex == NULL) {
53 g_path_dot_component_regex = asciichat_pcre2_singleton_compile(PATH_DOT_COMPONENT_PATTERN, 0);
/*
 * Normalizes a path string: splits it on forward slashes and backslashes,
 * gives single-dot and double-dot components special handling, stores the
 * surviving components in the components array and finally re-joins them into
 * the normalized buffer using PATH_DELIM.
 *
 * path: NUL-terminated input; the visible SET_ERRNO call reports
 *       ERROR_INVALID_PARAM for a null path.
 * Declared to return const char *.
 *
 * NOTE(review): this listing omits many lines of the function, including the
 * declarations of components, normalized, absolute, offset and out_pos, the
 * bodies of several branches, the build_normalized label and the return
 * statements. Comments below describe only what the visible lines show.
 */
62static const char *normalize_path(
const char *path) {
64 SET_ERRNO(ERROR_INVALID_PARAM,
"path is null");
70 int component_count = 0;
71 size_t path_len = strlen(path);
77 const char *pos = path;
/* Drive-style prefix: a letter, a colon, then PATH_DELIM. */
83 if (path_len >= 3 && isalpha((
unsigned char)path[0]) && path[1] ==
':' && path[2] == PATH_DELIM) {
/* Path that starts with PATH_DELIM. */
87 if (path_len >= 1 && path[0] == PATH_DELIM) {
94 pcre2_code *separator_regex = path_separator_regex_get();
95 pcre2_code *dot_regex = path_dot_component_regex_get();
/* Fallback when either regex is unavailable: split by hand on both separator
 * characters instead of using PCRE2. */
98 if (!separator_regex || !dot_regex) {
99 const char *parse_pos = pos;
/* Skip a run of separators. */
101 while (*parse_pos ==
'/' || *parse_pos ==
'\\') {
106 const char *component_start = parse_pos;
/* Advance to the end of the current component. */
107 while (*parse_pos && *parse_pos !=
'/' && *parse_pos !=
'\\') {
110 size_t component_len = (size_t)(parse_pos - component_start);
111 if (component_len == 0)
/* Clamp so the copy plus its terminator fits one components slot; an
 * over-long component is truncated here. */
113 if (component_len >=
sizeof(components[0])) {
114 component_len =
sizeof(components[0]) - 1;
/* Single-dot component (branch body not shown in this listing). */
116 if (component_len == 1 && component_start[0] == PATH_COMPONENT_DOT) {
/* Double-dot component: behaviour depends on whether any component has been
 * collected so far. */
119 if (component_len == 2 && component_start[0] == PATH_COMPONENT_DOT && component_start[1] == PATH_COMPONENT_DOT) {
120 if (component_count > 0) {
125 memcpy(components[component_count], component_start, component_len);
126 components[component_count][component_len] =
'\0';
131 memcpy(components[component_count], component_start, component_len);
132 components[component_count][component_len] =
'\0';
137 pcre2_match_data *sep_match = pcre2_match_data_create_from_pattern(separator_regex, NULL);
138 pcre2_match_data *dot_match = pcre2_match_data_create_from_pattern(dot_regex, NULL);
/* Second fallback: match-data allocation failed. Both blocks are released and
 * the same manual split as above is repeated before jumping to the rebuild
 * step.
 * NOTE(review): the manual parser appears twice in this function; it looks
 * like a candidate for one shared static helper. */
140 if (!sep_match || !dot_match) {
142 pcre2_match_data_free(sep_match);
144 pcre2_match_data_free(dot_match);
147 const char *parse_pos = pos;
149 while (*parse_pos ==
'/' || *parse_pos ==
'\\') {
154 const char *component_start = parse_pos;
155 while (*parse_pos && *parse_pos !=
'/' && *parse_pos !=
'\\') {
158 size_t component_len = (size_t)(parse_pos - component_start);
159 if (component_len == 0)
161 if (component_len >=
sizeof(components[0])) {
162 component_len =
sizeof(components[0]) - 1;
164 if (component_len == 1 && component_start[0] == PATH_COMPONENT_DOT) {
167 if (component_len == 2 && component_start[0] == PATH_COMPONENT_DOT &&
168 component_start[1] == PATH_COMPONENT_DOT) {
169 if (component_count > 0) {
174 memcpy(components[component_count], component_start, component_len);
175 components[component_count][component_len] =
'\0';
180 memcpy(components[component_count], component_start, component_len);
181 components[component_count][component_len] =
'\0';
184 goto build_normalized;
/* Regex-driven split: each iteration searches for the next separator run
 * starting at offset and treats the text between the previous separator end
 * and this separator start as one component. */
188 size_t remaining_len = strlen(pos);
189 size_t last_component_end = 0;
191 while (offset <= remaining_len) {
/* NOTE(review): pcre2_jit_match is only valid for a pattern that was
 * successfully JIT-compiled - confirm that
 * asciichat_pcre2_singleton_compile guarantees this. */
193 int rc = pcre2_jit_match(separator_regex, (PCRE2_SPTR8)pos, remaining_len, offset, 0, sep_match, NULL);
194 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(sep_match);
195 PCRE2_SIZE sep_start, sep_end;
/* Separator bounds are taken from the match ... */
198 sep_start = ovector[0];
199 sep_end = ovector[1];
/* ... or both set to the end of the string (the selecting condition is not
 * shown in this listing). */
202 sep_start = remaining_len;
203 sep_end = remaining_len;
207 size_t component_len = sep_start - last_component_end;
208 if (component_len > 0) {
209 if (component_len >=
sizeof(components[0])) {
210 component_len =
sizeof(components[0]) - 1;
/* NOTE(review): component_len is clamped against sizeof(components[0]) but
 * copied into a fixed 256-byte buffer; this is only safe if a components slot
 * is at most 256 bytes - the declaration is not visible here, confirm. */
213 char temp_component[256];
214 memcpy(temp_component, pos + last_component_end, component_len);
215 temp_component[component_len] =
'\0';
/* Test the component against the dots-only regex, then branch on length. */
218 int is_dot = pcre2_jit_match(dot_regex, (PCRE2_SPTR8)temp_component, component_len, 0, 0, dot_match, NULL);
222 if (component_len == 1) {
224 }
else if (component_len == 2) {
226 if (component_count > 0) {
228 }
/* Nothing collected yet and the path is not absolute: the double-dot
 * component itself is stored. */
else if (!absolute) {
229 memcpy(components[component_count], temp_component, component_len);
230 components[component_count][component_len] =
'\0';
235 memcpy(components[component_count], temp_component, component_len);
236 components[component_count][component_len] =
'\0';
241 memcpy(components[component_count], temp_component, component_len);
242 components[component_count][component_len] =
'\0';
252 last_component_end = sep_end;
255 pcre2_match_data_free(sep_match);
256 pcre2_match_data_free(dot_match);
/* Rebuild step: for an absolute path with a drive-style prefix, emit the drive
 * letter, a colon and PATH_DELIM first. */
263 if (absolute && path_len >= 3 && isalpha((
unsigned char)path[0]) && path[1] ==
':') {
264 normalized[out_pos++] = path[0];
265 normalized[out_pos++] =
':';
266 normalized[out_pos++] = PATH_DELIM;
270 normalized[out_pos++] = PATH_DELIM;
/* Append every stored component; PATH_DELIM is written ahead of a component
 * under a condition not shown in this listing. */
274 for (
int i = 0; i < component_count; i++) {
276 normalized[out_pos++] = PATH_DELIM;
278 size_t comp_len = strlen(components[i]);
282 memcpy(normalized + out_pos, components[i], comp_len);
286 normalized[out_pos] =
'\0';
/*
 * Searches upward from a starting directory for the project root and produces
 * a heap-allocated copy of the directory found.
 *
 * A directory qualifies when it contains a .git directory, or when it contains
 * both LICENSE.txt and CMakeLists.txt as regular files. The result is also
 * stored in cached_root so that later calls can answer from the cache.
 *
 * NOTE(review): the copies are created with plain malloc and strcpy while
 * other code in this file uses SAFE_MALLOC; no NULL check on the malloc
 * results is visible in this listing. Presumably the caller owns and frees the
 * returned string - confirm.
 * NOTE(review): cached_root and search_done are static state with no
 * synchronisation visible in this excerpt - confirm the threading expectations.
 * NOTE(review): many lines (declarations, returns, closing braces) are missing
 * from this listing.
 */
305static char *find_project_root(
void) {
308 static bool search_done =
false;
/* Cache hit: hand out a fresh copy of the remembered root. */
311 if (cached_root[0] !=
'\0') {
314 char *copy = malloc(strlen(cached_root) + 1);
316 strcpy(copy, cached_root);
/* The search starts from cwd_buf (filled outside the visible lines). */
330 SAFE_STRNCPY(search_path, cwd_buf,
sizeof(search_path));
/* Bounded walk: at most 128 iterations. */
333 for (
int i = 0; i < 128; i++) {
/* Marker 1: a .git entry that is a directory. */
336 safe_snprintf(git_check,
sizeof(git_check),
"%s%s.git", search_path, PATH_SEPARATOR_STR);
337 struct stat git_stat;
338 bool git_found = stat(git_check, &git_stat) == 0 && S_ISDIR(git_stat.st_mode);
341 safe_snprintf(cached_root,
sizeof(cached_root),
"%s", search_path);
345 char *result = malloc(strlen(search_path) + 1);
347 strcpy(result, search_path);
/* Marker 2: LICENSE.txt and CMakeLists.txt both present as regular files. */
356 safe_snprintf(license_check,
sizeof(license_check),
"%s%sLICENSE.txt", search_path, PATH_SEPARATOR_STR);
358 safe_snprintf(cmake_check,
sizeof(cmake_check),
"%s%sCMakeLists.txt", search_path, PATH_SEPARATOR_STR);
359 struct stat license_stat;
360 bool license_found = stat(license_check, &license_stat) == 0 && S_ISREG(license_stat.st_mode);
361 struct stat cmake_stat;
362 bool cmake_found = stat(cmake_check, &cmake_stat) == 0 && S_ISREG(cmake_stat.st_mode);
363 if (license_found && cmake_found) {
365 safe_snprintf(cached_root,
sizeof(cached_root),
"%s", search_path);
369 char *result = malloc(strlen(search_path) + 1);
371 strcpy(result, search_path);
/* Move one level up: special-case an empty path or a lone forward slash. */
377 size_t len = strlen(search_path);
378 if (len == 0 || (len == 1 && search_path[0] ==
'/')) {
/* Drop one trailing separator before looking for the parent. */
384 if (search_path[len - 1] ==
'/' || search_path[len - 1] ==
'\\') {
385 search_path[len - 1] =
'\0';
/* Scan backwards for a separator of either kind. */
391 for (
int j = (
int)len - 1; j >= 0; j--) {
392 if (search_path[j] ==
'/' || search_path[j] ==
'\\') {
/* Truncate at last_sep (set on lines not shown), leaving the parent path. */
403 search_path[last_sep] =
'\0';
412 SET_ERRNO(ERROR_INVALID_PARAM,
"file is null");
422 const char *sep = strrchr(file,
'/');
424 sep = strrchr(file,
'\\');
426 return sep ? sep + 1 : file;
430 const char *normalized = normalize_path(file);
433 char *project_root = find_project_root();
435 size_t root_len = strlen(project_root);
436 size_t norm_len = strlen(normalized);
439 if (norm_len > root_len && strncmp(normalized, project_root, root_len) == 0) {
440 const char *remainder = normalized + root_len;
443 if (*remainder == PATH_DELIM || *remainder ==
'/' || *remainder ==
'\\') {
454 const char *last_sep = strrchr(normalized, PATH_DELIM);
456 last_sep = strrchr(normalized,
'/');
459 last_sep = strrchr(normalized,
'\\');
472 if (path[0] == PATH_TILDE) {
479 size_t total_len = strlen(home) + strlen(path) + 1;
480 expanded = SAFE_MALLOC(total_len,
char *);
520 char *result = SAFE_MALLOC(strlen(cwd_buf) + 1,
char *);
529 size_t log_dir_len = strlen(temp_dir) + strlen(PATH_SEPARATOR_STR) + strlen(
"ascii-chat") + 1;
530 char *log_dir = SAFE_MALLOC(log_dir_len,
char *);
534 safe_snprintf(log_dir, log_dir_len,
"%s%sascii-chat", temp_dir, PATH_SEPARATOR_STR);
537 asciichat_error_t mkdir_result = platform_mkdir(log_dir, DIR_PERM_PRIVATE);
538 if (mkdir_result != ASCIICHAT_OK) {
541 char *result = SAFE_MALLOC(strlen(temp_dir) + 1,
char *);
553 char *result = SAFE_MALLOC(strlen(temp_dir) + 1,
char *);
564 char *repo_root = find_project_root();
575 char *result = SAFE_MALLOC(strlen(cwd_buf) + 1,
char *);
585 if (program_data && program_data[0] !=
'\0') {
586 size_t len = strlen(program_data) + strlen(
"\\ascii-chat\\") + 1;
587 char *system_dir = SAFE_MALLOC(len,
char *);
589 safe_snprintf(system_dir, len,
"%s\\ascii-chat\\", program_data);
593 if (mkdir_result == ASCIICHAT_OK) {
599 SAFE_FREE(system_dir);
608 if (mkdir_result == ASCIICHAT_OK) {
620 const char *prefix = ASCIICHAT_INSTALL_PREFIX;
621 size_t system_len = strlen(prefix) + strlen(
"/var/ascii-chat/") + 1;
622 char *system_dir = SAFE_MALLOC(system_len,
char *);
624 safe_snprintf(system_dir, system_len,
"%s/var/ascii-chat/", prefix);
629 if (mkdir_result == ASCIICHAT_OK) {
635 SAFE_FREE(system_dir);
641 if (strcmp(prefix,
"/usr/local") != 0) {
642 const char *usr_local_path =
"/usr/local/var/ascii-chat/";
643 size_t usr_local_len = strlen(usr_local_path) + 1;
644 char *usr_local_dir = SAFE_MALLOC(usr_local_len,
char *);
646 safe_snprintf(usr_local_dir, usr_local_len,
"%s", usr_local_path);
649 if (mkdir_result == ASCIICHAT_OK) {
651 return usr_local_dir;
654 SAFE_FREE(usr_local_dir);
664 if (mkdir_result == ASCIICHAT_OK) {
677 if (!path || !out || out_len == 0) {
678 SET_ERRNO(ERROR_INVALID_PARAM,
"null path or out or out_len is 0");
682 const char *normalized = normalize_path(path);
687 size_t len = strlen(normalized);
688 if (len + 1 > out_len) {
692 memcpy(out, normalized, len + 1);
697 if (!path || !*path) {
702 if ((path[0] ==
'\\' && path[1] ==
'\\')) {
705 if (isalpha((
unsigned char)path[0]) && path[1] == PATH_DRIVE_SEPARATOR && path[2] == PATH_DELIM) {
710 return path[0] == PATH_DELIM;
715 if (!path || !base) {
733 size_t base_len = strlen(normalized_base);
741 char next = normalized_path[base_len];
745 return next == PATH_DELIM;
749 if (!path || !bases || base_count == 0) {
753 for (
size_t i = 0; i < base_count; ++i) {
754 const char *base = bases[i];
767 if (!value || *value ==
'\0') {
771 if (value[0] == PATH_DELIM || value[0] == PATH_COMPONENT_DOT || value[0] == PATH_TILDE) {
775 if (strchr(value, PATH_DELIM)) {
780 if (isalpha((
unsigned char)value[0]) && value[1] ==
':' && value[2] == PATH_DELIM) {
/*
 * Translates a path role into the asciichat_error_t value used when reporting
 * a failure for that role.
 *
 * Visible mapping:
 *   PATH_ROLE_LOG_FILE                          -> ERROR_LOGGING_INIT
 *   PATH_ROLE_KEY_PRIVATE / KEY_PUBLIC / CLIENT_KEYS -> ERROR_CRYPTO_KEY
 *   final return                                -> ERROR_GENERAL
 * NOTE(review): the switch header, the value returned for
 * PATH_ROLE_CONFIG_FILE and the label guarding the final return are missing
 * from this listing - confirm against the full source.
 */
788static asciichat_error_t map_role_to_error(path_role_t role) {
790 case PATH_ROLE_CONFIG_FILE:
792 case PATH_ROLE_LOG_FILE:
793 return ERROR_LOGGING_INIT;
/* All three key-related roles share one error code. */
794 case PATH_ROLE_KEY_PRIVATE:
795 case PATH_ROLE_KEY_PUBLIC:
796 case PATH_ROLE_CLIENT_KEYS:
797 return ERROR_CRYPTO_KEY;
799 return ERROR_GENERAL;
/*
 * Stores candidate in the bases array at index *count, provided it is usable.
 *
 * candidate: string to store; a null or empty string takes the guard branch.
 * bases:     destination array of string pointers.
 * count:     current number of entries; the guard branch is also taken once it
 *            has reached MAX_PATH_BASES.
 *
 * Only the pointer is stored, not a copy of the text, so candidate has to stay
 * valid for as long as bases is in use.
 * NOTE(review): the guard body and the update of *count are not shown in this
 * listing; presumably the guard returns early and *count is incremented after
 * the store - confirm.
 */
802static void append_base_if_valid(
const char *candidate,
const char **bases,
size_t *count) {
803 if (!candidate || *candidate ==
'\0' || *count >= MAX_PATH_BASES) {
809 bases[*count] = candidate;
/*
 * Formats base followed by suffix into out, inserting PATH_SEPARATOR_STR
 * between them when base is non-empty and does not already end in PATH_DELIM.
 *
 * base, suffix: input strings; a null value for either takes the guard branch.
 * out, out_len: destination buffer and its size; out_len of 0 also takes the
 *               guard branch.
 *
 * NOTE(review): the visible guard does not test out for null, and the guard
 * body is not shown in this listing - confirm how an invalid call is handled.
 */
813static void build_ascii_chat_path(
const char *base,
const char *suffix,
char *out,
size_t out_len) {
814 if (!base || !suffix || out_len == 0) {
819 size_t base_len = strlen(base);
/* An empty base gets no separator, so the result is just suffix. */
820 bool needs_sep = base_len > 0 && base[base_len - 1] != PATH_DELIM;
822 safe_snprintf(out, out_len,
"%s%s%s", base, needs_sep ? PATH_SEPARATOR_STR :
"", suffix);
/*
 * Checks path against built-in lists of protected system directories.
 *
 * A list entry is considered hit when path equals the entry exactly, or when
 * path begins with the entry and the very next character is PATH_DELIM, a
 * forward slash or a backslash (so a sibling directory that merely shares the
 * prefix text is not a hit).
 *
 * NOTE(review): sensitive_paths is defined twice below (Windows-style and
 * Unix-style entries); presumably preprocessor conditionals that this listing
 * drops select one of them - confirm. Most list entries and all return
 * statements are also missing from the listing.
 * NOTE(review): the comparison uses strcmp and strncmp, which are
 * case-sensitive; for the Windows-style entries a differently-cased path would
 * not be recognised unless handled on lines not shown - confirm.
 */
832static bool is_sensitive_system_path(
const char *path) {
/* Windows-style entries. */
839 const char *sensitive_paths[] = {
"C:\\Windows",
841 "C:\\Program Files (x86)",
843 "C:\\System Volume Information",
/* Unix-style entries. */
848 const char *sensitive_paths[] = {
"/etc",
/* Additional entries kept in a separate macos_paths list. */
868 const char *macos_paths[] = {
"/System",
/* Both lists are walked until a NULL sentinel entry. */
878 for (
int i = 0; sensitive_paths[i] != NULL; i++) {
879 const char *base = sensitive_paths[i];
880 size_t base_len = strlen(base);
883 if (strcmp(path, base) == 0) {
886 if (strncmp(path, base, base_len) == 0) {
888 if (path[base_len] == PATH_DELIM || path[base_len] ==
'/' || path[base_len] ==
'\\') {
/* Same exact-or-prefix test for the macos_paths list. */
896 for (
int i = 0; macos_paths[i] != NULL; i++) {
897 const char *base = macos_paths[i];
898 size_t base_len = strlen(base);
900 if (strcmp(path, base) == 0) {
903 if (strncmp(path, base, base_len) == 0) {
904 if (path[base_len] == PATH_DELIM || path[base_len] ==
'/' || path[base_len] ==
'\\') {
/*
 * Predicate on a file path; declared to return bool.
 * The only visible statement reports ERROR_INVALID_PARAM through SET_ERRNO.
 * NOTE(review): the condition guarding that report and the rest of the body
 * (how emptiness is determined, what is returned) are missing from this
 * listing - confirm against the full source.
 */
921static bool is_file_empty(
const char *path) {
923 SET_ERRNO(ERROR_INVALID_PARAM,
"is_file_empty: invalid parameter");
/*
 * Decides whether the file at path looks like a log written by ascii-chat by
 * inspecting its first line.
 *
 * Visible logic: one line is read from f with fgets; the flag is set when the
 * line starts with an opening square bracket followed by two digits and
 * further conditions that continue on a line not shown here. The flag, which
 * starts out false, is the return value.
 * NOTE(review): opening and closing of f, the declaration of buffer and the
 * tail of the condition are missing from this listing - confirm.
 */
941static bool is_existing_ascii_chat_log(
const char *path) {
958 bool is_ascii_chat_log =
false;
/* A failed read (for example an empty file) leaves the flag false. */
961 if (fgets(buffer,
sizeof(buffer), f) != NULL) {
964 if (buffer[0] ==
'[' && isdigit((
unsigned char)buffer[1]) && isdigit((
unsigned char)buffer[2]) &&
966 is_ascii_chat_log =
true;
971 return is_ascii_chat_log;
975 if (!normalized_out) {
976 return SET_ERRNO(map_role_to_error(role),
"path_validate_user_path requires output pointer");
978 *normalized_out = NULL;
980 if (!input || *input ==
'\0') {
981 return SET_ERRNO(map_role_to_error(role),
"Path is empty for role %d", role);
985 if (role == PATH_ROLE_LOG_FILE) {
987 bool is_simple_filename =
true;
988 for (
const char *p = input; *p; p++) {
989 if (*p == PATH_DELIM || *p ==
'/' || *p ==
'\\') {
990 is_simple_filename =
false;
995 if (strstr(input,
"..") != NULL) {
996 is_simple_filename =
false;
1000 if (is_simple_filename) {
1009 SAFE_STRNCPY(safe_base, config_dir,
sizeof(safe_base));
1010 SAFE_FREE(config_dir);
1012 return SET_ERRNO(ERROR_LOGGING_INIT,
"Failed to determine safe directory for log file");
1017 size_t base_len = strlen(safe_base);
1018 size_t input_len = strlen(input);
1019 bool needs_sep = base_len > 0 && safe_base[base_len - 1] != PATH_DELIM;
1020 size_t total_len = base_len + (needs_sep ? 1 : 0) + input_len + 1;
1023 return SET_ERRNO(ERROR_LOGGING_INIT,
"Log file path too long: %s/%s", safe_base, input);
1027 safe_snprintf(resolved_buf,
sizeof(resolved_buf),
"%s%s%s", safe_base, needs_sep ? PATH_SEPARATOR_STR :
"",
1033 return SET_ERRNO(ERROR_LOGGING_INIT,
"Failed to normalize log file path: %s", resolved_buf);
1037 char *result = SAFE_MALLOC(strlen(normalized_buf) + 1,
char *);
1039 return SET_ERRNO(ERROR_MEMORY,
"Failed to allocate normalized path");
1041 safe_snprintf(result, strlen(normalized_buf) + 1,
"%s", normalized_buf);
1042 *normalized_out = result;
1043 return ASCIICHAT_OK;
1050 return SET_ERRNO(map_role_to_error(role),
"Value does not look like a filesystem path: %s", input);
1055 return SET_ERRNO(map_role_to_error(role),
"Failed to expand path: %s", input);
1059 const char *candidate_path = expanded;
1064 SAFE_FREE(expanded);
1065 return SET_ERRNO(map_role_to_error(role),
"Failed to determine current working directory");
1068 size_t total_len = strlen(cwd_buf) + 1 + strlen(candidate_path) + 1;
1069 if (total_len >=
sizeof(candidate_buf)) {
1070 SAFE_FREE(expanded);
1071 return SET_ERRNO(map_role_to_error(role),
"Resolved path is too long: %s/%s", cwd_buf, candidate_path);
1073 if (strlen(candidate_path) > 0 && candidate_path[0] == PATH_DELIM) {
1074 safe_snprintf(candidate_buf,
sizeof(candidate_buf),
"%s%s", cwd_buf, candidate_path);
1076 safe_snprintf(candidate_buf,
sizeof(candidate_buf),
"%s%c%s", cwd_buf, PATH_DELIM, candidate_path);
1078 candidate_path = candidate_buf;
1083 SAFE_FREE(expanded);
1084 return SET_ERRNO(map_role_to_error(role),
"Failed to normalize path: %s", candidate_path);
1088 SAFE_FREE(expanded);
1089 return SET_ERRNO(map_role_to_error(role),
"Normalized path is not absolute: %s", normalized_buf);
1092 const char *bases[MAX_PATH_BASES] = {0};
1093 size_t base_count = 0;
1099 append_base_if_valid(cwd_base, bases, &base_count);
1104 append_base_if_valid(temp_base, bases, &base_count);
1109 append_base_if_valid(config_dir, bases, &base_count);
1114 append_base_if_valid(home_env, bases, &base_count);
1119 build_ascii_chat_path(home_env,
".ascii-chat", ascii_chat_home,
sizeof(ascii_chat_home));
1120 append_base_if_valid(ascii_chat_home, bases, &base_count);
1125 build_ascii_chat_path(
"/tmp",
".ascii-chat", ascii_chat_home_tmp,
sizeof(ascii_chat_home_tmp));
1126 append_base_if_valid(ascii_chat_home_tmp, bases, &base_count);
1131 build_ascii_chat_path(home_env,
".ssh", ssh_home,
sizeof(ssh_home));
1132 append_base_if_valid(ssh_home, bases, &base_count);
1139 build_ascii_chat_path(program_data,
"ascii-chat", program_data_logs,
sizeof(program_data_logs));
1140 append_base_if_valid(program_data_logs, bases, &base_count);
1144 append_base_if_valid(
"/etc/ascii-chat", bases, &base_count);
1145 append_base_if_valid(
"/usr/local/etc/ascii-chat", bases, &base_count);
1146 append_base_if_valid(
"/var/log", bases, &base_count);
1147 append_base_if_valid(
"/var/tmp", bases, &base_count);
1148 append_base_if_valid(
"/tmp", bases, &base_count);
1151 append_base_if_valid(
"/private/tmp", bases, &base_count);
1153 append_base_if_valid(
"/Users", bases, &base_count);
1159 if (is_sensitive_system_path(normalized_buf)) {
1160 SAFE_FREE(expanded);
1162 SAFE_FREE(config_dir);
1164 return SET_ERRNO(map_role_to_error(role),
"Cannot write to protected system path: %s", normalized_buf);
1168 if (role == PATH_ROLE_LOG_FILE) {
1174 if (is_regular_file && !is_existing_ascii_chat_log(normalized_buf) && !is_file_empty(normalized_buf)) {
1175 SAFE_FREE(expanded);
1177 SAFE_FREE(config_dir);
1179 return SET_ERRNO(ERROR_LOGGING_INIT,
1180 "Cannot overwrite existing non-ascii-chat file: %s\n"
1181 "For safety, ascii-chat will only overwrite its own log files or empty files",
1186 if (!is_regular_file) {
1189 SAFE_FREE(expanded);
1191 SAFE_FREE(config_dir);
1193 return SET_ERRNO(ERROR_LOGGING_INIT,
1194 "Log path %s is outside allowed directories (use -L /tmp/file.log, ~/file.log, or "
1195 "relative/absolute paths in safe locations)",
1203 SAFE_FREE(expanded);
1205 SAFE_FREE(config_dir);
1207 return SET_ERRNO(map_role_to_error(role),
"Path %s is outside allowed directories", normalized_buf);
1211 char *result = SAFE_MALLOC(strlen(normalized_buf) + 1,
char *);
1213 SAFE_FREE(expanded);
1215 SAFE_FREE(config_dir);
1217 return SET_ERRNO(ERROR_MEMORY,
"Failed to allocate normalized path");
1219 safe_snprintf(result, strlen(normalized_buf) + 1,
"%s", normalized_buf);
1220 *normalized_out = result;
1222 SAFE_FREE(expanded);
1224 SAFE_FREE(config_dir);
1226 return ASCIICHAT_OK;
bool path_looks_like_path(const char *value)
bool path_is_absolute(const char *path)
asciichat_error_t path_validate_user_path(const char *input, path_role_t role, char **normalized_out)
char * expand_path(const char *path)
bool path_is_within_base(const char *path, const char *base)
char * get_discovery_database_dir(void)
char * get_config_dir(void)
bool path_is_within_any_base(const char *path, const char *const *bases, size_t base_count)
char * get_data_dir(void)
const char * extract_project_relative_path(const char *file)
bool path_normalize_copy(const char *path, char *out, size_t out_len)
pcre2_code * asciichat_pcre2_singleton_get_code(pcre2_singleton_t *singleton)
Get the compiled pcre2_code from a singleton handle.
Represents a thread-safe compiled PCRE2 regex singleton.
int safe_snprintf(char *buffer, size_t buffer_size, const char *format,...)
Safe formatted string printing to buffer.
#define PLATFORM_MAX_PATH_LENGTH
bool platform_get_cwd(char *cwd, size_t path_size)
int platform_path_strcasecmp(const char *a, const char *b, size_t n)
void platform_normalize_path_separators(char *path)
bool platform_get_temp_dir(char *temp_dir, size_t path_size)
int platform_is_regular_file(const char *path)
const char * platform_get_home_dir(void)
char * platform_get_config_dir(void)
FILE * platform_fopen(const char *filename, const char *mode)
int platform_access(const char *pathname, int mode)
char * platform_get_data_dir(void)
asciichat_error_t platform_mkdir_recursive(const char *path, int mode)
const char * platform_getenv(const char *name)