// Default binary names for the two external symbolizer tools. Each macro is
// listed twice: once with an ".exe" suffix and once without.
// NOTE(review): the preprocessor conditional that selects between the two
// spellings is not visible in this listing - presumably a Windows /
// non-Windows guard; confirm against the full file.
9#define LLVM_SYMBOLIZER_BIN "llvm-symbolizer.exe"
10#define ADDR2LINE_BIN "addr2line.exe"
14#define LLVM_SYMBOLIZER_BIN "llvm-symbolizer"
15#define ADDR2LINE_BIN "addr2line"
29#include <ascii-chat/platform/symbols.h>
30#include <ascii-chat/platform/system.h>
31#include <ascii-chat/common.h>
32#include <ascii-chat/uthash/uthash.h>
33#include <ascii-chat/platform/rwlock.h>
34#include <ascii-chat/platform/init.h>
35#include <ascii-chat/util/path.h>
36#include <ascii-chat/util/string.h>
// Placeholder text; the batch-resolve path compares a result against it with
// strcmp() in the condition that guards its cache-insert step.
43#define NULL_SENTINEL "[NULL]"
// Flipped to true by a compare-and-swap immediately before detect_symbolizer()
// is called during cache initialization.
56static atomic_bool g_symbolizer_detected =
false;
// llvm-symbolizer probe state: "checked" is stored true once the probe has
// run, "available" stores the probe's outcome.
57static atomic_bool g_llvm_symbolizer_checked =
false;
58static atomic_bool g_llvm_symbolizer_available =
false;
// Same pair of flags for the addr2line probe.
60static atomic_bool g_addr2line_checked =
false;
61static atomic_bool g_addr2line_available =
false;
// Held while either tool probe runs (both probes lock this one mutex).
63static static_mutex_t g_symbolizer_detection_mutex = STATIC_MUTEX_INIT;
// Reader/writer lock taken around accesses to the g_symbol_cache hash table.
115static rwlock_t g_symbol_cache_lock = {0};
// Tested at the top of the cache entry points; set by a compare-and-swap in
// the init path and stored false again in the destroy path.
116static atomic_bool g_symbol_cache_initialized =
false;
// Lookup counters: incremented in the lookup path, reported by the stats
// helpers and in the cleanup log message.
119static atomic_uint_fast64_t g_cache_hits = 0;
120static atomic_uint_fast64_t g_cache_misses = 0;
/**
 * Decide whether `path` can be used as a symbolizer executable.
 *
 * Two strategies are visible: a GetFileAttributesA()/CreateFileA() probe and
 * an access(path, X_OK) check.
 * NOTE(review): the lines between the visible statements (return statements,
 * closing braces, any platform guard and any handle cleanup) are absent from
 * this listing - presumably the first strategy is the Windows build and the
 * second the POSIX build; confirm against the full file.
 *
 * @param path Candidate filesystem path; NULL and "" are tested explicitly.
 * @return bool; the access() variant is true only when the path is non-empty
 *         and access(path, X_OK) succeeds.
 */
130static bool symbolizer_path_is_executable(
const char *path) {
// Guard against a NULL or empty path before touching the filesystem.
132 if (!path || path[0] ==
'\0') {
// Attribute lookup; INVALID_FILE_ATTRIBUTES is tested as its failure value.
136 DWORD attrs = GetFileAttributesA(path);
137 if (attrs == INVALID_FILE_ATTRIBUTES) {
// Directories are singled out via their attribute bit.
140 if (attrs & FILE_ATTRIBUTE_DIRECTORY) {
// Open the existing file for reading, sharing read and write access, to
// confirm it can actually be opened.
144 HANDLE handle = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING,
145 FILE_ATTRIBUTE_NORMAL, NULL);
146 if (handle == INVALID_HANDLE_VALUE) {
// access() variant: repeats the NULL/empty test and asks for execute permission.
153 return (path && path[0] !=
'\0' && access(path, X_OK) == 0);
/**
 * Resolve the llvm-symbolizer command, probing for it at most once.
 *
 * The "checked" flag is tested once without the mutex and again after taking
 * g_symbolizer_detection_mutex, so only one caller performs the probe. The
 * probe honours the LLVM_SYMBOLIZER_PATH environment variable when it names
 * an executable path and copies it into g_llvm_symbolizer_cmd.
 * NOTE(review): several lines are missing from this listing, including where
 * `available` becomes true, any fallback search, and the return values for
 * the "unavailable" and "no explicit path" cases - confirm in the full file.
 *
 * @return g_llvm_symbolizer_cmd when it holds a non-empty path; other return
 *         values are not visible here.
 */
157static const char *get_llvm_symbolizer_command(
void) {
// First check happens without the mutex.
158 if (!atomic_load(&g_llvm_symbolizer_checked)) {
159 static_mutex_lock(&g_symbolizer_detection_mutex);
// Re-check under the mutex: another thread may have finished the probe.
160 if (!atomic_load(&g_llvm_symbolizer_checked)) {
161 const char *env_path = SAFE_GETENV(
"LLVM_SYMBOLIZER_PATH");
162 bool available =
false;
// Start from an empty command buffer.
164 g_llvm_symbolizer_cmd[0] =
'\0';
// An explicit override is only accepted when it passes the executable test.
166 if (env_path && env_path[0] !=
'\0') {
167 if (symbolizer_path_is_executable(env_path)) {
168 SAFE_STRNCPY(g_llvm_symbolizer_cmd, env_path,
sizeof(g_llvm_symbolizer_cmd));
170 log_dev(
"Using llvm-symbolizer from LLVM_SYMBOLIZER_PATH: %s", env_path);
172 log_warn(
"LLVM_SYMBOLIZER_PATH is set but not executable: %s", env_path);
// Command buffer cleared again (the enclosing condition is not visible here).
178 g_llvm_symbolizer_cmd[0] =
'\0';
// The outcome is stored before "checked" is set to true.
182 atomic_store(&g_llvm_symbolizer_available, available);
183 atomic_store(&g_llvm_symbolizer_checked,
true);
185 static_mutex_unlock(&g_symbolizer_detection_mutex);
188 if (!atomic_load(&g_llvm_symbolizer_available)) {
// A non-empty buffer means an explicit path was recorded by the probe.
192 if (g_llvm_symbolizer_cmd[0] !=
'\0') {
193 return g_llvm_symbolizer_cmd;
/**
 * Resolve the addr2line command, probing for it at most once.
 *
 * Mirrors get_llvm_symbolizer_command(): the "checked" flag is tested without
 * and then with g_symbolizer_detection_mutex held, and the ADDR2LINE_PATH
 * environment variable is used when it names an executable path.
 * NOTE(review): lines are missing from this listing, including where
 * `available` becomes true, any fallback search, and the return values for
 * the "unavailable" and "no explicit path" cases - confirm in the full file.
 *
 * @return g_addr2line_cmd when it holds a non-empty path; other return values
 *         are not visible here.
 */
199static const char *get_addr2line_command(
void) {
// First check happens without the mutex.
200 if (!atomic_load(&g_addr2line_checked)) {
201 static_mutex_lock(&g_symbolizer_detection_mutex);
// Re-check under the mutex: another thread may have finished the probe.
202 if (!atomic_load(&g_addr2line_checked)) {
203 const char *env_path = SAFE_GETENV(
"ADDR2LINE_PATH");
204 bool available =
false;
// Start from an empty command buffer.
206 g_addr2line_cmd[0] =
'\0';
// An explicit override is only accepted when it passes the executable test.
208 if (env_path && env_path[0] !=
'\0') {
209 if (symbolizer_path_is_executable(env_path)) {
210 SAFE_STRNCPY(g_addr2line_cmd, env_path,
sizeof(g_addr2line_cmd));
212 log_dev(
"Using addr2line from ADDR2LINE_PATH: %s", env_path);
214 log_warn(
"ADDR2LINE_PATH is set but not executable: %s", env_path);
// Command buffer cleared again (the enclosing condition is not visible here).
220 g_addr2line_cmd[0] =
'\0';
// The outcome is stored before "checked" is set to true.
224 atomic_store(&g_addr2line_available, available);
225 atomic_store(&g_addr2line_checked,
true);
227 static_mutex_unlock(&g_symbolizer_detection_mutex);
230 if (!atomic_load(&g_addr2line_available)) {
// A non-empty buffer means an explicit path was recorded by the probe.
234 if (g_addr2line_cmd[0] !=
'\0') {
235 return g_addr2line_cmd;
// Tool selection: llvm-symbolizer is probed first, addr2line second.
// NOTE(review): the enclosing function header, the return statements and the
// condition guarding the addr2line log line are not in this listing; this is
// presumably the body of detect_symbolizer() - confirm against the full file.
244 const char *llvm_symbolizer = get_llvm_symbolizer_command();
245 if (llvm_symbolizer) {
246 log_dev(
"Using llvm-symbolizer for symbol resolution");
// Second choice, queried after the llvm-symbolizer branch.
250 const char *addr2line_cmd = get_addr2line_command();
252 log_debug(
"Using addr2line command: %s", addr2line_cmd);
// Cache initialization (function header not visible in this listing;
// presumably symbol_cache_init - confirm).
// The initialized flag is claimed with a compare-and-swap so only one caller
// proceeds past the first test.
// NOTE(review): the flag becomes true before the rwlock setup whose failure
// is handled below, so another thread could observe "initialized" and use the
// lock early - confirm whether callers can race with initialization.
265 bool expected =
false;
266 if (!atomic_compare_exchange_strong(&g_symbol_cache_initialized, &expected,
true)) {
// Tool detection runs only for the caller that flips g_symbolizer_detected.
272 if (atomic_compare_exchange_strong(&g_symbolizer_detected, &expected,
true)) {
273 g_symbolizer_type = detect_symbolizer();
// Failure path: roll the flag back and report a thread error.
278 atomic_store(&g_symbol_cache_initialized,
false);
279 return SET_ERRNO(ERROR_THREAD,
"Failed to initialize symbol cache rwlock");
// Start with an empty hash table and zeroed counters.
283 g_symbol_cache = NULL;
285 atomic_store(&g_cache_hits, 0);
286 atomic_store(&g_cache_misses, 0);
288 log_dev(
"Symbol cache initialized");
// Cache teardown (function header not visible in this listing; presumably
// symbol_cache_destroy - confirm).
// Order visible here: test the flag, store it false, take the write lock,
// remove every entry, release and destroy the lock, then null the table.
// NOTE(review): the lock is destroyed immediately after it is released, and
// g_symbol_cache is reset after that point; confirm no other thread can still
// be waiting on or about to take the lock.
293 if (!atomic_load(&g_symbol_cache_initialized)) {
298 atomic_store(&g_symbol_cache_initialized,
false);
301 rwlock_wrlock(&g_symbol_cache_lock);
// Entry count sampled before the table is emptied; it is only used in the log.
304 size_t entry_count = HASH_COUNT(g_symbol_cache);
308 size_t freed_count = 0;
// Walk the table, unlinking each entry (the per-entry frees and the
// freed_count update are not visible in this listing).
309 HASH_ITER(hh, g_symbol_cache, entry, tmp) {
311 HASH_DEL(g_symbol_cache, entry);
321 rwlock_wrunlock(&g_symbol_cache_lock);
322 rwlock_destroy(&g_symbol_cache_lock);
324 g_symbol_cache = NULL;
// Counters are cast to unsigned long long to match the %llu conversions.
326 log_dev(
"Symbol cache cleaned up: %zu entries counted, %zu entries freed (hits=%llu, misses=%llu)", entry_count,
327 freed_count, (
unsigned long long)atomic_load(&g_cache_hits),
328 (
unsigned long long)atomic_load(&g_cache_misses));
// Cache lookup (function header not visible in this listing; presumably
// symbol_cache_lookup - confirm).
// Rejects calls made while the cache is not initialized or with a NULL
// address, then searches the table under the read lock.
332 if (!atomic_load(&g_symbol_cache_initialized) || !addr) {
336 rwlock_rdlock(&g_symbol_cache_lock);
// The address itself is the hash key.
339 HASH_FIND_PTR(g_symbol_cache, &addr, entry);
341 const char *result = NULL;
// One of the two counters is bumped per lookup (the branch that selects
// between them, and how `result` is filled, are not visible here).
345 atomic_fetch_add(&g_cache_hits, 1);
347 atomic_fetch_add(&g_cache_misses, 1);
350 rwlock_rdunlock(&g_symbol_cache_lock);
// Cache insertion (function header not visible in this listing; presumably
// symbol_cache_insert - confirm).
// Requires an initialized cache, a non-NULL address and a non-NULL symbol.
355 if (!atomic_load(&g_symbol_cache_initialized) || !addr || !symbol) {
360 rwlock_wrlock(&g_symbol_cache_lock);
// The flag is tested again once the write lock is held, releasing the lock
// when the cache is no longer initialized.
364 if (!atomic_load(&g_symbol_cache_initialized)) {
365 rwlock_wrunlock(&g_symbol_cache_lock);
// Look for an entry already stored under this address.
371 HASH_FIND_PTR(g_symbol_cache, &addr, existing);
// An existing entry is compared against the new text; what happens when they
// differ is not visible in this listing.
375 if (existing->
symbol && strcmp(existing->
symbol, symbol) != 0) {
// Several unlock calls follow; each presumably ends a separate exit path
// whose surrounding lines are missing here.
380 rwlock_wrunlock(&g_symbol_cache_lock);
384 rwlock_wrunlock(&g_symbol_cache_lock);
391 rwlock_wrunlock(&g_symbol_cache_lock);
399 rwlock_wrunlock(&g_symbol_cache_lock);
// Link the new entry into the table, keyed by its addr field.
404 HASH_ADD_PTR(g_symbol_cache, addr, entry);
407 rwlock_wrunlock(&g_symbol_cache_lock);
// Statistics export (function header not visible in this listing; presumably
// symbol_cache_get_stats - confirm).
// Counters are read atomically; the entry count is read under the read lock.
// NOTE(review): any NULL checks on the three output pointers are not visible
// here - confirm before passing NULL.
414 *hits_out = atomic_load(&g_cache_hits);
417 *misses_out = atomic_load(&g_cache_misses);
420 rwlock_rdlock(&g_symbol_cache_lock);
421 *entries_out = HASH_COUNT(g_symbol_cache);
422 rwlock_rdunlock(&g_symbol_cache_lock);
// Statistics logging (function header not visible in this listing; presumably
// symbol_cache_print_stats - confirm).
// Snapshots both counters, reads the entry count under the read lock, and
// logs them together with the hit rate as a percentage.
427 uint64_t hits = atomic_load(&g_cache_hits);
428 uint64_t misses = atomic_load(&g_cache_misses);
430 rwlock_rdlock(&g_symbol_cache_lock);
431 size_t entries = HASH_COUNT(g_symbol_cache);
432 rwlock_rdunlock(&g_symbol_cache_lock);
// With no lookups recorded the rate is reported as 0.0, avoiding a division
// by zero.
434 uint64_t total = hits + misses;
435 double hit_rate = total > 0 ? (100.0 * (double)hits / (
double)total) : 0.0;
// Counters are cast to unsigned long long to match the %llu conversions.
437 log_debug(
"Symbol Cache Stats: %zu entries, %llu hits, %llu misses (%.1f%% hit rate)", entries,
438 (
unsigned long long)hits, (
unsigned long long)misses, hit_rate);
/**
 * Read one llvm-symbolizer record from `fp` and format it as a single string.
 *
 * A record is consumed as three lines: function name, file location, and one
 * further line read into `blank_line`. The text produced is one of:
 *   "<addr as %p>"                    neither function nor file known
 *   "<func>() (<path>)"               both known
 *   "<func>()"                        only the function known
 *   "<path> (unknown function)"       only the file known
 * NOTE(review): lines are missing from this listing (the handling of failed
 * reads, how `rel_path` is derived, what is done with `last_colon` and
 * `paren`, and the return statement) - confirm against the full file.
 *
 * @param fp   Stream positioned at the start of a record.
 * @param addr Address the record belongs to; printed when nothing resolved.
 * @return Buffer obtained from SAFE_MALLOC(1024, ...); presumably owned by the
 *         caller - confirm.
 */
451static char *parse_llvm_symbolizer_result(FILE *fp,
void *addr) {
// Defaults match the tool's "unknown" markers in case a read fails.
452 char func_name[512] =
"??";
453 char file_location[512] =
"??:0";
// Line 1: function name, with the trailing newline cut off.
457 if (fgets(func_name,
sizeof(func_name), fp) == NULL) {
460 func_name[strcspn(func_name,
"\n")] =
'\0';
// Line 2: file location, with the trailing newline cut off.
463 if (fgets(file_location,
sizeof(file_location), fp) == NULL) {
466 file_location[strcspn(file_location,
"\n")] =
'\0';
// Line 3: read and stored in blank_line.
469 if (fgets(blank_line,
sizeof(blank_line), fp) == NULL) {
// Position of the last ':' in the location (its use is not visible here).
474 char *last_colon = strrchr(file_location,
':');
485 SAFE_STRNCPY(rel_path, rel_path_tmp,
sizeof(rel_path));
488 char *result = SAFE_MALLOC(1024,
char *);
// A function counts as known when it is neither "??" nor empty; a file counts
// as known when the location is none of "??:0", "??:?" or "??".
494 bool has_func = (strcmp(func_name,
"??") != 0 && strlen(func_name) > 0);
496 (strcmp(file_location,
"??:0") != 0 && strcmp(file_location,
"??:?") != 0 && strcmp(file_location,
"??") != 0);
// Work on a copy of the name and locate any "()" inside it; the formats below
// append "()" themselves.
499 char clean_func[512];
500 SAFE_STRNCPY(clean_func, func_name,
sizeof(clean_func));
501 char *paren = strstr(clean_func,
"()");
// Pick the output format from the two "known" flags.
506 if (!has_func && !has_file) {
507 SAFE_SNPRINTF(result, 1024,
"%p", addr);
508 }
else if (has_func && has_file) {
509 SAFE_SNPRINTF(result, 1024,
"%s() (%s)", clean_func, rel_path);
510 }
else if (has_func) {
511 SAFE_SNPRINTF(result, 1024,
"%s()", clean_func);
513 SAFE_SNPRINTF(result, 1024,
"%s (unknown function)", rel_path);
/**
 * Symbolize a batch of addresses with llvm-symbolizer.
 *
 * Addresses are grouped by the binary that contains them (at most 8 groups of
 * at most 64 addresses each), one command line is built and run through
 * popen() per group, and each record is parsed with
 * parse_llvm_symbolizer_result().
 * NOTE(review): many lines are missing from this listing (early returns,
 * group lookup setup, error handling, pclose, the final return), so the
 * handling of addresses that do not fit a group cannot be seen here.
 *
 * @param buffer Array of `size` addresses.
 * @param size   Number of addresses; values <= 0 are rejected together with a
 *               NULL buffer.
 * @return Array allocated with size + 1 zeroed slots; presumably
 *         NULL-terminated and owned by the caller - confirm.
 */
528static char **run_llvm_symbolizer_batch(
void *
const *buffer,
int size) {
529 if (size <= 0 || !buffer) {
533 const char *symbolizer_cmd = get_llvm_symbolizer_command();
534 if (!symbolizer_cmd) {
535 log_debug(
"llvm-symbolizer not available - skipping symbolization");
// One extra zeroed slot beyond `size`.
540 char **result = SAFE_CALLOC((
size_t)(size + 1),
sizeof(
char *),
char **);
// Per-group storage: file offsets to query and, for each, the index of the
// originating address in `buffer`.
553 uintptr_t file_offsets[64];
554 int original_indices[64];
558 binary_group_t groups[8];
// Pass 1: assign every address to the group of its containing binary.
563 for (
int i = 0; i < size; i++) {
564 platform_binary_match_t match;
// Fallback text: the raw address (the condition selecting this path is not
// visible in this listing).
567 result[i] = SAFE_MALLOC(32,
char *);
569 SAFE_SNPRINTF(result[i], 32,
"%p", buffer[i]);
// Reuse an existing group when the binary path matches.
576 for (
int g = 0; g < num_groups; g++) {
577 if (strcmp(groups[g].binary_path, match.path) == 0) {
// Otherwise open a new group while fewer than 8 exist.
583 if (group_idx < 0 && num_groups < 8) {
584 group_idx = num_groups++;
585 SAFE_STRNCPY(groups[group_idx].binary_path, match.path,
sizeof(groups[group_idx].binary_path));
586 groups[group_idx].count = 0;
// Record the address only while the group has room (64 entries).
589 if (group_idx >= 0 && groups[group_idx].count < 64) {
590 int idx = groups[group_idx].count++;
591 groups[group_idx].file_offsets[idx] = match.file_offset;
592 groups[group_idx].original_indices[idx] = i;
// Pass 2: run the tool once per non-empty group.
597 for (
int g = 0; g < num_groups; g++) {
598 if (groups[g].count == 0) {
604 if (!
escape_path_for_shell(groups[g].binary_path, escaped_binary_path,
sizeof(escaped_binary_path))) {
// NOTE(review): the binary path is shell-escaped, but symbolizer_cmd (which
// may come from LLVM_SYMBOLIZER_PATH) is interpolated as-is in the visible
// lines - confirm it is validated elsewhere.
610 int offset =
safe_snprintf(cmd,
sizeof(cmd),
"%s --demangle --output-style=LLVM --relativenames -e %s ",
611 symbolizer_cmd, escaped_binary_path);
// Append offsets while at least 32 bytes of the command buffer remain.
// NOTE(review): if this loop stops early, the read loop below still expects
// groups[g].count records - confirm how the shortfall is handled.
613 for (
int j = 0; j < groups[g].count && offset < (int)
sizeof(cmd) - 32; j++) {
// NOTE(review): the uintptr_t offset is cast to unsigned long for %lx; where
// unsigned long is 32 bits wide this would truncate 64-bit offsets - confirm
// the supported targets.
615 safe_snprintf(cmd + offset,
sizeof(cmd) - (
size_t)offset,
"0x%lx ", (
unsigned long)groups[g].file_offsets[j]);
// Discard the tool's stderr (POSIX-shell redirection syntax).
622 strncat(cmd,
"2>/dev/null",
sizeof(cmd) - strlen(cmd) - 1);
624 FILE *fp = popen(cmd,
"r");
// Basename of the binary: text after the last '/' or '\\', or the whole path
// when there is no separator.
630 const char *binary_name = strrchr(groups[g].binary_path,
'/');
632 binary_name = strrchr(groups[g].binary_path,
'\\');
634 binary_name = binary_name ? binary_name + 1 : groups[g].binary_path;
// One parsed record per queued offset, stored at the address's original index.
637 for (
int j = 0; j < groups[g].count; j++) {
638 int orig_idx = groups[g].original_indices[j];
639 char *parsed = parse_llvm_symbolizer_result(fp, buffer[orig_idx]);
// Prefix the parsed text with "[binary] "; `parsed` itself is stored in
// another branch (the conditions choosing between them are not visible here).
643 char *with_binary = SAFE_MALLOC(1024,
char *);
645 SAFE_SNPRINTF(with_binary, 1024,
"[%s] %s", binary_name, parsed);
647 result[orig_idx] = with_binary;
649 result[orig_idx] = parsed;
// Fallback text: binary name plus the raw address.
653 result[orig_idx] = SAFE_MALLOC(256,
char *);
654 if (result[orig_idx]) {
655 SAFE_SNPRINTF(result[orig_idx], 256,
"[%s] %p", binary_name, buffer[orig_idx]);
/**
 * Symbolize a batch of addresses with addr2line.
 *
 * Builds a single command line of the form
 * "<addr2line> -e <exe> -f -C -i <addr>..." with shell-escaped tool and
 * executable paths, runs it through popen(), and reads two output lines
 * (function name, file:line) per input address.
 * NOTE(review): `-i` asks addr2line to also print enclosing scopes for inlined
 * functions, yet exactly two lines are read per address here; extra inline
 * records would shift later results - confirm this is intended.
 * NOTE(review): the raw values from `buffer` are passed as addresses, unlike
 * the file offsets used by the llvm-symbolizer path - confirm this is correct
 * for position-independent executables.
 * NOTE(review): many lines are missing from this listing (early returns,
 * declarations, cleanup, pclose, final return).
 *
 * @param buffer Array of `size` addresses.
 * @param size   Number of addresses; values <= 0 are rejected together with a
 *               NULL buffer.
 * @return Array allocated with size + 1 zeroed slots; presumably
 *         NULL-terminated and owned by the caller - confirm.
 */
672static char **run_addr2line_batch(
void *
const *buffer,
int size) {
673 if (size <= 0 || !buffer) {
674 log_error(
"Invalid parameters: buffer=%p, size=%d", (
void *)buffer, size);
678 const char *addr2line_cmd = get_addr2line_command();
679 if (!addr2line_cmd) {
680 log_debug(
"addr2line not available - skipping symbolization");
// The executable path is validated and then escaped for the shell.
693 log_error(
"Invalid executable path - contains unsafe characters: %s", exe_path);
700 log_error(
"Failed to escape executable path for shell command");
703 const char *escaped_exe_path = escaped_exe_path_buf;
// The tool path receives the same validation and escaping.
706 log_warn(
"addr2line path contains unsafe characters: %s", addr2line_cmd);
713 log_error(
"Failed to escape addr2line path for shell command");
716 const char *escaped_addr2line_cmd = escaped_addr2line_buf;
// Command prefix; a result outside (0, sizeof(cmd)) is treated as failure.
720 int offset =
safe_snprintf(cmd,
sizeof(cmd),
"%s -e %s -f -C -i ", escaped_addr2line_cmd, escaped_exe_path);
721 if (offset <= 0 || offset >= (
int)
sizeof(cmd)) {
722 log_error(
"Failed to build addr2line command");
// Append every address in hex; fail if one would not fit in the buffer.
727 for (
int i = 0; i < size; i++) {
728 int n =
safe_snprintf(cmd + offset,
sizeof(cmd) - (
size_t)offset,
"0x%llx ", (
unsigned long long)buffer[i]);
729 if (n <= 0 || offset + n >= (
int)
sizeof(cmd)) {
730 log_error(
"Failed to build addr2line command");
737 FILE *fp = popen(cmd,
"r");
739 log_error(
"Failed to execute addr2line command");
// One extra zeroed slot beyond `size`.
744 char **result = SAFE_CALLOC((
size_t)(size + 1),
sizeof(
char *),
char **);
// Read one (function, file:line) pair per address, in input order.
751 for (
int i = 0; i < size; i++) {
755 if (fgets(func_name,
sizeof(func_name), fp) == NULL) {
758 if (fgets(file_line,
sizeof(file_line), fp) == NULL) {
// Cut both lines at their trailing newline.
763 func_name[strcspn(func_name,
"\n")] =
'\0';
764 file_line[strcspn(file_line,
"\n")] =
'\0';
772 SAFE_STRNCPY(rel_path, rel_path_tmp,
sizeof(rel_path));
775 result[i] = SAFE_MALLOC(1024,
char *);
// "??" and "??:0" / "??:?" are treated as the tool's "unknown" markers.
778 bool has_func = (strcmp(func_name,
"??") != 0);
779 bool has_file = (strcmp(file_line,
"??:0") != 0 && strcmp(file_line,
"??:?") != 0);
// Nothing resolved: print the raw address.
781 if (!has_func && !has_file) {
783 SAFE_SNPRINTF(result[i], 1024,
"%p", buffer[i]);
784 }
else if (has_func && has_file) {
// Both known: word order depends on whether the path text contains a ':'.
786 if (strstr(rel_path,
":") != NULL) {
787 SAFE_SNPRINTF(result[i], 1024,
"%s in %s()", rel_path, func_name);
789 SAFE_SNPRINTF(result[i], 1024,
"%s() at %s", func_name, rel_path);
791 }
else if (has_func) {
// Function only: pair it with the raw address.
793 SAFE_SNPRINTF(result[i], 1024,
"%s() at %p", func_name, buffer[i]);
// File only.
796 SAFE_SNPRINTF(result[i], 1024,
"%s (unknown function)", rel_path);
// Batch resolution through the cache (function header not visible in this
// listing; presumably symbol_cache_resolve_batch - confirm).
// Flow visible here: validate arguments; when the cache is not initialized,
// call the symbolizers directly; otherwise take cached entries, resolve the
// remaining addresses in one batch, and fill any still-empty slot with the
// raw address text.
// NOTE(review): many lines are missing from this listing (returns, the cache
// lookup call, string duplication, cleanup), so ownership of the returned
// strings cannot be confirmed from here.
805 if (size <= 0 || !buffer) {
806 log_error(
"Invalid parameters: buffer=%p, size=%d", (
void *)buffer, size);
// Without an initialized cache, go to the tools directly: llvm-symbolizer
// first, then addr2line (the condition between the two calls is not visible).
812 if (!atomic_load(&g_symbol_cache_initialized)) {
815 char **result = run_llvm_symbolizer_batch(buffer, size);
817 result = run_addr2line_batch(buffer, size);
// One extra zeroed slot beyond `size`.
824 char **result = SAFE_CALLOC((
size_t)(size + 1),
sizeof(
char *),
char **);
// Addresses missing from the cache, plus their positions in `buffer`.
// NOTE(review): both arrays are variable-length arrays sized by the caller's
// `size` - confirm callers bound it.
833 int uncached_count = 0;
834 void *uncached_addrs[size];
835 int uncached_indices[size];
837 for (
int i = 0; i < size; i++) {
// NOTE(review): the const qualifier of `cached` is cast away here; whether
// `cached` is a private copy or cache-owned storage is not visible - confirm.
841 result[i] = (
char *)cached;
844 uncached_addrs[uncached_count] = buffer[i];
845 uncached_indices[uncached_count] = i;
// Resolve all misses in a single call to the detected tool.
851 if (uncached_count > 0) {
852 char **resolved = NULL;
855 switch (g_symbolizer_type) {
857 resolved = run_llvm_symbolizer_batch(uncached_addrs, uncached_count);
860 resolved = run_addr2line_batch(uncached_addrs, uncached_count);
// Move each resolved string to its original position.
870 for (
int i = 0; i < uncached_count; i++) {
871 int orig_idx = uncached_indices[i];
875 if (!result[orig_idx]) {
876 log_error(
"Failed to duplicate string for result[%d]", orig_idx);
// Results equal to NULL_SENTINEL are kept out of the cache-insert step.
880 if (result[orig_idx] && strcmp(result[orig_idx],
NULL_SENTINEL) != 0) {
882 log_error(
"Failed to insert symbol into cache for result[%d]", orig_idx);
// The batch's own string is released after it has been processed.
885 SAFE_FREE(resolved[i]);
888 if (!result[orig_idx]) {
892 if (!result[orig_idx]) {
893 log_error(
"Failed to allocate memory for result[%d]", orig_idx);
// Final pass: any slot still empty gets the raw address as text.
901 for (
int i = 0; i < uncached_count; i++) {
902 int orig_idx = uncached_indices[i];
903 if (!result[orig_idx]) {
904 result[orig_idx] = SAFE_MALLOC(32,
char *);
905 if (result[orig_idx]) {
906 SAFE_SNPRINTF(result[orig_idx], 32,
"%p", uncached_addrs[i]);
911 if (!result[orig_idx]) {
912 log_error(
"Failed to allocate memory for result[%d]", orig_idx);
// Release the strings of a symbol array (function header not visible in this
// listing; presumably symbol_cache_free_symbols - confirm).
// Visits at most 64 slots, testing each for NULL before freeing it.
// NOTE(review): the batch functions in this file allocate size + 1 slots for
// an arbitrary `size`, so an array holding more than 64 strings would not be
// fully visited by this loop - confirm whether callers cap the batch size.
933 for (
int i = 0; i < 64; i++) {
934 if (symbols[i] == NULL) {
942 SAFE_FREE(symbols[i]);
int get_binary_file_address_offsets(const void *addr, platform_binary_match_t *matches, int max_matches)
Get binary that contains address on Linux via /proc/self/maps.
const char * extract_project_relative_path(const char *file)
Symbol cache entry structure for address-to-symbol mapping.
UT_hash_handle hh
uthash handle (required for hash table operations)
char * symbol
Resolved symbol string (allocated, owned by cache)
void * addr
Memory address key (used for hashtable lookup)
#define LLVM_SYMBOLIZER_BIN
void symbol_cache_get_stats(uint64_t *hits_out, uint64_t *misses_out, size_t *entries_out)
bool symbol_cache_insert(void *addr, const char *symbol)
void symbol_cache_print_stats(void)
char ** symbol_cache_resolve_batch(void *const *buffer, int size)
const char * symbol_cache_lookup(void *addr)
asciichat_error_t symbol_cache_init(void)
void symbol_cache_destroy(void)
void symbol_cache_free_symbols(char **symbols)
bool platform_get_executable_path(char *exe_path, size_t path_size)
Get the path to the current executable.
int safe_snprintf(char *buffer, size_t buffer_size, const char *format,...)
Safe formatted string printing to buffer.
bool platform_is_binary_in_path(const char *bin_name)
#define PLATFORM_MAX_PATH_LENGTH
int rwlock_init(rwlock_t *rwlock)
bool validate_shell_safe(const char *str, const char *allowed_chars)
bool escape_path_for_shell(const char *path, char *out_buffer, size_t out_buffer_size)