ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
symbols.c
Go to the documentation of this file.
1
7// Platform-specific binary names
8#ifdef _WIN32
9#define LLVM_SYMBOLIZER_BIN "llvm-symbolizer.exe"
10#define ADDR2LINE_BIN "addr2line.exe"
11#define popen _popen
12#define pclose _pclose
13#else
14#define LLVM_SYMBOLIZER_BIN "llvm-symbolizer"
15#define ADDR2LINE_BIN "addr2line"
16#endif
17
18#include <stdlib.h>
19#include <string.h>
20#include <stdio.h>
21#include <stdatomic.h>
22
23#ifndef _WIN32
24#include <unistd.h>
25#else
26#include <windows.h>
27#endif
28
29#include "symbols.h"
30#include "system.h"
31#include "common.h"
32#include "util/uthash.h"
33#include "platform/rwlock.h"
34#include "platform/init.h"
35#include "util/path.h"
36#include "util/string.h"
37
38// ============================================================================
39// Constants
40// ============================================================================
41
42// Sentinel string for failed allocations (replaces NULL in middle of array)
43#define NULL_SENTINEL "[NULL]"
44
45// ============================================================================
46// Symbolizer Type Selection
47// ============================================================================
48
/**
 * External tool used to turn raw return addresses into symbol names.
 *
 * The value is chosen once by detect_symbolizer() and stored in g_symbolizer_type.
 */
typedef enum {
  SYMBOLIZER_NONE = 0,      // No symbolizer available, use raw addresses
  SYMBOLIZER_LLVM = 1,      // llvm-symbolizer (preferred on all platforms)
  SYMBOLIZER_ADDR2LINE = 2, // addr2line (fallback)
} symbolizer_type_t;
54
55static symbolizer_type_t g_symbolizer_type = SYMBOLIZER_NONE;
56static atomic_bool g_symbolizer_detected = false;
57static atomic_bool g_llvm_symbolizer_checked = false;
58static atomic_bool g_llvm_symbolizer_available = false;
59static char g_llvm_symbolizer_cmd[PLATFORM_MAX_PATH_LENGTH];
60static atomic_bool g_addr2line_checked = false;
61static atomic_bool g_addr2line_available = false;
62static char g_addr2line_cmd[PLATFORM_MAX_PATH_LENGTH];
63static static_mutex_t g_symbolizer_detection_mutex = STATIC_MUTEX_INIT;
64
65// ============================================================================
66// Cache State
67// ============================================================================
68
105typedef struct {
107 void *addr;
109 char *symbol;
111 UT_hash_handle hh;
113
114static symbol_entry_t *g_symbol_cache = NULL; // uthash uses structure pointer as head
115static rwlock_t g_symbol_cache_lock = {0}; // External locking for thread safety
116static atomic_bool g_symbol_cache_initialized = false;
117
118// Statistics
119static atomic_uint_fast64_t g_cache_hits = 0;
120static atomic_uint_fast64_t g_cache_misses = 0;
121
122// ============================================================================
123// Helper Functions
124// ============================================================================
125
/**
 * Decide whether a path names a usable symbolizer binary.
 *
 * On Windows the path must exist, must not be a directory and must be openable
 * for reading. Elsewhere the path must pass access(path, X_OK).
 *
 * @param path Candidate path; NULL and empty strings are rejected.
 * @return true when the path passes the platform check, false otherwise.
 */
static bool symbolizer_path_is_executable(const char *path) {
  if (path == NULL || *path == '\0') {
    return false;
  }

#ifdef _WIN32
  const DWORD attrs = GetFileAttributesA(path);
  if (attrs == INVALID_FILE_ATTRIBUTES || (attrs & FILE_ATTRIBUTE_DIRECTORY) != 0) {
    return false;
  }

  // Opening the file for reading is used as the usability probe on Windows.
  HANDLE probe = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING,
                             FILE_ATTRIBUTE_NORMAL, NULL);
  if (probe == INVALID_HANDLE_VALUE) {
    return false;
  }

  CloseHandle(probe);
  return true;
#else
  return access(path, X_OK) == 0;
#endif
}
156
157static const char *get_llvm_symbolizer_command(void) {
158 if (!atomic_load(&g_llvm_symbolizer_checked)) {
159 static_mutex_lock(&g_symbolizer_detection_mutex);
160 if (!atomic_load(&g_llvm_symbolizer_checked)) {
161 const char *env_path = SAFE_GETENV("LLVM_SYMBOLIZER_PATH");
162 bool available = false;
163
164 g_llvm_symbolizer_cmd[0] = '\0';
165
166 if (env_path && env_path[0] != '\0') {
167 if (symbolizer_path_is_executable(env_path)) {
168 SAFE_STRNCPY(g_llvm_symbolizer_cmd, env_path, sizeof(g_llvm_symbolizer_cmd));
169 available = true;
170 log_debug("Using llvm-symbolizer from LLVM_SYMBOLIZER_PATH: %s", env_path);
171 } else {
172 log_warn("LLVM_SYMBOLIZER_PATH is set but not executable: %s", env_path);
173 }
174 }
175
177 available = true;
178 g_llvm_symbolizer_cmd[0] = '\0'; // Use binary name from PATH
179 log_debug("Found %s in PATH", LLVM_SYMBOLIZER_BIN);
180 }
181
182 atomic_store(&g_llvm_symbolizer_available, available);
183 atomic_store(&g_llvm_symbolizer_checked, true);
184 }
185 static_mutex_unlock(&g_symbolizer_detection_mutex);
186 }
187
188 if (!atomic_load(&g_llvm_symbolizer_available)) {
189 return NULL;
190 }
191
192 if (g_llvm_symbolizer_cmd[0] != '\0') {
193 return g_llvm_symbolizer_cmd;
194 }
195
196 return LLVM_SYMBOLIZER_BIN;
197}
198
199static const char *get_addr2line_command(void) {
200 if (!atomic_load(&g_addr2line_checked)) {
201 static_mutex_lock(&g_symbolizer_detection_mutex);
202 if (!atomic_load(&g_addr2line_checked)) {
203 const char *env_path = SAFE_GETENV("ADDR2LINE_PATH");
204 bool available = false;
205
206 g_addr2line_cmd[0] = '\0';
207
208 if (env_path && env_path[0] != '\0') {
209 if (symbolizer_path_is_executable(env_path)) {
210 SAFE_STRNCPY(g_addr2line_cmd, env_path, sizeof(g_addr2line_cmd));
211 available = true;
212 log_debug("Using addr2line from ADDR2LINE_PATH: %s", env_path);
213 } else {
214 log_warn("ADDR2LINE_PATH is set but not executable: %s", env_path);
215 }
216 }
217
218 if (!available && platform_is_binary_in_path(ADDR2LINE_BIN)) {
219 available = true;
220 g_addr2line_cmd[0] = '\0'; // Use binary name from PATH
221 log_debug("Found %s in PATH", ADDR2LINE_BIN);
222 }
223
224 atomic_store(&g_addr2line_available, available);
225 atomic_store(&g_addr2line_checked, true);
226 }
227 static_mutex_unlock(&g_symbolizer_detection_mutex);
228 }
229
230 if (!atomic_load(&g_addr2line_available)) {
231 return NULL;
232 }
233
234 if (g_addr2line_cmd[0] != '\0') {
235 return g_addr2line_cmd;
236 }
237
238 return ADDR2LINE_BIN;
239}
240
241#ifdef __APPLE__
242#include <mach-o/dyld.h>
243
257static bool get_macos_file_offset(const void *addr, char *out_path, size_t path_size, uintptr_t *out_file_offset) {
258 if (!addr || !out_path || !out_file_offset) {
259 return false;
260 }
261
262 uintptr_t target_addr = (uintptr_t)addr;
263 uint32_t image_count = _dyld_image_count();
264
265 for (uint32_t i = 0; i < image_count; i++) {
266 const struct mach_header *header = _dyld_get_image_header(i);
267 if (!header) {
268 continue;
269 }
270
271 intptr_t slide = _dyld_get_image_vmaddr_slide(i);
272 const char *image_name = _dyld_get_image_name(i);
273 if (!image_name) {
274 continue;
275 }
276
277 // For 64-bit binaries, iterate through load commands to find the segment
278 if (header->magic == MH_MAGIC_64) {
279 const struct mach_header_64 *header64 = (const struct mach_header_64 *)header;
280 const uint8_t *ptr = (const uint8_t *)(header64 + 1);
281
282 for (uint32_t j = 0; j < header64->ncmds; j++) {
283 const struct load_command *cmd = (const struct load_command *)ptr;
284
285 if (cmd->cmd == LC_SEGMENT_64) {
286 const struct segment_command_64 *seg = (const struct segment_command_64 *)ptr;
287 uintptr_t seg_start = seg->vmaddr + (uintptr_t)slide;
288 uintptr_t seg_end = seg_start + seg->vmsize;
289
290 if (target_addr >= seg_start && target_addr < seg_end) {
291 // Found the segment containing our address
292 SAFE_STRNCPY(out_path, image_name, path_size);
293 *out_file_offset = target_addr - (uintptr_t)slide;
294 return true;
295 }
296 }
297
298 ptr += cmd->cmdsize;
299 }
300 }
301 }
302
303 return false;
304}
305#elif defined(__linux__)
/**
 * Translate a runtime address into a file offset inside the main executable (Linux).
 *
 * Scans /proc/self/maps for an executable mapping that contains the address
 * and whose path contains "ascii-chat" but not ".so". Addresses that fall in
 * any other mapping are reported as not found.
 *
 * @param addr            Runtime address to look up.
 * @param out_path        Receives the path of the mapped file.
 * @param path_size       Size of out_path in bytes; must be non-zero.
 * @param out_file_offset Receives (addr - mapping start) + mapping file offset.
 * @return true when a matching mapping was found, false otherwise.
 */
static bool get_linux_file_offset(const void *addr, char *out_path, size_t path_size, uintptr_t *out_file_offset) {
  if (!addr || !out_path || path_size == 0 || !out_file_offset) {
    return false;
  }

  const uintptr_t target_addr = (uintptr_t)addr;

  FILE *maps = fopen("/proc/self/maps", "r");
  if (!maps) {
    return false;
  }

  char line[1024];
  bool found = false;

  while (!found && fgets(line, sizeof(line), maps)) {
    // Line format: 5599372f4000-559937519000 r-xp 00178000 00:1b 10003268 /path/to/binary
    // The fields are scanned into unsigned long because that is the type %lx
    // requires; uintptr_t is not guaranteed to be the same type.
    unsigned long start_addr = 0;
    unsigned long end_addr = 0;
    unsigned long segment_file_offset = 0;
    char perms[5] = "";
    char path[512] = "";

    int matched =
        sscanf(line, "%lx-%lx %4s %lx %*x:%*x %*d %511s", &start_addr, &end_addr, perms, &segment_file_offset, path);
    if (matched < 5) {
      continue; // Anonymous mapping or malformed line
    }

    // Only executable mappings that contain the address are of interest
    if (perms[2] != 'x' || target_addr < (uintptr_t)start_addr || target_addr >= (uintptr_t)end_addr) {
      continue;
    }

    // Restrict to the main executable (not a shared library)
    if (strstr(path, "ascii-chat") == NULL || strstr(path, ".so") != NULL) {
      continue;
    }

    *out_file_offset = (target_addr - (uintptr_t)start_addr) + (uintptr_t)segment_file_offset;
    SAFE_STRNCPY(out_path, path, path_size);
    found = true;
#ifndef NDEBUG
    log_debug("ASLR: addr=%p -> file_offset=0x%lx (segment_base=0x%lx, segment_file_offset=0x%lx, path=%s)", addr,
              (unsigned long)*out_file_offset, start_addr, segment_file_offset, path);
#endif
  }

  fclose(maps);
  return found;
}
366#endif
367
368static symbolizer_type_t detect_symbolizer(void) {
369 // Prefer llvm-symbolizer on all platforms (including macOS)
370 // We handle ASLR ourselves using dyld APIs on macOS
371 const char *llvm_symbolizer = get_llvm_symbolizer_command();
372 if (llvm_symbolizer) {
373 log_debug("Using llvm-symbolizer for symbol resolution");
374 return SYMBOLIZER_LLVM;
375 }
376
377 const char *addr2line_cmd = get_addr2line_command();
378 if (addr2line_cmd) {
379 log_debug("Using addr2line command: %s", addr2line_cmd);
381 }
382
383 log_warn("No symbolizer found in PATH (tried %s, %s) - using native backend", LLVM_SYMBOLIZER_BIN, ADDR2LINE_BIN);
384 return SYMBOLIZER_NONE;
385}
386
387// ============================================================================
388// Public API Implementation
389// ============================================================================
390
392 bool expected = false;
393 if (!atomic_compare_exchange_strong(&g_symbol_cache_initialized, &expected, true)) {
394 return 0; // Already initialized
395 }
396
397 // Detect which symbolizer is available (once at init)
398 expected = false;
399 if (atomic_compare_exchange_strong(&g_symbolizer_detected, &expected, true)) {
400 g_symbolizer_type = detect_symbolizer();
401 }
402
403 // Initialize rwlock for thread safety (uthash requires external locking)
404 if (rwlock_init(&g_symbol_cache_lock) != 0) {
405 atomic_store(&g_symbol_cache_initialized, false);
406 return SET_ERRNO(ERROR_THREAD, "Failed to initialize symbol cache rwlock");
407 }
408
409 // Initialize uthash head to NULL (required)
410 g_symbol_cache = NULL;
411
412 atomic_store(&g_cache_hits, 0);
413 atomic_store(&g_cache_misses, 0);
414
415 log_debug("Symbol cache initialized");
416 return 0;
417}
418
420 if (!atomic_load(&g_symbol_cache_initialized)) {
421 return;
422 }
423
424 // Mark as uninitialized FIRST to prevent new inserts during cleanup
425 atomic_store(&g_symbol_cache_initialized, false);
426
427 // Acquire write lock to prevent any concurrent operations
428 rwlock_wrlock(&g_symbol_cache_lock);
429
430 // Count entries before freeing for debugging
431 size_t entry_count = HASH_COUNT(g_symbol_cache);
432
433 // Free all symbol entries using HASH_ITER
434 symbol_entry_t *entry, *tmp;
435 size_t freed_count = 0;
436 HASH_ITER(hh, g_symbol_cache, entry, tmp) {
437 if (entry) {
438 HASH_DEL(g_symbol_cache, entry);
439 if (entry->symbol) {
440 // Use SAFE_FREE() because entry->symbol was allocated with platform_strdup()
441 // which uses SAFE_MALLOC(), so it's tracked by debug memory system
442 SAFE_FREE(entry->symbol);
443 }
444 SAFE_FREE(entry);
445 freed_count++;
446 }
447 }
448
449 // Release lock and destroy rwlock
450 rwlock_wrunlock(&g_symbol_cache_lock);
451 rwlock_destroy(&g_symbol_cache_lock);
452
453 g_symbol_cache = NULL;
454
455 log_debug("Symbol cache cleaned up: %zu entries counted, %zu entries freed (hits=%llu, misses=%llu)", entry_count,
456 freed_count, (unsigned long long)atomic_load(&g_cache_hits),
457 (unsigned long long)atomic_load(&g_cache_misses));
458}
459
460const char *symbol_cache_lookup(void *addr) {
461 if (!atomic_load(&g_symbol_cache_initialized) || !addr) {
462 return NULL;
463 }
464
465 rwlock_rdlock(&g_symbol_cache_lock);
466
467 symbol_entry_t *entry = NULL;
468 HASH_FIND_PTR(g_symbol_cache, &addr, entry);
469
470 if (entry) {
471 const char *symbol = entry->symbol;
472 atomic_fetch_add(&g_cache_hits, 1);
473 rwlock_rdunlock(&g_symbol_cache_lock);
474 return symbol;
475 }
476
477 atomic_fetch_add(&g_cache_misses, 1);
478 rwlock_rdunlock(&g_symbol_cache_lock);
479 return NULL;
480}
481
482bool symbol_cache_insert(void *addr, const char *symbol) {
483 if (!atomic_load(&g_symbol_cache_initialized) || !addr || !symbol) {
484 return false;
485 }
486
487 // Acquire write lock to make the entire operation atomic
488 rwlock_wrlock(&g_symbol_cache_lock);
489
490 // Double-check cache is still initialized after acquiring lock
491 // (cleanup might have marked it uninitialized between our check and lock acquisition)
492 if (!atomic_load(&g_symbol_cache_initialized)) {
493 rwlock_wrunlock(&g_symbol_cache_lock);
494 return false;
495 }
496
497 // Check if entry already exists
498 symbol_entry_t *existing = NULL;
499 HASH_FIND_PTR(g_symbol_cache, &addr, existing);
500
501 if (existing) {
502 // Entry exists - update symbol if different
503 if (existing->symbol && strcmp(existing->symbol, symbol) != 0) {
504 // Free old symbol and allocate new one
505 SAFE_FREE(existing->symbol);
506 existing->symbol = platform_strdup(symbol);
507 if (!existing->symbol) {
508 rwlock_wrunlock(&g_symbol_cache_lock);
509 return false;
510 }
511 }
512 rwlock_wrunlock(&g_symbol_cache_lock);
513 return true;
514 }
515
516 // Create new entry
518 if (!entry) {
519 rwlock_wrunlock(&g_symbol_cache_lock);
520 return false;
521 }
522
523 entry->addr = addr;
524 entry->symbol = platform_strdup(symbol);
525 if (!entry->symbol) {
526 SAFE_FREE(entry);
527 rwlock_wrunlock(&g_symbol_cache_lock);
528 return false;
529 }
530
531 // Add to hash table
532 HASH_ADD_PTR(g_symbol_cache, addr, entry);
533
534 // Release lock
535 rwlock_wrunlock(&g_symbol_cache_lock);
536
537 return true;
538}
539
540void symbol_cache_get_stats(uint64_t *hits_out, uint64_t *misses_out, size_t *entries_out) {
541 if (hits_out) {
542 *hits_out = atomic_load(&g_cache_hits);
543 }
544 if (misses_out) {
545 *misses_out = atomic_load(&g_cache_misses);
546 }
547 if (entries_out) {
548 rwlock_rdlock(&g_symbol_cache_lock);
549 *entries_out = HASH_COUNT(g_symbol_cache);
550 rwlock_rdunlock(&g_symbol_cache_lock);
551 }
552}
553
555 uint64_t hits = atomic_load(&g_cache_hits);
556 uint64_t misses = atomic_load(&g_cache_misses);
557
558 rwlock_rdlock(&g_symbol_cache_lock);
559 size_t entries = HASH_COUNT(g_symbol_cache);
560 rwlock_rdunlock(&g_symbol_cache_lock);
561
562 uint64_t total = hits + misses;
563 double hit_rate = total > 0 ? (100.0 * (double)hits / (double)total) : 0.0;
564
565 log_info("Symbol Cache Stats: %zu entries, %llu hits, %llu misses (%.1f%% hit rate)", entries,
566 (unsigned long long)hits, (unsigned long long)misses, hit_rate);
567}
568
569// ============================================================================
570// Batch Resolution with llvm-symbolizer and addr2line
571// ============================================================================
572
579static char *parse_llvm_symbolizer_result(FILE *fp, void *addr) {
580 char func_name[512] = "??";
581 char file_location[512] = "??:0";
582 char blank_line[8];
583
584 // Read function name line
585 if (fgets(func_name, sizeof(func_name), fp) == NULL) {
586 return NULL;
587 }
588 func_name[strcspn(func_name, "\n")] = '\0';
589
590 // Read file location line
591 if (fgets(file_location, sizeof(file_location), fp) == NULL) {
592 return NULL;
593 }
594 file_location[strcspn(file_location, "\n")] = '\0';
595
596 // Read blank separator line (and discard it)
597 if (fgets(blank_line, sizeof(blank_line), fp) == NULL) {
598 // End of output - not an error
599 }
600
601 // Remove column number (last :N) from file_location
602 char *last_colon = strrchr(file_location, ':');
603 if (last_colon) {
604 *last_colon = '\0';
605 }
606
607 // Extract relative path and COPY IT IMMEDIATELY
608 // IMPORTANT: extract_project_relative_path uses a static buffer internally.
609 // SAFE_MALLOC's debug memory tracking also calls extract_project_relative_path,
610 // which would overwrite the static buffer. So we must copy before allocating.
611 const char *rel_path_tmp = extract_project_relative_path(file_location);
612 char rel_path[512];
613 SAFE_STRNCPY(rel_path, rel_path_tmp, sizeof(rel_path));
614
615 // Allocate result buffer (AFTER copying rel_path to local storage)
616 char *result = SAFE_MALLOC(1024, char *);
617 if (!result) {
618 return NULL;
619 }
620
621 // Format symbol
622 bool has_func = (strcmp(func_name, "??") != 0 && strlen(func_name) > 0);
623 bool has_file =
624 (strcmp(file_location, "??:0") != 0 && strcmp(file_location, "??:?") != 0 && strcmp(file_location, "??") != 0);
625
626 // Remove () from function name if present (llvm-symbolizer includes them)
627 char clean_func[512];
628 SAFE_STRNCPY(clean_func, func_name, sizeof(clean_func));
629 char *paren = strstr(clean_func, "()");
630 if (paren) {
631 *paren = '\0';
632 }
633
634 if (!has_func && !has_file) {
635 SAFE_SNPRINTF(result, 1024, "%p", addr);
636 } else if (has_func && has_file) {
637 SAFE_SNPRINTF(result, 1024, "%s() (%s)", clean_func, rel_path);
638 } else if (has_func) {
639 SAFE_SNPRINTF(result, 1024, "%s()", clean_func);
640 } else {
641 SAFE_SNPRINTF(result, 1024, "%s (unknown function)", rel_path);
642 }
643
644 return result;
645}
646
656static char **run_llvm_symbolizer_batch(void *const *buffer, int size) {
657 if (size <= 0 || !buffer) {
658 return NULL;
659 }
660
661 const char *symbolizer_cmd = get_llvm_symbolizer_command();
662 if (!symbolizer_cmd) {
663 log_debug("llvm-symbolizer not available - skipping symbolization");
664 return NULL;
665 }
666
667 // Allocate result array
668 char **result = SAFE_CALLOC((size_t)(size + 1), sizeof(char *), char **);
669 if (!result) {
670 return NULL;
671 }
672
673#ifdef __APPLE__
674 // macOS: Group addresses by binary, calculate file offsets, batch per binary
675 // We'll collect addresses for each unique binary path
676
677 // First pass: determine binary for each address and calculate file offsets
678 typedef struct {
679 char binary_path[PLATFORM_MAX_PATH_LENGTH];
680 uintptr_t file_offsets[64]; // Max addresses per binary
681 int original_indices[64];
682 int count;
683 } binary_group_t;
684
685 binary_group_t groups[8]; // Support up to 8 different binaries
686 int num_groups = 0;
687
688 for (int i = 0; i < size; i++) {
689 char binary_path[PLATFORM_MAX_PATH_LENGTH];
690 uintptr_t file_offset = 0;
691
692 if (!get_macos_file_offset(buffer[i], binary_path, sizeof(binary_path), &file_offset)) {
693 // Could not find binary - use raw address
694 result[i] = SAFE_MALLOC(32, char *);
695 if (result[i]) {
696 SAFE_SNPRINTF(result[i], 32, "%p", buffer[i]);
697 }
698 continue;
699 }
700
701 // Find or create group for this binary
702 int group_idx = -1;
703 for (int g = 0; g < num_groups; g++) {
704 if (strcmp(groups[g].binary_path, binary_path) == 0) {
705 group_idx = g;
706 break;
707 }
708 }
709
710 if (group_idx < 0 && num_groups < 8) {
711 group_idx = num_groups++;
712 SAFE_STRNCPY(groups[group_idx].binary_path, binary_path, sizeof(groups[group_idx].binary_path));
713 groups[group_idx].count = 0;
714 }
715
716 if (group_idx >= 0 && groups[group_idx].count < 64) {
717 int idx = groups[group_idx].count++;
718 groups[group_idx].file_offsets[idx] = file_offset;
719 groups[group_idx].original_indices[idx] = i;
720 }
721 }
722
723 // Second pass: batch symbolize each group
724 for (int g = 0; g < num_groups; g++) {
725 if (groups[g].count == 0) {
726 continue;
727 }
728
729 // Build command with all addresses for this binary
730 char cmd[8192];
731 int offset = snprintf(cmd, sizeof(cmd), "%s --demangle --output-style=LLVM --relativenames -e '%s' ",
732 symbolizer_cmd, groups[g].binary_path);
733
734 for (int j = 0; j < groups[g].count && offset < (int)sizeof(cmd) - 32; j++) {
735 int n = snprintf(cmd + offset, sizeof(cmd) - (size_t)offset, "0x%lx ", (unsigned long)groups[g].file_offsets[j]);
736 if (n > 0) {
737 offset += n;
738 }
739 }
740
741 // Suppress stderr
742 strncat(cmd, "2>/dev/null", sizeof(cmd) - strlen(cmd) - 1);
743
744 FILE *fp = popen(cmd, "r");
745 if (!fp) {
746 continue;
747 }
748
749 // Parse results in order
750 for (int j = 0; j < groups[g].count; j++) {
751 int orig_idx = groups[g].original_indices[j];
752 result[orig_idx] = parse_llvm_symbolizer_result(fp, buffer[orig_idx]);
753 if (!result[orig_idx]) {
754 result[orig_idx] = SAFE_MALLOC(32, char *);
755 if (result[orig_idx]) {
756 SAFE_SNPRINTF(result[orig_idx], 32, "%p", buffer[orig_idx]);
757 }
758 }
759 }
760
761 pclose(fp);
762 }
763
764#elif defined(__linux__)
765 // Linux: Use file offsets like macOS (ASLR handling)
766 // Group addresses by binary path and calculate file offsets
767 typedef struct {
768 char binary_path[PLATFORM_MAX_PATH_LENGTH];
769 uintptr_t file_offsets[64]; // Max addresses per binary
770 int original_indices[64];
771 int count;
772 } binary_group_t;
773
774 binary_group_t groups[8]; // Support up to 8 different binaries
775 int num_groups = 0;
776
777 for (int i = 0; i < size; i++) {
778 char binary_path[PLATFORM_MAX_PATH_LENGTH];
779 uintptr_t file_offset = 0;
780
781 if (!get_linux_file_offset(buffer[i], binary_path, sizeof(binary_path), &file_offset)) {
782 // Could not find binary - use raw address
783 result[i] = SAFE_MALLOC(32, char *);
784 if (result[i]) {
785 SAFE_SNPRINTF(result[i], 32, "%p", buffer[i]);
786 }
787 continue;
788 }
789
790 // Find or create group for this binary
791 int group_idx = -1;
792 for (int g = 0; g < num_groups; g++) {
793 if (strcmp(groups[g].binary_path, binary_path) == 0) {
794 group_idx = g;
795 break;
796 }
797 }
798
799 if (group_idx < 0 && num_groups < 8) {
800 group_idx = num_groups++;
801 SAFE_STRNCPY(groups[group_idx].binary_path, binary_path, sizeof(groups[group_idx].binary_path));
802 groups[group_idx].count = 0;
803 }
804
805 if (group_idx >= 0 && groups[group_idx].count < 64) {
806 int idx = groups[group_idx].count++;
807 groups[group_idx].file_offsets[idx] = file_offset;
808 groups[group_idx].original_indices[idx] = i;
809 }
810 }
811
812 // Second pass: batch symbolize each group
813 for (int g = 0; g < num_groups; g++) {
814 if (groups[g].count == 0) {
815 continue;
816 }
817
818 // Escape binary path for shell
819 char escaped_binary_path[PLATFORM_MAX_PATH_LENGTH * 2];
820 if (!escape_path_for_shell(groups[g].binary_path, escaped_binary_path, sizeof(escaped_binary_path))) {
821 continue;
822 }
823
824 // Build command with all addresses for this binary
825 char cmd[8192];
826 int offset = snprintf(cmd, sizeof(cmd), "%s --demangle --output-style=LLVM --relativenames -e %s ", symbolizer_cmd,
827 escaped_binary_path);
828
829 for (int j = 0; j < groups[g].count && offset < (int)sizeof(cmd) - 32; j++) {
830 int n = snprintf(cmd + offset, sizeof(cmd) - (size_t)offset, "0x%lx ", (unsigned long)groups[g].file_offsets[j]);
831 if (n > 0) {
832 offset += n;
833 }
834 }
835
836 // Suppress stderr
837 strncat(cmd, "2>/dev/null", sizeof(cmd) - strlen(cmd) - 1);
838
839 FILE *fp = popen(cmd, "r");
840 if (!fp) {
841 continue;
842 }
843
844 // Parse results in order
845 for (int j = 0; j < groups[g].count; j++) {
846 int orig_idx = groups[g].original_indices[j];
847 result[orig_idx] = parse_llvm_symbolizer_result(fp, buffer[orig_idx]);
848 if (!result[orig_idx]) {
849 result[orig_idx] = SAFE_MALLOC(32, char *);
850 if (result[orig_idx]) {
851 SAFE_SNPRINTF(result[orig_idx], 32, "%p", buffer[orig_idx]);
852 }
853 }
854 }
855
856 pclose(fp);
857 }
858
859#else
860 // Windows: Use runtime addresses directly (TODO: implement Windows ASLR handling)
861 char exe_path[PLATFORM_MAX_PATH_LENGTH];
862 if (!platform_get_executable_path(exe_path, sizeof(exe_path))) {
863 SAFE_FREE(result);
864 return NULL;
865 }
866
867 // Escape paths for shell
868 char escaped_exe_path[PLATFORM_MAX_PATH_LENGTH * 2];
869 if (!escape_path_for_shell(exe_path, escaped_exe_path, sizeof(escaped_exe_path))) {
870 SAFE_FREE(result);
871 return NULL;
872 }
873
874 char escaped_symbolizer[PLATFORM_MAX_PATH_LENGTH * 2];
875 if (!escape_path_for_shell(symbolizer_cmd, escaped_symbolizer, sizeof(escaped_symbolizer))) {
876 SAFE_FREE(result);
877 return NULL;
878 }
879
880 // Build command
881 char cmd[8192];
882 int offset = snprintf(cmd, sizeof(cmd), "%s --demangle --output-style=LLVM --relativenames -e %s ",
883 escaped_symbolizer, escaped_exe_path);
884
885 for (int i = 0; i < size && offset < (int)sizeof(cmd) - 32; i++) {
886 int n = snprintf(cmd + offset, sizeof(cmd) - (size_t)offset, "0x%llx ", (unsigned long long)buffer[i]);
887 if (n > 0) {
888 offset += n;
889 }
890 }
891
892 FILE *fp = popen(cmd, "r");
893 if (!fp) {
894 SAFE_FREE(result);
895 return NULL;
896 }
897
898 for (int i = 0; i < size; i++) {
899 result[i] = parse_llvm_symbolizer_result(fp, buffer[i]);
900 if (!result[i]) {
901 result[i] = SAFE_MALLOC(32, char *);
902 if (result[i]) {
903 SAFE_SNPRINTF(result[i], 32, "%p", buffer[i]);
904 }
905 }
906 }
907
908 pclose(fp);
909#endif
910
911 return result;
912}
913
920static char **run_addr2line_batch(void *const *buffer, int size) {
921 if (size <= 0 || !buffer) {
922 log_error("Invalid parameters: buffer=%p, size=%d", (void *)buffer, size);
923 return NULL;
924 }
925
926 const char *addr2line_cmd = get_addr2line_command();
927 if (!addr2line_cmd) {
928 log_debug("addr2line not available - skipping symbolization");
929 return NULL;
930 }
931
932 // Get executable path
933 char exe_path[PLATFORM_MAX_PATH_LENGTH];
934 if (!platform_get_executable_path(exe_path, sizeof(exe_path))) {
935 return NULL;
936 }
937
938 // SECURITY: Validate executable path to prevent command injection
939 // Paths from system APIs should be safe, but validate to be thorough
940 if (!validate_shell_safe(exe_path, ".-/\\:")) {
941 log_error("Invalid executable path - contains unsafe characters: %s", exe_path);
942 return NULL;
943 }
944
945 // Escape exe_path for shell (auto-detects platform and quoting needs)
946 char escaped_exe_path_buf[PLATFORM_MAX_PATH_LENGTH * 2];
947 if (!escape_path_for_shell(exe_path, escaped_exe_path_buf, sizeof(escaped_exe_path_buf))) {
948 log_error("Failed to escape executable path for shell command");
949 return NULL;
950 }
951 const char *escaped_exe_path = escaped_exe_path_buf;
952
953 if (!validate_shell_safe(addr2line_cmd, ".-/\\:_")) {
954 log_warn("addr2line path contains unsafe characters: %s", addr2line_cmd);
955 return NULL;
956 }
957
958 // Escape addr2line command for shell (auto-detects platform and quoting needs)
959 char escaped_addr2line_buf[PLATFORM_MAX_PATH_LENGTH * 2];
960 if (!escape_path_for_shell(addr2line_cmd, escaped_addr2line_buf, sizeof(escaped_addr2line_buf))) {
961 log_error("Failed to escape addr2line path for shell command");
962 return NULL;
963 }
964 const char *escaped_addr2line_cmd = escaped_addr2line_buf;
965
966 // Build addr2line command
967 char cmd[4096];
968 int offset = snprintf(cmd, sizeof(cmd), "%s -e %s -f -C -i ", escaped_addr2line_cmd, escaped_exe_path);
969 if (offset <= 0 || offset >= (int)sizeof(cmd)) {
970 log_error("Failed to build addr2line command");
971 return NULL;
972 }
973
974 // Use explicit hex format with 0x prefix since Windows %p doesn't include it
975 for (int i = 0; i < size; i++) {
976 int n = snprintf(cmd + offset, sizeof(cmd) - (size_t)offset, "0x%llx ", (unsigned long long)buffer[i]);
977 if (n <= 0 || offset + n >= (int)sizeof(cmd)) {
978 log_error("Failed to build addr2line command");
979 break;
980 }
981 offset += n;
982 }
983
984 // Execute addr2line
985 FILE *fp = popen(cmd, "r");
986 if (!fp) {
987 log_error("Failed to execute addr2line command");
988 return NULL;
989 }
990
991 // Allocate result array
992 char **result = SAFE_CALLOC((size_t)(size + 1), sizeof(char *), char **);
993 if (!result) {
994 pclose(fp);
995 return NULL;
996 }
997
998 // Parse output
999 for (int i = 0; i < size; i++) {
1000 char func_name[256];
1001 char file_line[512];
1002
1003 if (fgets(func_name, sizeof(func_name), fp) == NULL) {
1004 break;
1005 }
1006 if (fgets(file_line, sizeof(file_line), fp) == NULL) {
1007 break;
1008 }
1009
1010 // Remove newlines
1011 func_name[strcspn(func_name, "\n")] = '\0';
1012 file_line[strcspn(file_line, "\n")] = '\0';
1013
1014 // Extract relative path and COPY IT IMMEDIATELY
1015 // IMPORTANT: extract_project_relative_path uses a static buffer internally.
1016 // SAFE_MALLOC's debug memory tracking also calls extract_project_relative_path,
1017 // which would overwrite the static buffer. So we must copy before allocating.
1018 const char *rel_path_tmp = extract_project_relative_path(file_line);
1019 char rel_path[512];
1020 SAFE_STRNCPY(rel_path, rel_path_tmp, sizeof(rel_path));
1021
1022 // Allocate result buffer (AFTER copying rel_path to local storage)
1023 result[i] = SAFE_MALLOC(1024, char *);
1024
1025 // Format symbol
1026 bool has_func = (strcmp(func_name, "??") != 0);
1027 bool has_file = (strcmp(file_line, "??:0") != 0 && strcmp(file_line, "??:?") != 0);
1028
1029 if (!has_func && !has_file) {
1030 // Complete unknown - show raw address
1031 SAFE_SNPRINTF(result[i], 1024, "%p", buffer[i]);
1032 } else if (has_func && has_file) {
1033 // Best case - both function and file:line known
1034 if (strstr(rel_path, ":") != NULL) {
1035 SAFE_SNPRINTF(result[i], 1024, "%s in %s()", rel_path, func_name);
1036 } else {
1037 SAFE_SNPRINTF(result[i], 1024, "%s() at %s", func_name, rel_path);
1038 }
1039 } else if (has_func) {
1040 // Function known but file unknown (common for library functions)
1041 SAFE_SNPRINTF(result[i], 1024, "%s() at %p", func_name, buffer[i]);
1042 } else {
1043 // File known but function unknown (rare)
1044 SAFE_SNPRINTF(result[i], 1024, "%s (unknown function)", rel_path);
1045 }
1046 }
1047
1048 pclose(fp);
1049 return result;
1050}
1051
1052char **symbol_cache_resolve_batch(void *const *buffer, int size) {
1053 if (size <= 0 || !buffer) {
1054 log_error("Invalid parameters: buffer=%p, size=%d", (void *)buffer, size);
1055 return NULL;
1056 }
1057
1058 // DO NOT auto-initialize here - causes circular dependency during lock_debug_init()
1059 // The cache must be initialized explicitly by platform_init() before use
1060 if (!atomic_load(&g_symbol_cache_initialized)) {
1061 // Cache not initialized - fall back to uncached resolution
1062 // This happens during early initialization before platform_init() completes
1063 char **result = run_llvm_symbolizer_batch(buffer, size);
1064 if (!result) {
1065 result = run_addr2line_batch(buffer, size);
1066 }
1067 return result;
1068 }
1069
1070 // Allocate result array (size + 1 for NULL terminator)
1071 // CALLOC zeros the memory, so result[size] is already NULL
1072 char **result = SAFE_CALLOC((size_t)(size + 1), sizeof(char *), char **);
1073 if (!result) {
1074 return NULL;
1075 }
1076
1077 // Ensure NULL terminator is explicitly set (CALLOC already did this, but be explicit)
1078 result[size] = NULL;
1079
1080 // First pass: check cache for all addresses
1081 int uncached_count = 0;
1082 void *uncached_addrs[size];
1083 int uncached_indices[size];
1084
1085 for (int i = 0; i < size; i++) {
1086 const char *cached = symbol_cache_lookup(buffer[i]);
1087 if (cached) {
1088 // Cache hit - duplicate the string
1089 result[i] = platform_strdup(cached);
1090 // If allocation failed, use sentinel string instead of NULL
1091 if (!result[i]) {
1092 result[i] = platform_strdup(NULL_SENTINEL);
1093 }
1094 } else {
1095 // Cache miss - track for batch resolution
1096 uncached_addrs[uncached_count] = buffer[i];
1097 uncached_indices[uncached_count] = i;
1098 uncached_count++;
1099 }
1100 }
1101
1102 // Second pass: resolve uncached addresses with selected symbolizer
1103 if (uncached_count > 0) {
1104 char **resolved = NULL;
1105
1106 // Use the detected symbolizer type
1107 switch (g_symbolizer_type) {
1108 case SYMBOLIZER_LLVM:
1109 resolved = run_llvm_symbolizer_batch(uncached_addrs, uncached_count);
1110 break;
1112 resolved = run_addr2line_batch(uncached_addrs, uncached_count);
1113 break;
1114 case SYMBOLIZER_NONE:
1115 default:
1116 // No symbolizer available - will fall through to raw address handling
1117 resolved = NULL;
1118 break;
1119 }
1120
1121 if (resolved) {
1122 for (int i = 0; i < uncached_count; i++) {
1123 int orig_idx = uncached_indices[i];
1124 if (resolved[i]) {
1125 result[orig_idx] = platform_strdup(resolved[i]);
1126 // If strdup failed, use sentinel string instead of NULL
1127 if (!result[orig_idx]) {
1128 log_error("Failed to duplicate string for result[%d]", orig_idx);
1129 result[orig_idx] = platform_strdup(NULL_SENTINEL);
1130 }
1131 // Only insert into cache if strdup succeeded (and it's not the sentinel)
1132 if (result[orig_idx] && strcmp(result[orig_idx], NULL_SENTINEL) != 0) {
1133 if (!symbol_cache_insert(uncached_addrs[i], resolved[i])) {
1134 log_error("Failed to insert symbol into cache for result[%d]", orig_idx);
1135 }
1136 }
1137 SAFE_FREE(resolved[i]);
1138 } else {
1139 // resolved[i] is NULL - use sentinel string
1140 if (!result[orig_idx]) {
1141 result[orig_idx] = platform_strdup(NULL_SENTINEL);
1142 }
1143 }
1144 if (!result[orig_idx]) {
1145 log_error("Failed to allocate memory for result[%d]", orig_idx);
1146 }
1147 }
1148
1149 SAFE_FREE(resolved);
1150
1151 } else {
1152 // addr2line failed - fill uncached entries with raw addresses or sentinel
1153 for (int i = 0; i < uncached_count; i++) {
1154 int orig_idx = uncached_indices[i];
1155 if (!result[orig_idx]) {
1156 result[orig_idx] = SAFE_MALLOC(32, char *);
1157 if (result[orig_idx]) {
1158 SAFE_SNPRINTF(result[orig_idx], 32, "%p", uncached_addrs[i]);
1159 } else {
1160 result[orig_idx] = platform_strdup(NULL_SENTINEL);
1161 }
1162 }
1163 if (!result[orig_idx]) {
1164 log_error("Failed to allocate memory for result[%d]", orig_idx);
1165 }
1166 }
1167 }
1168 }
1169
1170 return result;
1171}
1172
/**
 * Free an array returned by symbol_cache_resolve_batch().
 *
 * The array is NULL-terminated (allocated with size + 1 zeroed slots), and
 * failed allocations are represented by an allocated NULL_SENTINEL string
 * rather than NULL, so walking to the first NULL visits every entry. No fixed
 * upper bound is applied: a cap would leak the entries of longer arrays.
 *
 * @param symbols Array to free; NULL is accepted and ignored.
 */
void symbol_cache_free_symbols(char **symbols) {
  if (!symbols) {
    return;
  }

  // Regular strings and sentinel strings are both heap-allocated, so all are freed
  for (size_t i = 0; symbols[i] != NULL; i++) {
    SAFE_FREE(symbols[i]);
    symbols[i] = NULL; // Clear pointer after freeing
  }

  // Free the array itself
  SAFE_FREE(symbols);
}
unsigned int uint32_t
Definition common.h:58
#define SAFE_STRNCPY(dst, src, size)
Definition common.h:358
#define SAFE_FREE(ptr)
Definition common.h:320
#define SAFE_MALLOC(size, cast)
Definition common.h:208
#define SAFE_GETENV(name)
Definition common.h:378
#define SAFE_SNPRINTF(buffer, buffer_size,...)
Definition common.h:412
#define SAFE_CALLOC(count, size, cast)
Definition common.h:218
#define PLATFORM_MAX_PATH_LENGTH
Definition common.h:91
unsigned long long uint64_t
Definition common.h:59
unsigned char uint8_t
Definition common.h:56
#define SET_ERRNO(code, context_msg,...)
Set error code with custom context message and log it.
asciichat_error_t
Error and exit codes - unified status values (0-255)
Definition error_codes.h:46
@ ERROR_THREAD
Definition error_codes.h:95
#define log_warn(...)
Log a WARN message.
#define log_error(...)
Log an ERROR message.
#define log_info(...)
Log an INFO message.
#define log_debug(...)
Log a DEBUG message.
void symbol_cache_cleanup(void)
Clean up the symbol cache and free all resources.
Definition symbols.c:419
bool platform_get_executable_path(char *exe_path, size_t path_size)
Get the path to the current executable.
Definition system.c:351
#define rwlock_wrunlock(lock)
Release a write lock (with debug tracking in debug builds)
Definition rwlock.h:249
void symbol_cache_get_stats(uint64_t *hits_out, uint64_t *misses_out, size_t *entries_out)
Get cache statistics.
Definition symbols.c:540
bool symbol_cache_insert(void *addr, const char *symbol)
Insert a symbol into the cache.
Definition symbols.c:482
int rwlock_destroy(rwlock_t *lock)
Destroy a read-write lock.
#define rwlock_rdlock(lock)
Acquire a read lock (with debug tracking in debug builds)
Definition rwlock.h:194
void symbol_cache_print_stats(void)
Print cache statistics to logging system.
Definition symbols.c:554
bool platform_is_binary_in_path(const char *bin_name)
Check if a binary is available in the system PATH.
Definition system.c:288
char * platform_strdup(const char *s)
Duplicate string (strdup replacement)
char ** symbol_cache_resolve_batch(void *const *buffer, int size)
Resolve multiple addresses using addr2line and cache results.
Definition symbols.c:1052
#define STATIC_MUTEX_INIT
Definition init.h:107
pthread_rwlock_t rwlock_t
Read-write lock type (POSIX: pthread_rwlock_t)
Definition rwlock.h:40
const char * symbol_cache_lookup(void *addr)
Look up a symbol for a given address.
Definition symbols.c:460
#define rwlock_wrlock(lock)
Acquire a write lock (with debug tracking in debug builds)
Definition rwlock.h:213
asciichat_error_t symbol_cache_init(void)
Initialize the symbol cache.
Definition symbols.c:391
void symbol_cache_free_symbols(char **symbols)
Free symbol array returned by symbol_cache_resolve_batch.
Definition symbols.c:1173
int rwlock_init(rwlock_t *lock)
Initialize a read-write lock.
#define rwlock_rdunlock(lock)
Release a read lock (with debug tracking in debug builds)
Definition rwlock.h:231
bool validate_shell_safe(const char *str, const char *allowed_chars)
Validate that a string contains only safe characters for shell commands.
Definition string.c:54
bool escape_path_for_shell(const char *path, char *out_buffer, size_t out_buffer_size)
Escape a path for safe use in shell commands (auto-platform)
Definition string.c:213
const char * extract_project_relative_path(const char *file)
Extract relative path from an absolute path.
Definition path.c:127
Platform initialization and static synchronization helpers.
📂 Path Manipulation Utilities
Cross-platform read-write lock interface for ascii-chat.
Static mutex structure for global mutexes requiring static initialization.
Definition init.h:40
Symbol cache entry structure for address-to-symbol mapping.
Definition symbols.c:105
UT_hash_handle hh
uthash handle (required for hash table operations)
Definition symbols.c:111
char * symbol
Resolved symbol string (allocated, owned by cache)
Definition symbols.c:109
void * addr
Memory address key (used for hashtable lookup)
Definition symbols.c:107
#define LLVM_SYMBOLIZER_BIN
Definition symbols.c:14
#define NULL_SENTINEL
Definition symbols.c:43
#define ADDR2LINE_BIN
Definition symbols.c:15
symbolizer_type_t
Definition symbols.c:49
@ SYMBOLIZER_NONE
Definition symbols.c:50
@ SYMBOLIZER_LLVM
Definition symbols.c:51
@ SYMBOLIZER_ADDR2LINE
Definition symbols.c:52
Symbol Resolution Cache for Backtrace Addresses.
Cross-platform system functions interface for ascii-chat.
#️⃣ Wrapper for uthash.h that ensures common.h is included first
🔤 String Manipulation and Shell Escaping Utilities