7#include <ascii-chat/util/pcre2.h>
8#include <ascii-chat/common.h>
9#include <ascii-chat/log/logging.h>
22 _Atomic(pcre2_code *) code;
23 pcre2_jit_stack *jit_stack;
24 _Atomic(
bool) compiled;
32static _Atomic(
bool) g_registry_initialized =
false;
46pcre2_singleton_t *asciichat_pcre2_singleton_compile(
const char *pattern, uint32_t flags) {
48 log_error(
"PCRE2 singleton: pattern is NULL");
55 log_error(
"PCRE2 singleton: failed to allocate singleton structure");
60 size_t pattern_len = strlen(pattern);
61 singleton->pattern = SAFE_MALLOC(pattern_len + 1,
char *);
62 if (!singleton->pattern) {
63 log_error(
"PCRE2 singleton: failed to allocate pattern buffer");
67 memcpy(singleton->pattern, pattern, pattern_len + 1);
70 atomic_store(&singleton->code, NULL);
71 singleton->jit_stack = NULL;
72 atomic_store(&singleton->compiled,
false);
73 singleton->flags = flags;
76 singleton->next = g_singleton_registry;
77 g_singleton_registry = singleton;
78 atomic_store(&g_registry_initialized,
true);
101 pcre2_code *code = atomic_load(&singleton->code);
108 if (atomic_load(&singleton->compiled)) {
114 PCRE2_SIZE erroroffset;
116 code = pcre2_compile((PCRE2_SPTR8)singleton->pattern, PCRE2_ZERO_TERMINATED, singleton->flags, &errornumber,
120 PCRE2_UCHAR error_buf[256];
121 pcre2_get_error_message(errornumber, error_buf,
sizeof(error_buf));
122 log_warn(
"Failed to compile PCRE2 regex at offset %zu: %s", erroroffset, (
const char *)error_buf);
124 atomic_store(&singleton->compiled,
true);
129 int jit_rc = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
131 log_debug(
"PCRE2 JIT compilation not available (code %d), using interpreted mode", jit_rc);
134 singleton->jit_stack = pcre2_jit_stack_create(32 * 1024, 512 * 1024, NULL);
135 if (!singleton->jit_stack) {
136 log_warn(
"Failed to create JIT stack for PCRE2 regex");
141 atomic_store(&singleton->code, code);
142 atomic_store(&singleton->compiled,
true);
157 return atomic_load(&singleton->code) != NULL;
174 pcre2_code *code = atomic_load(&singleton->code);
176 pcre2_code_free(code);
180 if (singleton->jit_stack) {
181 pcre2_jit_stack_free(singleton->jit_stack);
185 SAFE_FREE(singleton->pattern);
188 SAFE_FREE(singleton);
198 if (!atomic_load(&g_registry_initialized)) {
203 if (g_singleton_registry == NULL) {
210 g_singleton_registry = NULL;
211 atomic_store(&g_registry_initialized,
false);
218 pcre2_code *code = atomic_load(&current->code);
220 pcre2_code_free(code);
221 atomic_store(&current->code, NULL);
225 if (current->jit_stack) {
226 pcre2_jit_stack_free(current->jit_stack);
227 current->jit_stack = NULL;
231 SAFE_FREE(current->pattern);
253 const char *subject) {
254 if (!regex || !match_data || !group_name || !subject) {
255 log_error(
"pcre2_extract_named_group: invalid parameters");
260 int group_number = pcre2_substring_number_from_name(regex, (PCRE2_SPTR)group_name);
261 if (group_number < 0) {
266 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
267 PCRE2_SIZE start = ovector[2 * group_number];
268 PCRE2_SIZE end = ovector[2 * group_number + 1];
270 if (start == PCRE2_UNSET || end == PCRE2_UNSET) {
275 size_t len = end - start;
276 char *result = SAFE_MALLOC(len + 1,
char *);
278 log_error(
"pcre2_extract_named_group: failed to allocate %zu bytes", len + 1);
282 memcpy(result, subject + start, len);
292 if (!match_data || !subject || group_num < 0) {
297 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
298 PCRE2_SIZE start = ovector[2 * group_num];
299 PCRE2_SIZE end = ovector[2 * group_num + 1];
301 if (start == PCRE2_UNSET || end == PCRE2_UNSET) {
306 size_t len = end - start;
307 char *result = SAFE_MALLOC(len + 1,
char *);
309 log_error(
"asciichat_pcre2_extract_group: failed to allocate %zu bytes", len + 1);
313 memcpy(result, subject + start, len);
324 if (!match_data || !subject || !out_len || group_num < 0) {
329 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
330 PCRE2_SIZE start = ovector[2 * group_num];
331 PCRE2_SIZE end = ovector[2 * group_num + 1];
333 if (start == PCRE2_UNSET || end == PCRE2_UNSET) {
337 *out_len = end - start;
338 return subject + start;
345 unsigned long *out_value) {
346 if (!match_data || !subject || !out_value || group_num < 0) {
351 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
352 PCRE2_SIZE start = ovector[2 * group_num];
353 PCRE2_SIZE end = ovector[2 * group_num + 1];
355 if (start == PCRE2_UNSET || end == PCRE2_UNSET) {
360 size_t len = end - start;
361 if (len == 0 || len > 63) {
366 memcpy(temp, subject + start, len);
372 unsigned long value = strtoul(temp, &endptr, 10);
374 if (errno != 0 || endptr == temp || *endptr !=
'\0') {
struct pcre2_singleton pcre2_singleton_t
Represents a thread-safe compiled PCRE2 regex singleton.
pcre2_code * asciichat_pcre2_singleton_get_code(pcre2_singleton_t *singleton)
Get the compiled pcre2_code from a singleton handle.
bool asciichat_pcre2_singleton_is_initialized(pcre2_singleton_t *singleton)
Check if a singleton was successfully initialized.
void asciichat_pcre2_cleanup_all(void)
Free all PCRE2 singletons in the global registry.
char * asciichat_pcre2_extract_group(pcre2_match_data *match_data, int group_num, const char *subject)
Extract numbered capture group as allocated string.
char * asciichat_pcre2_extract_named_group(pcre2_code *regex, pcre2_match_data *match_data, const char *group_name, const char *subject)
Extract named substring from PCRE2 match data.
const char * asciichat_pcre2_extract_group_ptr(pcre2_match_data *match_data, int group_num, const char *subject, size_t *out_len)
Extract numbered capture group as pointer into subject (non-allocating).
bool asciichat_pcre2_extract_group_ulong(pcre2_match_data *match_data, int group_num, const char *subject, unsigned long *out_value)
Extract numbered capture group and convert to unsigned long.
void asciichat_pcre2_singleton_free(pcre2_singleton_t *singleton)
Free a PCRE2 singleton and its resources.
Represents a thread-safe compiled PCRE2 regex singleton.