ascii-chat 0.8.38
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
parser.c
Go to the documentation of this file.
1
10#include <ascii-chat/options/manpage/parser.h>
11#include <ascii-chat/log/logging.h>
12#include <ascii-chat/common.h>
13#include <ascii-chat/platform/util.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17#include <ctype.h>
18
19// ============================================================================
20// Helper Functions
21// ============================================================================
22
31static bool is_marker_line(const char *line, const char **type_out, char **section_out) {
32 if (!line) {
33 return false;
34 }
35
36 // Skip leading whitespace
37 const char *p = line;
38 while (isspace((unsigned char)*p)) {
39 p++;
40 }
41
42 // Check for groff comment: .\ " or ."
43 if (strncmp(p, ".\\\"", 3) != 0 && strncmp(p, ".\"", 2) != 0) {
44 return false;
45 }
46
47 // Skip comment marker
48 if (p[0] == '.' && p[1] == '\\' && p[2] == '"') {
49 p += 3;
50 } else if (p[0] == '.' && p[1] == '"') {
51 p += 2;
52 }
53
54 // Skip whitespace after comment marker
55 while (isspace((unsigned char)*p)) {
56 p++;
57 }
58
59 // Check for marker pattern: TYPE-START: SECTION or TYPE-END: SECTION
60 const char *type = NULL;
61 const char *section_start = NULL;
62
63 if (strncmp(p, "AUTO-START:", 11) == 0) {
64 type = "AUTO";
65 section_start = p + 11;
66 } else if (strncmp(p, "AUTO-END:", 9) == 0) {
67 type = "AUTO";
68 section_start = p + 9;
69 } else if (strncmp(p, "MANUAL-START:", 13) == 0) {
70 type = "MANUAL";
71 section_start = p + 13;
72 } else if (strncmp(p, "MANUAL-END:", 11) == 0) {
73 type = "MANUAL";
74 section_start = p + 11;
75 } else if (strncmp(p, "MERGE-START:", 12) == 0) {
76 type = "MERGE";
77 section_start = p + 12;
78 } else if (strncmp(p, "MERGE-END:", 10) == 0) {
79 type = "MERGE";
80 section_start = p + 10;
81 } else {
82 return false;
83 }
84
85 // Skip whitespace before section name
86 while (isspace((unsigned char)*section_start)) {
87 section_start++;
88 }
89
90 // Extract section name (until end of line)
91 const char *section_end = section_start;
92 while (*section_end && *section_end != '\n' && *section_end != '\r') {
93 section_end++;
94 }
95
96 if (section_end > section_start) {
97 if (type_out) {
98 *type_out = type;
99 }
100 if (section_out) {
101 size_t len = section_end - section_start;
102 *section_out = SAFE_MALLOC(len + 1, char *);
103 memcpy(*section_out, section_start, len);
104 (*section_out)[len] = '\0';
105 // Trim trailing whitespace
106 char *s = *section_out + len - 1;
107 while (s >= *section_out && isspace((unsigned char)*s)) {
108 *s-- = '\0';
109 }
110 }
111 return true;
112 }
113
114 return false;
115}
116
120static bool is_section_header(const char *line, char **section_name_out) {
121 if (!line) {
122 return false;
123 }
124
125 // Skip leading whitespace
126 const char *p = line;
127 while (isspace((unsigned char)*p)) {
128 p++;
129 }
130
131 // Check for .SH directive
132 if (strncmp(p, ".SH", 3) != 0) {
133 return false;
134 }
135
136 p += 3;
137
138 // Skip whitespace
139 while (isspace((unsigned char)*p)) {
140 p++;
141 }
142
143 // Extract section name (until end of line)
144 const char *name_start = p;
145 const char *name_end = name_start;
146 while (*name_end && *name_end != '\n' && *name_end != '\r') {
147 name_end++;
148 }
149
150 if (name_end > name_start) {
151 size_t len = name_end - name_start;
152 *section_name_out = SAFE_MALLOC(len + 1, char *);
153 memcpy(*section_name_out, name_start, len);
154 (*section_name_out)[len] = '\0';
155 // Trim trailing whitespace
156 char *s = *section_name_out + len - 1;
157 while (s >= *section_name_out && isspace((unsigned char)*s)) {
158 *s-- = '\0';
159 }
160 // Remove quotes if present
161 len = strlen(*section_name_out);
162 if (len >= 2 && (*section_name_out)[0] == '"' && (*section_name_out)[len - 1] == '"') {
163 memmove(*section_name_out, *section_name_out + 1, len - 2);
164 (*section_name_out)[len - 2] = '\0';
165 }
166 return true;
167 }
168
169 return false;
170}
171
175static section_type_t type_string_to_enum(const char *type_str) {
176 if (!type_str) {
177 return SECTION_TYPE_UNMARKED;
178 }
179 if (strcmp(type_str, "AUTO") == 0) {
180 return SECTION_TYPE_AUTO;
181 }
182 if (strcmp(type_str, "MANUAL") == 0) {
183 return SECTION_TYPE_MANUAL;
184 }
185 if (strcmp(type_str, "MERGE") == 0) {
186 return SECTION_TYPE_MERGE;
187 }
188 return SECTION_TYPE_UNMARKED;
189}
190
194static asciichat_error_t parse_sections_internal(FILE *f, parsed_section_t **out_sections, size_t *out_count) {
195 if (!f || !out_sections || !out_count) {
196 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters for parse_sections_internal");
197 }
198
199 parsed_section_t *sections = NULL;
200 size_t capacity = 16;
201 size_t count = 0;
202 sections = SAFE_MALLOC(capacity * sizeof(parsed_section_t), parsed_section_t *);
203
204 char *line = NULL;
205 size_t line_len = 0;
206 size_t line_num = 0;
207
208 parsed_section_t *current_section = NULL;
209 char *current_content = NULL;
210 size_t current_content_capacity = 4096;
211 size_t current_content_len = 0;
212 current_content = SAFE_MALLOC(current_content_capacity, char *);
213
214 // Track markers for current section
215 const char *current_type = NULL;
216 char *current_marker_section = NULL;
217 bool in_marked_section = false;
218
219 while (platform_getline(&line, &line_len, f) != -1) {
220 line_num++;
221
222 const char *marker_type = NULL;
223 char *marker_section = NULL;
224 bool is_marker = is_marker_line(line, &marker_type, &marker_section);
225
226 char *section_header_name = NULL;
227 bool is_header = is_section_header(line, &section_header_name);
228
229 if (is_marker) {
230 // Check if this is a START marker
231 if (strstr(line, "-START:") != NULL) {
232 // Start of a marked section
233 current_type = marker_type;
234 if (current_marker_section) {
235 SAFE_FREE(current_marker_section);
236 }
237 current_marker_section = marker_section;
238 in_marked_section = true;
239 marker_section = NULL; // Ownership transferred
240 } else if (strstr(line, "-END:") != NULL) {
241 // End of marked section
242 if (marker_section) {
243 SAFE_FREE(marker_section);
244 }
245 in_marked_section = false;
246 current_type = NULL;
247 if (current_marker_section) {
248 SAFE_FREE(current_marker_section);
249 current_marker_section = NULL;
250 }
251 } else {
252 if (marker_section) {
253 SAFE_FREE(marker_section);
254 }
255 }
256 }
257
258 if (is_header) {
259 // Finalize previous section if exists
260 if (current_section) {
261 if (current_content && current_content_len > 0) {
262 current_section->content = current_content;
263 current_section->content_len = current_content_len;
264 current_content = NULL;
265 current_content_capacity = 4096;
266 current_content_len = 0;
267 current_content = SAFE_MALLOC(current_content_capacity, char *);
268 } else {
269 SAFE_FREE(current_content);
270 current_content = NULL;
271 current_content_capacity = 4096;
272 current_content_len = 0;
273 current_content = SAFE_MALLOC(current_content_capacity, char *);
274 }
275 }
276
277 // Start new section
278 if (count >= capacity) {
279 capacity *= 2;
280 sections = SAFE_REALLOC(sections, capacity * sizeof(parsed_section_t), parsed_section_t *);
281 }
282
283 current_section = &sections[count++];
284 memset(current_section, 0, sizeof(parsed_section_t));
285 current_section->section_name = section_header_name;
286 current_section->start_line = line_num;
287 current_section->end_line = line_num;
288 current_section->type = in_marked_section ? type_string_to_enum(current_type) : SECTION_TYPE_UNMARKED;
289 current_section->has_markers = in_marked_section;
290
291 // Append header line to content
292 size_t line_strlen = strlen(line);
293 if (current_content_len + line_strlen + 1 >= current_content_capacity) {
294 current_content_capacity = (current_content_len + line_strlen + 1) * 2;
295 current_content = SAFE_REALLOC(current_content, current_content_capacity, char *);
296 }
297 memcpy(current_content + current_content_len, line, line_strlen);
298 current_content_len += line_strlen;
299 current_content[current_content_len] = '\0';
300 } else if (current_section) {
301 // Append line to current section content
302 size_t line_strlen = strlen(line);
303 if (current_content_len + line_strlen + 1 >= current_content_capacity) {
304 current_content_capacity = (current_content_len + line_strlen + 1) * 2;
305 current_content = SAFE_REALLOC(current_content, current_content_capacity, char *);
306 }
307 memcpy(current_content + current_content_len, line, line_strlen);
308 current_content_len += line_strlen;
309 current_content[current_content_len] = '\0';
310 current_section->end_line = line_num;
311 }
312 }
313
314 // Finalize last section
315 if (current_section && current_content && current_content_len > 0) {
316 current_section->content = current_content;
317 current_section->content_len = current_content_len;
318 } else if (current_content) {
319 SAFE_FREE(current_content);
320 }
321
322 if (current_marker_section) {
323 SAFE_FREE(current_marker_section);
324 }
325 if (line) {
326 free(line);
327 }
328
329 *out_sections = sections;
330 *out_count = count;
331 log_debug("Parsed %zu sections from file", count);
332 return ASCIICHAT_OK;
333}
334
335// ============================================================================
336// Public API
337// ============================================================================
338
339asciichat_error_t manpage_parser_parse_file(FILE *f, parsed_section_t **out_sections, size_t *out_count) {
340 if (!f || !out_sections || !out_count) {
341 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters for manpage_parser_parse_file");
342 }
343
344 return parse_sections_internal(f, out_sections, out_count);
345}
346
347asciichat_error_t manpage_parser_parse_memory(const char *content, size_t content_len, parsed_section_t **out_sections,
348 size_t *out_count) {
349 if (!content || content_len == 0 || !out_sections || !out_count) {
350 return SET_ERRNO(ERROR_INVALID_PARAM, "Invalid parameters for manpage_parser_parse_memory");
351 }
352
353 // Create temporary file from memory
354 FILE *tmp = platform_tmpfile();
355 if (!tmp) {
356 return SET_ERRNO_SYS(ERROR_CONFIG, "Failed to create temporary file for memory parsing");
357 }
358
359 // Write content to temporary file
360 // Write content_len + 1 to include the null terminator.
361 // The embedded string is content_len + 1 bytes (content_len chars + null terminator).
362 // platform_getline() needs the null terminator to properly handle EOF.
363 size_t bytes_to_write = content_len + 1;
364 size_t written = fwrite(content, 1, bytes_to_write, tmp);
365 if (written != bytes_to_write) {
366 fclose(tmp);
367 return SET_ERRNO_SYS(ERROR_CONFIG, "Failed to write complete content to temporary file");
368 }
369
370 // Rewind to beginning for reading
371 rewind(tmp);
372
373 // Parse using common function
374 asciichat_error_t err = parse_sections_internal(tmp, out_sections, out_count);
375 fclose(tmp);
376
377 return err;
378}
379
380void manpage_parser_free_sections(parsed_section_t *sections, size_t count) {
381 if (!sections) {
382 return;
383 }
384
385 for (size_t i = 0; i < count; i++) {
386 if (sections[i].section_name) {
387 SAFE_FREE(sections[i].section_name);
388 }
389 if (sections[i].content) {
390 SAFE_FREE(sections[i].content);
391 }
392 }
393
394 SAFE_FREE(sections);
395}
396
397const parsed_section_t *manpage_parser_find_section(const parsed_section_t *sections, size_t count,
398 const char *section_name) {
399 if (!sections || !section_name) {
400 return NULL;
401 }
402
403 for (size_t i = 0; i < count; i++) {
404 if (sections[i].section_name && strcmp(sections[i].section_name, section_name) == 0) {
405 return &sections[i];
406 }
407 }
408
409 return NULL;
410}
asciichat_error_t manpage_parser_parse_file(FILE *f, parsed_section_t **out_sections, size_t *out_count)
Definition parser.c:339
const parsed_section_t * manpage_parser_find_section(const parsed_section_t *sections, size_t count, const char *section_name)
Definition parser.c:397
asciichat_error_t manpage_parser_parse_memory(const char *content, size_t content_len, parsed_section_t **out_sections, size_t *out_count)
Definition parser.c:347
void manpage_parser_free_sections(parsed_section_t *sections, size_t count)
Definition parser.c:380
FILE * platform_tmpfile(void)