83 SET_ERRNO(ERROR_INVALID_PARAM,
"URL is NULL or empty");
87 pcre2_code *regex = url_regex_get();
93 char url_with_scheme[2048];
94 const char *url_to_match = url;
96 if (!strstr(url,
"://")) {
100 if (strcmp(url,
"http") == 0 || strcmp(url,
"https") == 0 || strcmp(url,
"ftp") == 0 || strcmp(url,
"ftps") == 0) {
105 if (strncmp(url,
"http/", 5) == 0 || strncmp(url,
"https/", 6) == 0) {
110 if (strchr(url,
'@')) {
115 if (strlen(url) == 64) {
117 for (
const char *p = url; *p && all_hex; p++) {
118 if (!((*p >=
'0' && *p <=
'9') || (*p >=
'a' && *p <=
'f') || (*p >=
'A' && *p <=
'F'))) {
128 const char *colon_pos = strchr(url,
':');
131 const char *after_colon = colon_pos + 1;
132 bool looks_like_port =
true;
133 for (
const char *p = after_colon; *p && *p !=
'/'; p++) {
134 if (!(*p >=
'0' && *p <=
'9')) {
135 looks_like_port =
false;
139 if (!looks_like_port) {
145 int result = snprintf(url_with_scheme,
sizeof(url_with_scheme),
"http://%s", url);
146 if (result < 0 || result >= (
int)
sizeof(url_with_scheme)) {
149 url_to_match = url_with_scheme;
152 pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(regex, NULL);
158 int rc = pcre2_jit_match(regex, (PCRE2_SPTR)url_to_match, strlen(url_to_match), 0,
162 pcre2_match_data_free(match_data);
166asciichat_error_t
url_parse(
const char *url, url_parts_t *parts_out) {
168 return SET_ERRNO(ERROR_INVALID_PARAM,
"URL is NULL or empty");
172 return SET_ERRNO(ERROR_INVALID_PARAM,
"parts_out is NULL");
176 memset(parts_out, 0,
sizeof(*parts_out));
178 pcre2_code *regex = url_regex_get();
180 return SET_ERRNO(ERROR_CONFIG,
"URL validator not initialized");
186 char url_with_scheme[4096];
187 const char *url_to_match = url;
188 const char *original_url = url;
190 if (!strstr(url,
"://")) {
194 if (strcmp(url,
"http") == 0 || strcmp(url,
"https") == 0 || strcmp(url,
"ftp") == 0 || strcmp(url,
"ftps") == 0) {
195 return SET_ERRNO(ERROR_INVALID_PARAM,
"Invalid URL format: %s", url);
199 if (strncmp(url,
"http/", 5) == 0 || strncmp(url,
"https/", 6) == 0) {
200 return SET_ERRNO(ERROR_INVALID_PARAM,
"Invalid URL format (looks like malformed scheme): %s", url);
204 if (strlen(url) == 64) {
206 for (
const char *p = url; *p && all_hex; p++) {
207 if (!((*p >=
'0' && *p <=
'9') || (*p >=
'a' && *p <=
'f') || (*p >=
'A' && *p <=
'F'))) {
212 return SET_ERRNO(ERROR_INVALID_PARAM,
"Invalid URL: appears to be raw hex data, not a URL");
216 const char *colon_pos = strchr(url,
':');
219 const char *after_colon = colon_pos + 1;
220 bool looks_like_port =
true;
221 for (
const char *p = after_colon; *p && *p !=
'/'; p++) {
222 if (!(*p >=
'0' && *p <=
'9')) {
223 looks_like_port =
false;
227 if (!looks_like_port) {
228 return SET_ERRNO(ERROR_INVALID_PARAM,
"Invalid URL format (invalid scheme): %s", url);
233 int result = snprintf(url_with_scheme,
sizeof(url_with_scheme),
"http://%s", url);
234 if (result < 0 || result >= (
int)
sizeof(url_with_scheme)) {
235 return SET_ERRNO(ERROR_INVALID_PARAM,
"URL too long");
237 url_to_match = url_with_scheme;
240 pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(regex, NULL);
242 return SET_ERRNO(ERROR_MEMORY,
"Failed to create match data");
246 int rc = pcre2_jit_match(regex, (PCRE2_SPTR)url_to_match, strlen(url_to_match), 0,
251 pcre2_match_data_free(match_data);
252 return SET_ERRNO(ERROR_INVALID_PARAM,
"Invalid URL format: %s", original_url);
266 parts_out->port = (int)strtol(port_str, NULL, 10);
270 pcre2_match_data_free(match_data);
273 if (!parts_out->scheme || !parts_out->host) {
275 return SET_ERRNO(ERROR_INVALID_PARAM,
"Missing required URL components");
char * asciichat_pcre2_extract_named_group(pcre2_code *regex, pcre2_match_data *match_data, const char *group_name, const char *subject)
Extract named substring from PCRE2 match data.