23 if ((s[0] & 0xE0) == 0xC0) {
25 if ((s[1] & 0xC0) != 0x80)
27 *codepoint = (((uint32_t)(s[0] & 0x1F) << 6) | (uint32_t)(s[1] & 0x3F));
29 }
else if ((s[0] & 0xF0) == 0xE0) {
31 if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
33 *codepoint = (((uint32_t)(s[0] & 0x0F) << 12) | ((uint32_t)(s[1] & 0x3F) << 6) | (uint32_t)(s[2] & 0x3F));
35 }
else if ((s[0] & 0xF8) == 0xF0) {
37 if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
39 *codepoint = (((uint32_t)(s[0] & 0x07) << 18) | ((uint32_t)(s[1] & 0x3F) << 12) | ((uint32_t)(s[2] & 0x3F) << 6) |
40 (uint32_t)(s[3] & 0x3F));
48 SET_ERRNO(ERROR_INVALID_PARAM,
"str is NULL");
53 const utf8proc_uint8_t *p = (
const utf8proc_uint8_t *)str;
58 if (p[0] == 0x1b && p[1] ==
'[') {
62 while (*p && *p !=
'm') {
71 utf8proc_int32_t codepoint;
72 utf8proc_ssize_t len = utf8proc_iterate(p, -1, &codepoint);
80 int char_width = utf8proc_charwidth(codepoint);
93 if (!str || max_bytes == 0) {
94 SET_ERRNO(ERROR_INVALID_PARAM,
"str is NULL or max_bytes is 0");
99 const utf8proc_uint8_t *p = (
const utf8proc_uint8_t *)str;
100 const utf8proc_uint8_t *end = p + max_bytes;
102 while (p < end && *p) {
104 if (p + 1 < end && p[0] == 0x1b && p[1] ==
'[') {
108 while (p < end && *p && *p !=
'm') {
111 if (p < end && *p ==
'm') {
117 utf8proc_int32_t codepoint;
118 utf8proc_ssize_t len = utf8proc_iterate(p, end - p, &codepoint);
126 int char_width = utf8proc_charwidth(codepoint);
127 if (char_width < 0) {
188 if (!str || !out_codepoints || max_codepoints == 0) {
189 SET_ERRNO(ERROR_INVALID_PARAM,
"str is NULL or out_codepoints is NULL or max_codepoints is 0");
194 const uint8_t *p = (
const uint8_t *)str;
195 while (*p && count < max_codepoints) {
198 if (decode_len < 0) {
201 out_codepoints[count++] = codepoint;
208 if (!str || max_bytes == 0) {
209 SET_ERRNO(ERROR_INVALID_PARAM,
"str is NULL or max_bytes is 0");
214 utf8proc_int32_t codepoint;
215 utf8proc_ssize_t len = utf8proc_iterate((
const utf8proc_uint8_t *)str, (utf8proc_ssize_t)max_bytes, &codepoint);
241 int continuation_bytes,
int (*read_byte_fn)(
void)) {
242 if (!buffer || !cursor || !len || continuation_bytes <= 0 || !read_byte_fn) {
243 SET_ERRNO(ERROR_INVALID_PARAM,
"invalid params");
247 for (
int i = 0; i < continuation_bytes && *len < max_len - 1; i++) {
248 int next_byte = read_byte_fn();
249 if (next_byte == EOF) {
254 memmove(&buffer[*cursor + 1], &buffer[*cursor], *len - *cursor);
255 buffer[*cursor] = (char)next_byte;
275 if (!haystack || !needle) {
276 SET_ERRNO(ERROR_INVALID_PARAM,
"invalid params");
281 if (needle[0] ==
'\0') {
286 size_t haystack_len = strlen(haystack);
287 size_t needle_len = strlen(needle);
289 if (needle_len > haystack_len) {
297 utf8proc_option_t options = UTF8PROC_CASEFOLD | UTF8PROC_STABLE | UTF8PROC_COMPOSE;
300 utf8proc_uint8_t *needle_folded = NULL;
301 utf8proc_ssize_t needle_folded_len =
302 utf8proc_map((
const utf8proc_uint8_t *)needle, (utf8proc_ssize_t)needle_len, &needle_folded, options);
304 if (needle_folded_len < 0 || !needle_folded) {
313 const char *haystack_pos = haystack;
314 while (*haystack_pos !=
'\0') {
316 size_t remaining = haystack_len - (size_t)(haystack_pos - haystack);
318 if (remaining < needle_len) {
324 utf8proc_uint8_t *haystack_folded = NULL;
325 utf8proc_ssize_t haystack_folded_len =
326 utf8proc_map((
const utf8proc_uint8_t *)haystack_pos, (utf8proc_ssize_t)needle_len, &haystack_folded, options);
328 if (haystack_folded_len >= 0 && haystack_folded) {
330 if ((
size_t)haystack_folded_len == (
size_t)needle_folded_len &&
331 memcmp(haystack_folded, needle_folded, (
size_t)needle_folded_len) == 0) {
333 free(haystack_folded);
337 free(haystack_folded);
341 utf8proc_int32_t codepoint;
342 utf8proc_ssize_t bytes = utf8proc_iterate((
const utf8proc_uint8_t *)haystack_pos, -1, &codepoint);
347 haystack_pos += bytes;
int utf8_read_and_insert_continuation_bytes(char *buffer, size_t *cursor, size_t *len, size_t max_len, int continuation_bytes, int(*read_byte_fn)(void))