ascii-chat 0.6.0
Real-time terminal-based video chat with ASCII art conversion
Loading...
Searching...
No Matches
levenshtein.c
Go to the documentation of this file.
1
12#include <string.h>
13#include <stdlib.h>
14#include <stdint.h>
15#include "options/levenshtein.h"
16#include "common.h"
17
18// Returns a size_t, depicting the difference between `a` and `b`.
19// See <https://en.wikipedia.org/wiki/Levenshtein_distance> for more information.
20size_t levenshtein_n(const char *a, const size_t length, const char *b, const size_t bLength) {
21 // Shortcut optimizations / degenerate cases.
22 if (a == b) {
23 return 0;
24 }
25
26 if (length == 0) {
27 return bLength;
28 }
29
30 if (bLength == 0) {
31 return length;
32 }
33
34 size_t *cache = SAFE_CALLOC(length, sizeof(size_t), size_t *);
35 if (!cache) {
36 return SIZE_MAX; // Allocation failed
37 }
38
39 size_t index = 0;
40 size_t bIndex = 0;
41 size_t distance;
42 size_t bDistance;
43 size_t result = 0;
44 char code;
45
46 // initialize the vector.
47 while (index < length) {
48 cache[index] = index + 1;
49 index++;
50 }
51
52 // Loop.
53 while (bIndex < bLength) {
54 code = b[bIndex];
55 result = distance = bIndex++;
56
57 for (index = 0; index < length; index++) {
58 bDistance = code == a[index] ? distance : distance + 1;
59 distance = cache[index];
60
61 cache[index] = result = distance > result ? bDistance > result ? result + 1 : bDistance
62 : bDistance > distance ? distance + 1
63 : bDistance;
64 }
65 }
66
67 SAFE_FREE(cache);
68
69 return result;
70}
71
72size_t levenshtein(const char *a, const char *b) {
73 if (!a || !b) {
74 return SIZE_MAX;
75 }
76 const size_t length = strlen(a);
77 const size_t bLength = strlen(b);
78
79 return levenshtein_n(a, length, b, bLength);
80}
81
82const char *levenshtein_find_similar(const char *unknown, const char *const *candidates) {
83 if (!unknown || !candidates) {
84 return NULL;
85 }
86
87 const char *best_match = NULL;
88 size_t best_distance = SIZE_MAX;
89
90 for (int i = 0; candidates[i] != NULL; i++) {
91 size_t dist = levenshtein(unknown, candidates[i]);
92 if (dist < best_distance) {
93 best_distance = dist;
94 best_match = candidates[i];
95 }
96 }
97
98 // Only suggest if the distance is within our threshold
99 if (best_distance <= LEVENSHTEIN_SUGGESTION_THRESHOLD) {
100 return best_match;
101 }
102
103 return NULL;
104}
#define SAFE_FREE(ptr)
Definition common.h:320
#define SAFE_CALLOC(count, size, cast)
Definition common.h:218
const char * levenshtein_find_similar(const char *unknown, const char *const *candidates)
Find the most similar string from a NULL-terminated array.
Definition levenshtein.c:82
size_t levenshtein_n(const char *a, const size_t length, const char *b, const size_t bLength)
Calculate Levenshtein distance with explicit string lengths.
Definition levenshtein.c:20
#define LEVENSHTEIN_SUGGESTION_THRESHOLD
Maximum edit distance to suggest an option.
Definition levenshtein.h:29
size_t levenshtein(const char *a, const char *b)
Calculate Levenshtein distance between two strings.
Definition levenshtein.c:72
Levenshtein distance algorithm for fuzzy string matching.
Common SIMD utilities and structures.