mirror of
https://github.com/libgit2/libgit2.git
synced 2026-01-25 02:56:17 +00:00
util: provide git__memmem function
Unfortunately, neither the `memmem` nor the `strnstr` functions are part
of any C standard but are merely extensions of C that are implemented by
e.g. glibc. Thus, there is no standardized way to search for a string in
a block of memory with a limited size, and using `strstr` is to be
considered unsafe in case where the buffer has not been sanitized. In
fact, there are some uses of `strstr` in exactly that unsafe way in our
codebase.
Provide a new function `git__memmem` that implements the `memmem`
semantics. That is in a given haystack of `n` bytes, search for the
occurrence of a byte sequence of `m` bytes and return a pointer to the
first occurrence. The implementation chosen is the "Not So Naive"
algorithm from [1]. It was chosen as the implementation is comparably
simple while still being reasonably efficient in most cases.
Preprocessing happens in constant time and space, searching has a time
complexity of O(n*m) with a slightly sub-linear average case.
[1]: http://www-igm.univ-mlv.fr/~lecroq/string/
(cherry picked from commit 83e8a6b36a)
This commit is contained in:
41
src/util.c
41
src/util.c
@@ -353,6 +353,47 @@ size_t git__linenlen(const char *buffer, size_t buffer_len)
|
||||
return nl ? (size_t)(nl - buffer) + 1 : buffer_len;
|
||||
}
|
||||
|
||||
/*
|
||||
* Adapted Not So Naive algorithm from http://www-igm.univ-mlv.fr/~lecroq/string/
|
||||
*/
|
||||
const void * git__memmem(const void *haystack, size_t haystacklen,
|
||||
const void *needle, size_t needlelen)
|
||||
{
|
||||
const char *h, *n;
|
||||
size_t j, k, l;
|
||||
|
||||
if (needlelen > haystacklen || !haystacklen || !needlelen)
|
||||
return NULL;
|
||||
|
||||
h = (const char *) haystack,
|
||||
n = (const char *) needle;
|
||||
|
||||
if (needlelen == 1)
|
||||
return memchr(haystack, *n, haystacklen);
|
||||
|
||||
if (n[0] == n[1]) {
|
||||
k = 2;
|
||||
l = 1;
|
||||
} else {
|
||||
k = 1;
|
||||
l = 2;
|
||||
}
|
||||
|
||||
j = 0;
|
||||
while (j <= haystacklen - needlelen) {
|
||||
if (n[1] != h[j + 1]) {
|
||||
j += k;
|
||||
} else {
|
||||
if (memcmp(n + 2, h + j + 2, needlelen - 2) == 0 &&
|
||||
n[0] == h[j])
|
||||
return h + j;
|
||||
j += l;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void git__hexdump(const char *buffer, size_t len)
|
||||
{
|
||||
static const size_t LINE_WIDTH = 16;
|
||||
|
||||
@@ -316,6 +316,9 @@ GIT_INLINE(const void *) git__memrchr(const void *s, int c, size_t n)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
extern const void * git__memmem(const void *haystack, size_t haystacklen,
|
||||
const void *needle, size_t needlelen);
|
||||
|
||||
typedef int (*git__tsort_cmp)(const void *a, const void *b);
|
||||
|
||||
extern void git__tsort(void **dst, size_t size, git__tsort_cmp cmp);
|
||||
|
||||
46
tests/core/memmem.c
Normal file
46
tests/core/memmem.c
Normal file
@@ -0,0 +1,46 @@
|
||||
#include "clar_libgit2.h"
|
||||
|
||||
static void assert_found(const char *haystack, const char *needle, size_t expected_pos)
|
||||
{
|
||||
cl_assert_equal_p(git__memmem(haystack, haystack ? strlen(haystack) : 0,
|
||||
needle, needle ? strlen(needle) : 0),
|
||||
haystack + expected_pos);
|
||||
}
|
||||
|
||||
static void assert_absent(const char *haystack, const char *needle)
|
||||
{
|
||||
cl_assert_equal_p(git__memmem(haystack, haystack ? strlen(haystack) : 0,
|
||||
needle, needle ? strlen(needle) : 0),
|
||||
NULL);
|
||||
}
|
||||
|
||||
void test_core_memmem__found(void)
|
||||
{
|
||||
assert_found("a", "a", 0);
|
||||
assert_found("ab", "a", 0);
|
||||
assert_found("ba", "a", 1);
|
||||
assert_found("aa", "a", 0);
|
||||
assert_found("aab", "aa", 0);
|
||||
assert_found("baa", "aa", 1);
|
||||
assert_found("dabc", "abc", 1);
|
||||
assert_found("abababc", "abc", 4);
|
||||
}
|
||||
|
||||
void test_core_memmem__absent(void)
|
||||
{
|
||||
assert_absent("a", "b");
|
||||
assert_absent("a", "aa");
|
||||
assert_absent("ba", "ab");
|
||||
assert_absent("ba", "ab");
|
||||
assert_absent("abc", "abcd");
|
||||
assert_absent("abcabcabc", "bcac");
|
||||
}
|
||||
|
||||
void test_core_memmem__edgecases(void)
|
||||
{
|
||||
assert_absent(NULL, NULL);
|
||||
assert_absent("a", NULL);
|
||||
assert_absent(NULL, "a");
|
||||
assert_absent("", "a");
|
||||
assert_absent("a", "");
|
||||
}
|
||||
Reference in New Issue
Block a user