mirror of
https://github.com/libgit2/libgit2.git
synced 2026-01-25 02:56:17 +00:00
refs: Speed up packed lookups.
Currently ref lookups require loading the entire packed-refs file into a hashmap in memory. For repos with large numbers of refs this can be painfully slow. This patch replaces the existing lookup code and instead mmap()'s the packed-refs file and performs a binary search to locate the ref entry. Git uses a similiar approach. The old hash table codepath is still used for unsorted packed-refs files. This patch also fixes a minor bug where the "peeled" trait is never parsed correctly from the packed-refs header.
This commit is contained in:
@@ -1963,3 +1963,8 @@ v0.22
|
||||
functions. This is not something which we can know to do. A
|
||||
last-resort convenience function is provided in sys/openssl.h,
|
||||
`git_openssl_set_locking()` which can be used to set the locking.
|
||||
|
||||
* `git_reference_*()` functions use mmap() + binary search for packed
|
||||
refs lookups when using the fs backend. Previously all entries were
|
||||
read into a hashtable, which could be slow for repositories with a
|
||||
large number of refs.
|
||||
|
||||
310
src/refdb_fs.c
310
src/refdb_fs.c
@@ -65,9 +65,13 @@ typedef struct refdb_fs_backend {
|
||||
git_iterator_flag_t iterator_flags;
|
||||
uint32_t direach_flags;
|
||||
int fsync;
|
||||
git_map packed_refs_map;
|
||||
git_mutex prlock; /* protect packed_refs_map */
|
||||
bool sorted;
|
||||
} refdb_fs_backend;
|
||||
|
||||
static int refdb_reflog_fs__delete(git_refdb_backend *_backend, const char *name);
|
||||
static const char * packed_set_peeling_mode(const char *data, size_t data_sz, refdb_fs_backend *backend);
|
||||
|
||||
GIT_INLINE(int) loose_path(
|
||||
git_str *out,
|
||||
@@ -137,27 +141,9 @@ static int packed_reload(refdb_fs_backend *backend)
|
||||
scan = (char *)packedrefs.ptr;
|
||||
eof = scan + packedrefs.size;
|
||||
|
||||
backend->peeling_mode = PEELING_NONE;
|
||||
|
||||
if (*scan == '#') {
|
||||
static const char *traits_header = "# pack-refs with: ";
|
||||
|
||||
if (git__prefixcmp(scan, traits_header) == 0) {
|
||||
scan += strlen(traits_header);
|
||||
eol = strchr(scan, '\n');
|
||||
|
||||
if (!eol)
|
||||
goto parse_failed;
|
||||
*eol = '\0';
|
||||
|
||||
if (strstr(scan, " fully-peeled ") != NULL) {
|
||||
backend->peeling_mode = PEELING_FULL;
|
||||
} else if (strstr(scan, " peeled ") != NULL) {
|
||||
backend->peeling_mode = PEELING_STANDARD;
|
||||
}
|
||||
|
||||
scan = eol + 1;
|
||||
}
|
||||
scan = (char *)packed_set_peeling_mode(scan, packedrefs.size, backend);
|
||||
if (!scan) {
|
||||
goto parse_failed;
|
||||
}
|
||||
|
||||
while (scan < eof && *scan == '#') {
|
||||
@@ -466,10 +452,176 @@ static int ref_error_notfound(const char *name)
|
||||
return GIT_ENOTFOUND;
|
||||
}
|
||||
|
||||
static int packed_lookup(
|
||||
git_reference **out,
|
||||
refdb_fs_backend *backend,
|
||||
const char *ref_name)
|
||||
static const char *packed_set_peeling_mode(
|
||||
const char *data,
|
||||
size_t data_sz,
|
||||
refdb_fs_backend *backend)
|
||||
{
|
||||
static const char *traits_header = "# pack-refs with:";
|
||||
const char *eol;
|
||||
backend->peeling_mode = PEELING_NONE;
|
||||
|
||||
if (data_sz == 0 || *data != '#') {
|
||||
return data;
|
||||
}
|
||||
|
||||
if (git__prefixncmp(data, data_sz, traits_header) == 0) {
|
||||
size_t hdr_sz = strlen(traits_header);
|
||||
const char *sorted = " sorted ";
|
||||
const char *peeled = " peeled ";
|
||||
const char *fully_peeled = " fully-peeled ";
|
||||
data += hdr_sz;
|
||||
data_sz -= hdr_sz;
|
||||
|
||||
eol = memchr(data, '\n', data_sz);
|
||||
|
||||
if (!eol)
|
||||
return NULL;
|
||||
|
||||
if (git__memmem(data, eol - data, fully_peeled, strlen(fully_peeled))) {
|
||||
backend->peeling_mode = PEELING_FULL;
|
||||
} else if (git__memmem(data, eol - data, peeled, strlen(peeled))) {
|
||||
backend->peeling_mode = PEELING_STANDARD;
|
||||
}
|
||||
|
||||
if (git__memmem(data, eol - data, sorted, strlen(sorted))) {
|
||||
backend->sorted = true;
|
||||
} else {
|
||||
backend->sorted = false;
|
||||
}
|
||||
|
||||
return eol + 1;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
static int packed_map_check(refdb_fs_backend *backend)
|
||||
{
|
||||
int error = 0;
|
||||
git_file fd = -1;
|
||||
struct stat st;
|
||||
|
||||
if ((error = git_mutex_lock(&backend->prlock)) < 0) {
|
||||
return error;
|
||||
}
|
||||
|
||||
if (backend->packed_refs_map.data) {
|
||||
git_mutex_unlock(&backend->prlock);
|
||||
return error;
|
||||
}
|
||||
|
||||
fd = git_futils_open_ro(backend->refcache->path);
|
||||
if (fd < 0) {
|
||||
git_mutex_unlock(&backend->prlock);
|
||||
if (fd == GIT_ENOTFOUND) {
|
||||
git_error_clear();
|
||||
return 0;
|
||||
}
|
||||
return fd;
|
||||
}
|
||||
|
||||
if (p_fstat(fd, &st) < 0) {
|
||||
p_close(fd);
|
||||
git_mutex_unlock(&backend->prlock);
|
||||
git_error_set(GIT_ERROR_OS, "unable to stat packed-refs '%s'", backend->refcache->path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (st.st_size == 0) {
|
||||
p_close(fd);
|
||||
git_mutex_unlock(&backend->prlock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
error = git_futils_mmap_ro(&backend->packed_refs_map, fd, 0, (size_t)st.st_size);
|
||||
p_close(fd);
|
||||
if (error < 0) {
|
||||
git_mutex_unlock(&backend->prlock);
|
||||
return error;
|
||||
}
|
||||
|
||||
packed_set_peeling_mode(
|
||||
backend->packed_refs_map.data, backend->packed_refs_map.len,
|
||||
backend);
|
||||
|
||||
git_mutex_unlock(&backend->prlock);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find beginning of packed-ref record pointed to by p.
|
||||
* buf - a lower-bound pointer to some memory buffer
|
||||
* p - an upper-bound pointer to the same memory buffer
|
||||
*/
|
||||
static const char *start_of_record(const char *buf, const char *p)
|
||||
{
|
||||
const char *nl = p;
|
||||
while (1) {
|
||||
nl = git__memrchr(buf, '\n', nl - buf);
|
||||
if (!nl)
|
||||
return buf;
|
||||
|
||||
if (nl[1] == '^' && nl > buf)
|
||||
--nl;
|
||||
else
|
||||
break;
|
||||
};
|
||||
return nl + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find end of packed-ref record pointed to by p.
|
||||
* end - an upper-bound pointer to some memory buffer
|
||||
* p - a lower-bound pointer to the same memory buffer
|
||||
*/
|
||||
static const char *end_of_record(const char *p, const char *end)
|
||||
{
|
||||
while (1) {
|
||||
size_t sz = end - p;
|
||||
p = memchr(p, '\n', sz);
|
||||
if (!p) {
|
||||
return end;
|
||||
}
|
||||
++p;
|
||||
if (p < end && p[0] == '^')
|
||||
++p;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
static int
|
||||
cmp_record_to_refname(const char *rec, size_t data_end, const char *ref_name)
|
||||
{
|
||||
const size_t ref_len = strlen(ref_name);
|
||||
int cmp_val;
|
||||
const char *end;
|
||||
|
||||
rec += GIT_OID_HEXSZ + 1; /* <oid> + space */
|
||||
if (data_end < GIT_OID_HEXSZ + 3) {
|
||||
/* an incomplete (corrupt) record is treated as less than ref_name */
|
||||
return -1;
|
||||
}
|
||||
data_end -= GIT_OID_HEXSZ + 1;
|
||||
|
||||
end = memchr(rec, '\n', data_end);
|
||||
if (end) {
|
||||
data_end = end - rec;
|
||||
}
|
||||
|
||||
cmp_val = memcmp(rec, ref_name, min(ref_len, data_end));
|
||||
|
||||
if (cmp_val == 0 && data_end != ref_len) {
|
||||
return (data_end > ref_len) ? 1 : -1;
|
||||
}
|
||||
return cmp_val;
|
||||
}
|
||||
|
||||
static int packed_unsorted_lookup(
|
||||
git_reference **out,
|
||||
refdb_fs_backend *backend,
|
||||
const char *ref_name)
|
||||
{
|
||||
int error = 0;
|
||||
struct packref *entry;
|
||||
@@ -494,6 +646,86 @@ static int packed_lookup(
|
||||
return error;
|
||||
}
|
||||
|
||||
static int packed_lookup(
|
||||
git_reference **out,
|
||||
refdb_fs_backend *backend,
|
||||
const char *ref_name)
|
||||
{
|
||||
int error = 0;
|
||||
const char *left, *right, *data_end;
|
||||
|
||||
if ((error = packed_map_check(backend)) < 0)
|
||||
return error;
|
||||
|
||||
if (!backend->sorted) {
|
||||
return packed_unsorted_lookup(out, backend, ref_name);
|
||||
}
|
||||
|
||||
left = backend->packed_refs_map.data;
|
||||
right = data_end = ((const char *)backend->packed_refs_map.data) +
|
||||
backend->packed_refs_map.len;
|
||||
|
||||
while (left < right && *left == '#') {
|
||||
if (!(left = memchr(left, '\n', data_end - left)))
|
||||
goto parse_failed;
|
||||
left++;
|
||||
}
|
||||
|
||||
while (left < right) {
|
||||
const char *mid, *rec;
|
||||
int compare;
|
||||
|
||||
mid = left + (right - left) / 2;
|
||||
rec = start_of_record(left, mid);
|
||||
compare = cmp_record_to_refname(rec, data_end - rec, ref_name);
|
||||
|
||||
if (compare < 0) {
|
||||
left = end_of_record(mid, right);
|
||||
} else if (compare > 0) {
|
||||
right = rec;
|
||||
} else {
|
||||
const char *eol;
|
||||
git_oid oid, peel, *peel_ptr = NULL;
|
||||
|
||||
if (data_end - rec < GIT_OID_HEXSZ ||
|
||||
git_oid_fromstr(&oid, rec) < 0) {
|
||||
goto parse_failed;
|
||||
}
|
||||
rec += GIT_OID_HEXSZ + 1;
|
||||
if (!(eol = memchr(rec, '\n', data_end - rec))) {
|
||||
goto parse_failed;
|
||||
}
|
||||
|
||||
/* look for optional "^<OID>\n" */
|
||||
|
||||
if (eol + 1 < data_end) {
|
||||
rec = eol + 1;
|
||||
|
||||
if (*rec == '^') {
|
||||
rec++;
|
||||
if (data_end - rec < GIT_OID_HEXSZ ||
|
||||
git_oid_fromstr(&peel, rec) < 0) {
|
||||
goto parse_failed;
|
||||
}
|
||||
peel_ptr = &peel;
|
||||
}
|
||||
}
|
||||
|
||||
*out = git_reference__alloc(ref_name, &oid, peel_ptr);
|
||||
if (!*out) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return GIT_ENOTFOUND;
|
||||
|
||||
parse_failed:
|
||||
git_error_set(GIT_ERROR_REFERENCE, "corrupted packed references file");
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int refdb_fs_backend__lookup(
|
||||
git_reference **out,
|
||||
git_refdb_backend *_backend,
|
||||
@@ -513,7 +745,6 @@ static int refdb_fs_backend__lookup(
|
||||
git_error_clear();
|
||||
error = packed_lookup(out, backend, ref_name);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -1081,6 +1312,18 @@ static int packed_write(refdb_fs_backend *backend)
|
||||
int error, open_flags = 0;
|
||||
size_t i;
|
||||
|
||||
/* take lock and close up packed-refs mmap if open */
|
||||
if ((error = git_mutex_lock(&backend->prlock)) < 0) {
|
||||
return error;
|
||||
}
|
||||
|
||||
if (backend->packed_refs_map.data) {
|
||||
git_futils_mmap_free(&backend->packed_refs_map);
|
||||
backend->packed_refs_map.data = NULL;
|
||||
}
|
||||
|
||||
git_mutex_unlock(&backend->prlock);
|
||||
|
||||
/* lock the cache to updates while we do this */
|
||||
if ((error = git_sortedcache_wlock(refcache)) < 0)
|
||||
return error;
|
||||
@@ -1568,6 +1811,15 @@ static void refdb_fs_backend__free(git_refdb_backend *_backend)
|
||||
return;
|
||||
|
||||
git_sortedcache_free(backend->refcache);
|
||||
|
||||
git_mutex_lock(&backend->prlock);
|
||||
if (backend->packed_refs_map.data) {
|
||||
git_futils_mmap_free(&backend->packed_refs_map);
|
||||
backend->packed_refs_map.data = NULL;
|
||||
}
|
||||
git_mutex_unlock(&backend->prlock);
|
||||
git_mutex_free(&backend->prlock);
|
||||
|
||||
git__free(backend->gitpath);
|
||||
git__free(backend->commonpath);
|
||||
git__free(backend);
|
||||
@@ -2121,6 +2373,11 @@ int git_refdb_backend_fs(
|
||||
|
||||
backend = git__calloc(1, sizeof(refdb_fs_backend));
|
||||
GIT_ERROR_CHECK_ALLOC(backend);
|
||||
if (git_mutex_init(&backend->prlock) < 0) {
|
||||
git__free(backend);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
if (git_refdb_init_backend(&backend->parent, GIT_REFDB_BACKEND_VERSION) < 0)
|
||||
goto fail;
|
||||
@@ -2183,6 +2440,7 @@ int git_refdb_backend_fs(
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
git_mutex_free(&backend->prlock);
|
||||
git_str_dispose(&gitpath);
|
||||
git__free(backend->gitpath);
|
||||
git__free(backend->commonpath);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "clar_libgit2.h"
|
||||
#include "refs.h"
|
||||
|
||||
void test_refs_crashes__double_free(void)
|
||||
{
|
||||
@@ -18,3 +19,26 @@ void test_refs_crashes__double_free(void)
|
||||
|
||||
cl_git_sandbox_cleanup();
|
||||
}
|
||||
|
||||
void test_refs_crashes__empty_packedrefs(void)
|
||||
{
|
||||
git_repository *repo;
|
||||
git_reference *ref;
|
||||
const char *REFNAME = "refs/heads/xxx";
|
||||
git_str temp_path = GIT_STR_INIT;
|
||||
int fd = 0;
|
||||
|
||||
repo = cl_git_sandbox_init("empty_bare.git");
|
||||
|
||||
/* create zero-length packed-refs file */
|
||||
cl_git_pass(git_str_joinpath(&temp_path, git_repository_path(repo), GIT_PACKEDREFS_FILE));
|
||||
cl_git_pass(((fd = p_creat(temp_path.ptr, 0644)) < 0));
|
||||
cl_git_pass(p_close(fd));
|
||||
|
||||
/* should fail gracefully */
|
||||
cl_git_fail_with(
|
||||
GIT_ENOTFOUND, git_reference_lookup(&ref, repo, REFNAME));
|
||||
|
||||
cl_git_sandbox_cleanup();
|
||||
git_str_dispose(&temp_path);
|
||||
}
|
||||
|
||||
2
tests/resources/peeled.git/packed-refs
vendored
2
tests/resources/peeled.git/packed-refs
vendored
@@ -1,4 +1,4 @@
|
||||
# pack-refs with: peeled fully-peeled
|
||||
# pack-refs with: peeled fully-peeled sorted
|
||||
c2596aa0151888587ec5c0187f261e63412d9e11 refs/foo/tag-outside-tags
|
||||
^0df1a5865c8abfc09f1f2182e6a31be550e99f07
|
||||
0df1a5865c8abfc09f1f2182e6a31be550e99f07 refs/heads/master
|
||||
|
||||
2
tests/resources/testrepo.git/packed-refs
vendored
2
tests/resources/testrepo.git/packed-refs
vendored
@@ -1,3 +1,3 @@
|
||||
# pack-refs with: peeled
|
||||
# pack-refs with: peeled sorted
|
||||
41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/heads/packed
|
||||
5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/heads/packed-test
|
||||
|
||||
2
tests/resources/testrepo/.gitted/packed-refs
vendored
2
tests/resources/testrepo/.gitted/packed-refs
vendored
@@ -1,4 +1,4 @@
|
||||
# pack-refs with: peeled
|
||||
# pack-refs with: peeled sorted
|
||||
41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/heads/packed
|
||||
5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/heads/packed-test
|
||||
b25fa35b38051e4ae45d4222e795f9df2e43f1d1 refs/tags/packed-tag
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# pack-refs with: peeled fully-peeled
|
||||
# pack-refs with: peeled fully-peeled sorted
|
||||
36060c58702ed4c2a40832c51758d5344201d89a refs/remotes/origin/master
|
||||
41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/remotes/origin/packed
|
||||
5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/tags/v0.9
|
||||
|
||||
Reference in New Issue
Block a user