From ad7a51d83496cb6f2bb7b08a426f832f589c7707 Mon Sep 17 00:00:00 2001 From: Colin Stolley Date: Thu, 7 Oct 2021 13:26:52 -0500 Subject: [PATCH] refs: Speed up packed lookups. Currently ref lookups require loading the entire packed-refs file into a hashmap in memory. For repos with large numbers of refs this can be painfully slow. This patch replaces the existing lookup code and instead mmap()'s the packed-refs file and performs a binary search to locate the ref entry. Git uses a similiar approach. The old hash table codepath is still used for unsorted packed-refs files. This patch also fixes a minor bug where the "peeled" trait is never parsed correctly from the packed-refs header. --- docs/changelog.md | 5 + src/refdb_fs.c | 310 ++++++++++++++++-- tests/refs/crashes.c | 24 ++ tests/resources/peeled.git/packed-refs | 2 +- tests/resources/testrepo.git/packed-refs | 2 +- tests/resources/testrepo/.gitted/packed-refs | 2 +- tests/resources/testrepo2/.gitted/packed-refs | 2 +- 7 files changed, 317 insertions(+), 30 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 8060874df..3723a080c 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1963,3 +1963,8 @@ v0.22 functions. This is not something which we can know to do. A last-resort convenience function is provided in sys/openssl.h, `git_openssl_set_locking()` which can be used to set the locking. + +* `git_reference_*()` functions use mmap() + binary search for packed + refs lookups when using the fs backend. Previously all entries were + read into a hashtable, which could be slow for repositories with a + large number of refs. diff --git a/src/refdb_fs.c b/src/refdb_fs.c index 055ca2559..2b9d05fa5 100644 --- a/src/refdb_fs.c +++ b/src/refdb_fs.c @@ -65,9 +65,13 @@ typedef struct refdb_fs_backend { git_iterator_flag_t iterator_flags; uint32_t direach_flags; int fsync; + git_map packed_refs_map; + git_mutex prlock; /* protect packed_refs_map */ + bool sorted; } refdb_fs_backend; static int refdb_reflog_fs__delete(git_refdb_backend *_backend, const char *name); +static const char * packed_set_peeling_mode(const char *data, size_t data_sz, refdb_fs_backend *backend); GIT_INLINE(int) loose_path( git_str *out, @@ -137,27 +141,9 @@ static int packed_reload(refdb_fs_backend *backend) scan = (char *)packedrefs.ptr; eof = scan + packedrefs.size; - backend->peeling_mode = PEELING_NONE; - - if (*scan == '#') { - static const char *traits_header = "# pack-refs with: "; - - if (git__prefixcmp(scan, traits_header) == 0) { - scan += strlen(traits_header); - eol = strchr(scan, '\n'); - - if (!eol) - goto parse_failed; - *eol = '\0'; - - if (strstr(scan, " fully-peeled ") != NULL) { - backend->peeling_mode = PEELING_FULL; - } else if (strstr(scan, " peeled ") != NULL) { - backend->peeling_mode = PEELING_STANDARD; - } - - scan = eol + 1; - } + scan = (char *)packed_set_peeling_mode(scan, packedrefs.size, backend); + if (!scan) { + goto parse_failed; } while (scan < eof && *scan == '#') { @@ -466,10 +452,176 @@ static int ref_error_notfound(const char *name) return GIT_ENOTFOUND; } -static int packed_lookup( - git_reference **out, - refdb_fs_backend *backend, - const char *ref_name) +static const char *packed_set_peeling_mode( + const char *data, + size_t data_sz, + refdb_fs_backend *backend) +{ + static const char *traits_header = "# pack-refs with:"; + const char *eol; + backend->peeling_mode = PEELING_NONE; + + if (data_sz == 0 || *data != '#') { + return data; + } + + if (git__prefixncmp(data, data_sz, traits_header) == 0) { + size_t hdr_sz = strlen(traits_header); + const char *sorted = " sorted "; + const char *peeled = " peeled "; + const char *fully_peeled = " fully-peeled "; + data += hdr_sz; + data_sz -= hdr_sz; + + eol = memchr(data, '\n', data_sz); + + if (!eol) + return NULL; + + if (git__memmem(data, eol - data, fully_peeled, strlen(fully_peeled))) { + backend->peeling_mode = PEELING_FULL; + } else if (git__memmem(data, eol - data, peeled, strlen(peeled))) { + backend->peeling_mode = PEELING_STANDARD; + } + + if (git__memmem(data, eol - data, sorted, strlen(sorted))) { + backend->sorted = true; + } else { + backend->sorted = false; + } + + return eol + 1; + } + return data; +} + +static int packed_map_check(refdb_fs_backend *backend) +{ + int error = 0; + git_file fd = -1; + struct stat st; + + if ((error = git_mutex_lock(&backend->prlock)) < 0) { + return error; + } + + if (backend->packed_refs_map.data) { + git_mutex_unlock(&backend->prlock); + return error; + } + + fd = git_futils_open_ro(backend->refcache->path); + if (fd < 0) { + git_mutex_unlock(&backend->prlock); + if (fd == GIT_ENOTFOUND) { + git_error_clear(); + return 0; + } + return fd; + } + + if (p_fstat(fd, &st) < 0) { + p_close(fd); + git_mutex_unlock(&backend->prlock); + git_error_set(GIT_ERROR_OS, "unable to stat packed-refs '%s'", backend->refcache->path); + return -1; + } + + if (st.st_size == 0) { + p_close(fd); + git_mutex_unlock(&backend->prlock); + return 0; + } + + error = git_futils_mmap_ro(&backend->packed_refs_map, fd, 0, (size_t)st.st_size); + p_close(fd); + if (error < 0) { + git_mutex_unlock(&backend->prlock); + return error; + } + + packed_set_peeling_mode( + backend->packed_refs_map.data, backend->packed_refs_map.len, + backend); + + git_mutex_unlock(&backend->prlock); + return error; +} + +/* + * Find beginning of packed-ref record pointed to by p. + * buf - a lower-bound pointer to some memory buffer + * p - an upper-bound pointer to the same memory buffer + */ +static const char *start_of_record(const char *buf, const char *p) +{ + const char *nl = p; + while (1) { + nl = git__memrchr(buf, '\n', nl - buf); + if (!nl) + return buf; + + if (nl[1] == '^' && nl > buf) + --nl; + else + break; + }; + return nl + 1; +} + +/* + * Find end of packed-ref record pointed to by p. + * end - an upper-bound pointer to some memory buffer + * p - a lower-bound pointer to the same memory buffer + */ +static const char *end_of_record(const char *p, const char *end) +{ + while (1) { + size_t sz = end - p; + p = memchr(p, '\n', sz); + if (!p) { + return end; + } + ++p; + if (p < end && p[0] == '^') + ++p; + else + break; + } + return p; +} + +static int +cmp_record_to_refname(const char *rec, size_t data_end, const char *ref_name) +{ + const size_t ref_len = strlen(ref_name); + int cmp_val; + const char *end; + + rec += GIT_OID_HEXSZ + 1; /* + space */ + if (data_end < GIT_OID_HEXSZ + 3) { + /* an incomplete (corrupt) record is treated as less than ref_name */ + return -1; + } + data_end -= GIT_OID_HEXSZ + 1; + + end = memchr(rec, '\n', data_end); + if (end) { + data_end = end - rec; + } + + cmp_val = memcmp(rec, ref_name, min(ref_len, data_end)); + + if (cmp_val == 0 && data_end != ref_len) { + return (data_end > ref_len) ? 1 : -1; + } + return cmp_val; +} + +static int packed_unsorted_lookup( + git_reference **out, + refdb_fs_backend *backend, + const char *ref_name) { int error = 0; struct packref *entry; @@ -494,6 +646,86 @@ static int packed_lookup( return error; } +static int packed_lookup( + git_reference **out, + refdb_fs_backend *backend, + const char *ref_name) +{ + int error = 0; + const char *left, *right, *data_end; + + if ((error = packed_map_check(backend)) < 0) + return error; + + if (!backend->sorted) { + return packed_unsorted_lookup(out, backend, ref_name); + } + + left = backend->packed_refs_map.data; + right = data_end = ((const char *)backend->packed_refs_map.data) + + backend->packed_refs_map.len; + + while (left < right && *left == '#') { + if (!(left = memchr(left, '\n', data_end - left))) + goto parse_failed; + left++; + } + + while (left < right) { + const char *mid, *rec; + int compare; + + mid = left + (right - left) / 2; + rec = start_of_record(left, mid); + compare = cmp_record_to_refname(rec, data_end - rec, ref_name); + + if (compare < 0) { + left = end_of_record(mid, right); + } else if (compare > 0) { + right = rec; + } else { + const char *eol; + git_oid oid, peel, *peel_ptr = NULL; + + if (data_end - rec < GIT_OID_HEXSZ || + git_oid_fromstr(&oid, rec) < 0) { + goto parse_failed; + } + rec += GIT_OID_HEXSZ + 1; + if (!(eol = memchr(rec, '\n', data_end - rec))) { + goto parse_failed; + } + + /* look for optional "^\n" */ + + if (eol + 1 < data_end) { + rec = eol + 1; + + if (*rec == '^') { + rec++; + if (data_end - rec < GIT_OID_HEXSZ || + git_oid_fromstr(&peel, rec) < 0) { + goto parse_failed; + } + peel_ptr = &peel; + } + } + + *out = git_reference__alloc(ref_name, &oid, peel_ptr); + if (!*out) { + return -1; + } + + return 0; + } + } + return GIT_ENOTFOUND; + +parse_failed: + git_error_set(GIT_ERROR_REFERENCE, "corrupted packed references file"); + return -1; +} + static int refdb_fs_backend__lookup( git_reference **out, git_refdb_backend *_backend, @@ -513,7 +745,6 @@ static int refdb_fs_backend__lookup( git_error_clear(); error = packed_lookup(out, backend, ref_name); } - return error; } @@ -1081,6 +1312,18 @@ static int packed_write(refdb_fs_backend *backend) int error, open_flags = 0; size_t i; + /* take lock and close up packed-refs mmap if open */ + if ((error = git_mutex_lock(&backend->prlock)) < 0) { + return error; + } + + if (backend->packed_refs_map.data) { + git_futils_mmap_free(&backend->packed_refs_map); + backend->packed_refs_map.data = NULL; + } + + git_mutex_unlock(&backend->prlock); + /* lock the cache to updates while we do this */ if ((error = git_sortedcache_wlock(refcache)) < 0) return error; @@ -1568,6 +1811,15 @@ static void refdb_fs_backend__free(git_refdb_backend *_backend) return; git_sortedcache_free(backend->refcache); + + git_mutex_lock(&backend->prlock); + if (backend->packed_refs_map.data) { + git_futils_mmap_free(&backend->packed_refs_map); + backend->packed_refs_map.data = NULL; + } + git_mutex_unlock(&backend->prlock); + git_mutex_free(&backend->prlock); + git__free(backend->gitpath); git__free(backend->commonpath); git__free(backend); @@ -2121,6 +2373,11 @@ int git_refdb_backend_fs( backend = git__calloc(1, sizeof(refdb_fs_backend)); GIT_ERROR_CHECK_ALLOC(backend); + if (git_mutex_init(&backend->prlock) < 0) { + git__free(backend); + return -1; + } + if (git_refdb_init_backend(&backend->parent, GIT_REFDB_BACKEND_VERSION) < 0) goto fail; @@ -2183,6 +2440,7 @@ int git_refdb_backend_fs( return 0; fail: + git_mutex_free(&backend->prlock); git_str_dispose(&gitpath); git__free(backend->gitpath); git__free(backend->commonpath); diff --git a/tests/refs/crashes.c b/tests/refs/crashes.c index 228f479a0..4f508aed2 100644 --- a/tests/refs/crashes.c +++ b/tests/refs/crashes.c @@ -1,4 +1,5 @@ #include "clar_libgit2.h" +#include "refs.h" void test_refs_crashes__double_free(void) { @@ -18,3 +19,26 @@ void test_refs_crashes__double_free(void) cl_git_sandbox_cleanup(); } + +void test_refs_crashes__empty_packedrefs(void) +{ + git_repository *repo; + git_reference *ref; + const char *REFNAME = "refs/heads/xxx"; + git_str temp_path = GIT_STR_INIT; + int fd = 0; + + repo = cl_git_sandbox_init("empty_bare.git"); + + /* create zero-length packed-refs file */ + cl_git_pass(git_str_joinpath(&temp_path, git_repository_path(repo), GIT_PACKEDREFS_FILE)); + cl_git_pass(((fd = p_creat(temp_path.ptr, 0644)) < 0)); + cl_git_pass(p_close(fd)); + + /* should fail gracefully */ + cl_git_fail_with( + GIT_ENOTFOUND, git_reference_lookup(&ref, repo, REFNAME)); + + cl_git_sandbox_cleanup(); + git_str_dispose(&temp_path); +} diff --git a/tests/resources/peeled.git/packed-refs b/tests/resources/peeled.git/packed-refs index ad053d550..078f237ed 100644 --- a/tests/resources/peeled.git/packed-refs +++ b/tests/resources/peeled.git/packed-refs @@ -1,4 +1,4 @@ -# pack-refs with: peeled fully-peeled +# pack-refs with: peeled fully-peeled sorted c2596aa0151888587ec5c0187f261e63412d9e11 refs/foo/tag-outside-tags ^0df1a5865c8abfc09f1f2182e6a31be550e99f07 0df1a5865c8abfc09f1f2182e6a31be550e99f07 refs/heads/master diff --git a/tests/resources/testrepo.git/packed-refs b/tests/resources/testrepo.git/packed-refs index 52f5e876f..fadd7e88a 100644 --- a/tests/resources/testrepo.git/packed-refs +++ b/tests/resources/testrepo.git/packed-refs @@ -1,3 +1,3 @@ -# pack-refs with: peeled +# pack-refs with: peeled sorted 41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/heads/packed 5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/heads/packed-test diff --git a/tests/resources/testrepo/.gitted/packed-refs b/tests/resources/testrepo/.gitted/packed-refs index 6018a19d2..b57898695 100644 --- a/tests/resources/testrepo/.gitted/packed-refs +++ b/tests/resources/testrepo/.gitted/packed-refs @@ -1,4 +1,4 @@ -# pack-refs with: peeled +# pack-refs with: peeled sorted 41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/heads/packed 5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/heads/packed-test b25fa35b38051e4ae45d4222e795f9df2e43f1d1 refs/tags/packed-tag diff --git a/tests/resources/testrepo2/.gitted/packed-refs b/tests/resources/testrepo2/.gitted/packed-refs index 97ea6a848..01de87173 100644 --- a/tests/resources/testrepo2/.gitted/packed-refs +++ b/tests/resources/testrepo2/.gitted/packed-refs @@ -1,4 +1,4 @@ -# pack-refs with: peeled fully-peeled +# pack-refs with: peeled fully-peeled sorted 36060c58702ed4c2a40832c51758d5344201d89a refs/remotes/origin/master 41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/remotes/origin/packed 5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/tags/v0.9