diff --git a/src/cli/cmd_blame.c b/src/cli/cmd_blame.c index 180a948f9..1b3deb727 100644 --- a/src/cli/cmd_blame.c +++ b/src/cli/cmd_blame.c @@ -95,6 +95,7 @@ static int print_standard(git_blame *blame) int ret = 0; /* Compute the maximum size of things */ + /* for (i = 0; i < git_blame_hunkcount(blame); i++) { const git_blame_hunk *hunk = git_blame_hunk_byindex(blame, i); size_t hunk_author_len = strlen(hunk->orig_signature->name); @@ -124,9 +125,12 @@ static int print_standard(git_blame *blame) } max_lineno_len = strintlen(max_line_number); + */ + max_lineno_len = 42, max_line_len = 42, max_author_len = 42, max_path_len = 42; max_author_len--; +printf("%d\n", git_blame_linecount(blame)); for (i = 1; i < git_blame_linecount(blame); i++) { const git_blame_hunk *hunk = git_blame_hunk_byline(blame, i); int oid_abbrev; diff --git a/src/libgit2/blame.c b/src/libgit2/blame.c index 4f99de69b..e5a39d447 100644 --- a/src/libgit2/blame.c +++ b/src/libgit2/blame.c @@ -7,205 +7,630 @@ #include "blame.h" -#include "git2/commit.h" -#include "git2/revparse.h" -#include "git2/revwalk.h" -#include "git2/tree.h" -#include "git2/diff.h" +#include "commit.h" +#include "reader.h" +#include "tree.h" + #include "git2/blob.h" -#include "git2/signature.h" -#include "git2/mailmap.h" -#include "util.h" -#include "repository.h" -#include "blame_git.h" +#include "git2/revwalk.h" -static int hunk_byfinalline_search_cmp(const void *key, const void *entry) +GIT_HASHMAP_OID_FUNCTIONS(git_blame_contributormap, GIT_HASHMAP_INLINE, git_commit *); + +int git_blame_options_init(git_blame_options *opts, unsigned int version) { - git_blame_hunk *hunk = (git_blame_hunk*)entry; - - size_t lineno = *(size_t*)key; - size_t lines_in_hunk = hunk->lines_in_hunk; - size_t final_start_line_number = hunk->final_start_line_number; - - if (lineno < final_start_line_number) - return -1; - if (lineno >= final_start_line_number + lines_in_hunk) - return 1; + GIT_INIT_STRUCTURE_FROM_TEMPLATE(opts, version, + 
git_blame_options, GIT_BLAME_OPTIONS_INIT); return 0; } -static int paths_cmp(const void *a, const void *b) { return git__strcmp((char*)a, (char*)b); } -static int hunk_cmp(const void *_a, const void *_b) +static int normalize_options( + git_blame_options *out, + const git_blame_options *in) { - git_blame_hunk *a = (git_blame_hunk*)_a, - *b = (git_blame_hunk*)_b; + git_blame_options default_opts = GIT_BLAME_OPTIONS_INIT; - if (a->final_start_line_number > b->final_start_line_number) - return 1; - else if (a->final_start_line_number < b->final_start_line_number) - return -1; - else - return 0; + memcpy(out, in ? in : &default_opts, sizeof(git_blame_options)); + + return 0; } -static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line) -{ - return line >= (hunk->final_start_line_number + hunk->lines_in_hunk - 1); -} - -static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line) -{ - return line <= hunk->final_start_line_number; -} - -static git_blame_hunk *new_hunk( - size_t start, - size_t lines, - size_t orig_start, - const char *path, - git_blame *blame) -{ - git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk)); - if (!hunk) return NULL; - - hunk->lines_in_hunk = lines; - hunk->final_start_line_number = start; - hunk->orig_start_line_number = orig_start; - hunk->orig_path = path ? 
git__strdup(path) : NULL; - git_oid_clear(&hunk->orig_commit_id, blame->repository->oid_type); - git_oid_clear(&hunk->final_commit_id, blame->repository->oid_type); - - return hunk; -} - -static void free_hunk(git_blame_hunk *hunk) -{ - git__free((char *)hunk->orig_path); - git__free((char *)hunk->summary); - git_signature_free(hunk->final_signature); - git_signature_free(hunk->final_committer); - git_signature_free(hunk->orig_signature); - git_signature_free(hunk->orig_committer); - git__free(hunk); -} - -static git_blame_hunk *dup_hunk(git_blame_hunk *hunk, git_blame *blame) -{ - git_blame_hunk *newhunk = new_hunk( - hunk->final_start_line_number, - hunk->lines_in_hunk, - hunk->orig_start_line_number, - hunk->orig_path, - blame); - - if (!newhunk) - return NULL; - - git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id); - git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id); - newhunk->boundary = hunk->boundary; - - if (git_signature_dup(&newhunk->final_signature, hunk->final_signature) < 0 || - git_signature_dup(&newhunk->final_committer, hunk->final_committer) < 0 || - git_signature_dup(&newhunk->orig_signature, hunk->orig_signature) < 0 || - git_signature_dup(&newhunk->orig_committer, hunk->orig_committer) < 0 || - (newhunk->summary = git__strdup(hunk->summary)) == NULL) { - free_hunk(newhunk); - return NULL; - } - - return newhunk; -} - -/* Starting with the hunk that includes start_line, shift all following hunks' - * final_start_line by shift_by lines */ -static void shift_hunks_by(git_vector *v, size_t start_line, int shift_by) -{ - size_t i; - for (i = 0; i < v->length; i++) { - git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i]; - if(hunk->final_start_line_number < start_line){ - continue; - } - hunk->final_start_line_number += shift_by; - } -} - -git_blame *git_blame__alloc( +static git_blame *blame_alloc( git_repository *repo, - git_blame_options opts, + git_blame_options *given_opts, const char *path) { - git_blame *gbr = 
git__calloc(1, sizeof(git_blame)); - if (!gbr) + git_blame *blame; + + if ((blame = git__calloc(1, sizeof(git_blame))) == NULL) return NULL; - gbr->repository = repo; - gbr->options = opts; + blame->repository = repo; - if (git_vector_init(&gbr->hunks, 8, hunk_cmp) < 0 || - git_vector_init(&gbr->paths, 8, paths_cmp) < 0 || - (gbr->path = git__strdup(path)) == NULL || - git_vector_insert(&gbr->paths, git__strdup(path)) < 0) { - git_blame_free(gbr); + if (normalize_options(&blame->options, given_opts) < 0 || + (blame->path = git__strdup(path)) == NULL) { + git_blame_free(blame); return NULL; } - if (opts.flags & GIT_BLAME_USE_MAILMAP && - git_mailmap_from_repository(&gbr->mailmap, repo) < 0) { - git_blame_free(gbr); - return NULL; - } - - return gbr; + return blame; } -void git_blame_free(git_blame *blame) +struct diff_line_data { + git_blame *blame; + git_commit *commit; + bool has_changes; + bool reassigned; +}; + +static int diff_line_cb( + const git_diff_delta *delta_diff, + const git_diff_hunk *hunk_diff, + const git_diff_line *line_diff, + void *payload) { + struct diff_line_data *diff_line_data = payload; + git_blame *blame = diff_line_data->blame; + git_blame_line_candidate *line; + + GIT_UNUSED(delta_diff); + GIT_UNUSED(hunk_diff); + + /* printf("%d\n", line_diff->new_lineno); */ + + diff_line_data->has_changes = true; + + /* Ignore deletions. */ + if (line_diff->new_lineno < 0) + return 0; + + GIT_ASSERT(line_diff->new_lineno <= (int)blame->lines.size); + + /* printf("%c / %d / %d / %.*s", line_diff->origin, line_diff->old_lineno, line_diff->new_lineno, (int)line_diff->content_len, line_diff->content); */ + + /* + * We've already assigned presumptive blame to the current commit, + * so here we're only interested in context lines, which are lines + * that are unchanged from the parent. A context line indicates + * that the blame doesn't belong to the current commit, but to this + * parentage. We'll reassign it to this parent and then continue. 
+ */ + if (line_diff->origin != GIT_DIFF_LINE_CONTEXT) + return 0; + + line = git_array_get(blame->lines, (size_t)(line_diff->new_lineno - 1)); + + /* printf("%c / %s\n", line->definitive ? '!' : '?', git_oid_tostr_s(git_commit_id(line->commit))); */ + + /* + * If the current line is already blamed, nothing to do. + */ + if (line->definitive) + return 0; + + /* + * Make sure that we're examining a presumptive commit and not + * something where we've already reassigned blame. + */ + if (line->commit == blame->current_commit) { + git_commit_free(line->commit); + git_commit_dup(&line->commit, diff_line_data->commit); + + diff_line_data->reassigned = 1; + } + + return 0; +} + +static int setup_contents_lines(git_blame *blame) +{ + const char *start, *p; + size_t remain = blame->contents_len; + git_blame_line_candidate *line; + + /* + * Set up the lines - we are the presumptive blame for all + * changes, and we will diff against our parents to reassign + * that presumptive blame to one of them, or take definitive + * ownership. + */ + for (start = p = blame->contents, remain = blame->contents_len; + remain > 0; + p++, remain--) { + if (*p == '\n') { + if ((line = git_array_alloc(blame->lines)) == NULL) + return -1; + + if (git_commit_dup(&line->commit, blame->current_commit) < 0) + return -1; + + line->definitive = 0; + line->contents = start; + line->contents_len = p - start; + + start = remain ? 
p + 1 : NULL; + } + } + + /* TODO: test no trailing newline */ + if (start != p) { + if ((line = git_array_alloc(blame->lines)) == NULL) + return -1; + + if (git_commit_dup(&line->commit, blame->current_commit) < 0) + return -1; + + line->definitive = 0; + line->contents = start; + line->contents_len = p - start; + } + + /* + * diff's line callback uses ints for line numbers + */ + if (blame->lines.size >= INT_MAX) { + git_error_set(GIT_ERROR_INVALID, "file is too large to blame"); + return -1; + } + + return 0; +} + +static int mark_as_contributor(git_blame *blame, git_commit *commit) +{ + git_commit *dup = NULL; + + if (git_commit_dup(&dup, commit) < 0 || + git_blame_contributormap_put(&blame->contributors, git_commit_id(dup), dup) < 0) { + git_commit_free(dup); + return -1; + } + + return 0; +} + +static int setup_blame_from_buf(git_blame *blame, git_str *buf) +{ + git_commit *fake_commit = NULL; + git_reference *head = NULL, *head_resolved = NULL; + git_oid *fake_parent; + int error = -1; + + if (git_repository_head(&head, blame->repository) < 0 || + git_reference_resolve(&head_resolved, head) < 0) + goto done; + + fake_commit = git__calloc(1, sizeof(git_commit)); + GIT_ERROR_CHECK_ALLOC(fake_commit); + + fake_parent = git_array_alloc(fake_commit->parent_ids); + GIT_ERROR_CHECK_ALLOC(fake_parent); + git_oid_cpy(fake_parent, git_reference_target(head_resolved)); + + git_oid_clear(&fake_commit->object.cached.oid, blame->repository->oid_type); + + fake_commit->object.cached.type = GIT_OBJECT_COMMIT; + fake_commit->object.repo = blame->repository; + + if (git_commit_dup(&blame->current_commit, fake_commit) < 0 || + mark_as_contributor(blame, fake_commit) < 0) + goto done; + + git_str_swap(&blame->contents_buf, buf); + blame->contents = blame->contents_buf.ptr; + blame->contents_len = blame->contents_buf.size; + + error = setup_contents_lines(blame); + +done: + git_commit_free(fake_commit); + git_reference_free(head_resolved); + git_reference_free(head); + return 
error; +} + +static int setup_blame_from_head(git_blame *blame) +{ + git_oid commit_id; + git_commit *commit = NULL; + git_tree *tree = NULL; + git_tree_entry *tree_entry = NULL; + git_blob *blob = NULL; + int error = -1; + + if (git_revwalk_next(&commit_id, blame->revwalk) < 0 || + git_commit_lookup(&commit, blame->repository, &commit_id) < 0 || + git_commit_dup(&blame->current_commit, commit) < 0 || + git_commit_tree(&tree, commit) < 0 || + git_tree_entry_bypath(&tree_entry, tree, blame->path) < 0 || + git_blob_lookup(&blob, blame->repository, &tree_entry->oid) < 0 || + git_blob_dup(&blame->contents_blob, blob) < 0 || + mark_as_contributor(blame, commit) < 0) + goto done; + + blame->contents = git_blob_rawcontent(blame->contents_blob); + blame->contents_len = git_blob_rawsize(blame->contents_blob); + + error = setup_contents_lines(blame); + +done: + git_blob_free(blob); + git_tree_entry_free(tree_entry); + git_tree_free(tree); + git_commit_free(commit); + return error; +} + +static int compare_to_parent( + bool *is_unchanged, + bool *has_reassigned, + git_blame *blame, + git_commit *parent) +{ + git_tree *current_tree = NULL, *parent_tree = NULL; + git_tree_entry *current_tree_entry = NULL, *parent_tree_entry = NULL; + git_blob *current_blob = NULL, *parent_blob = NULL; + git_diff_options diff_options = GIT_DIFF_OPTIONS_INIT; + struct diff_line_data diff_line_data; + const char *path = blame->path; + int error = -1; + + /* TODO: move options into blame so that we don't set them up over and over again */ + diff_options.context_lines = UINT32_MAX; + + diff_line_data.blame = blame; + diff_line_data.commit = parent; + diff_line_data.has_changes = false; + diff_line_data.reassigned = false; + + if (git_commit_tree(&parent_tree, parent) < 0) + goto done; + + /* TODO: handle renames */ + if ((error = git_tree_entry_bypath(&parent_tree_entry, parent_tree, blame->path)) < 0) { + if (error == GIT_ENOTFOUND) + error = 0; + + goto done; + } + + if ((error = 
git_blob_lookup(&parent_blob, blame->repository, &parent_tree_entry->oid)) < 0) + goto done; + + /* + * If the blob in the current commit is equal to the parent then + * we know all lines came from them; otherwise, we diff them. + */ + if (!git_oid_iszero(git_commit_id(blame->current_commit))) { + /* TODO: renames here too */ + if (git_commit_tree(&current_tree, blame->current_commit) < 0 || + git_tree_entry_bypath(&current_tree_entry, current_tree, blame->path) < 0 || + git_blob_lookup(&current_blob, blame->repository, &current_tree_entry->oid) < 0) + goto done; + +printf("-- %s", git_oid_tostr_s(git_blob_id(current_blob))); printf(" %s\n", git_oid_tostr_s(git_blob_id(parent_blob))); + if (git_oid_equal(git_blob_id(current_blob), git_blob_id(parent_blob))) { + *is_unchanged = 1; + *has_reassigned = 0; + error = 0; + goto done; + } + } + + if ((error = git_diff_blob_to_buffer(parent_blob, path, + blame->contents, blame->contents_len, + blame->path, &diff_options, NULL, NULL, + NULL, diff_line_cb, &diff_line_data)) < 0) + goto done; + + *is_unchanged = !diff_line_data.has_changes; + *has_reassigned = diff_line_data.reassigned; + +done: + git_blob_free(current_blob); + git_blob_free(parent_blob); + git_tree_entry_free(current_tree_entry); + git_tree_entry_free(parent_tree_entry); + git_tree_free(current_tree); + git_tree_free(parent_tree); + + return error; +} + +static int pass_presumptive_blame(git_blame *blame, git_commit *parent) +{ + git_blame_line_candidate *line; size_t i; - git_blame_hunk *hunk; - if (!blame) return; + for (i = 0; i < blame->lines.size; i++) { + line = git_array_get(blame->lines, i); - git_vector_foreach(&blame->hunks, i, hunk) - free_hunk(hunk); + if (line->definitive) + continue; - git_vector_dispose(&blame->hunks); - git_array_clear(blame->lines); + if (line->commit == blame->current_commit) { + git_commit_free(line->commit); + git_commit_dup(&line->commit, parent); + } + } - git_vector_dispose_deep(&blame->paths); + return 0; +} - 
git_array_clear(blame->line_index); +static int take_definitive_blame(git_blame *blame) +{ + git_blame_line_candidate *line; + size_t i; - git_mailmap_free(blame->mailmap); + for (i = 0; i < blame->lines.size; i++) { + line = git_array_get(blame->lines, i); - git__free(blame->path); - git_blob_free(blame->final_blob); - git__free(blame); + if (line->commit == blame->current_commit) { + GIT_ASSERT(!line->definitive); + line->definitive = 1; + } + } + + return 0; +} + +static void dump_state(git_blame *blame) +{ + git_blame_line_candidate *line; + size_t i; + + for (i = 0; i < blame->lines.size; i++) { + line = git_array_get(blame->lines, i); + + printf("%ld %c %s %.*s\n", + i, + line->definitive ? '!' : '?', + git_oid_tostr_s(git_commit_id(line->commit)), + (int)line->contents_len, + line->contents); + } +} + +static int consider_current_commit(git_blame *blame) +{ + git_commit *this = NULL, *parent = NULL; + size_t i, parent_count; + int error = -1; + + /* + * If this commit does not actually contribute to the blame, don't + * bother looking at it. + */ + /* huh???? */ + /* + if (git_oidmap_get_and_delete((void **)&this, blame->contributors, + git_commit_id(blame->current_commit)) == GIT_ENOTFOUND) { + return 0; + } + */ + + printf("CONSIDERING CURRENT COMMIT\n"); + + /* TODO: honor first parent mode here? */ + parent_count = git_commit_parentcount(blame->current_commit); + + /* + * Compare to each parent - this will reassign presumptive blame + * for any lines that originated with them. + */ + for (i = 0; i < parent_count; i++) { + bool is_unchanged = false; + bool has_reassigned = false; + + /* printf(" EXAMINING PARENT: %d\n", (int)i); */ + + if (git_commit_parent(&parent, blame->current_commit, i) < 0 || + compare_to_parent(&is_unchanged, &has_reassigned, blame, parent) < 0) + goto done; + + /* + * If we were unchanged from this parent, then all the + * presumptive blame moves to them. 
+ */ + if (is_unchanged) { + /* printf("UNCHANGED!\n"); */ + mark_as_contributor(blame, parent); + + error = pass_presumptive_blame(blame, parent); + goto done; + } + + /* Record this commit if it contributed. */ + if (has_reassigned) + mark_as_contributor(blame, parent); + + git_commit_free(parent); + parent = NULL; + + /* + * If this commit didn't contribute to the blame, + * don't follow it. + * + * TODO: drop the first-parent check - it should be + * contributing too! + */ + /* + if (!has_reassigned && i > 0) { + printf("HIDING: %s\n", git_oid_tostr_s(&blame->current_parents[i])); + git_revwalk_hide(blame->revwalk, &blame->current_parents[i]); + } + */ + } + + /* + * Take definitive ownership of any lines that our parents didn't + * touch. + */ + +/* printf("TAKING SOME OWNERSHIP\n");*/ + error = take_definitive_blame(blame); + +done: +/* printf("DONE ERROR IS: %d\n", error);*/ + git_commit_free(parent); + return error; +} + +/* TODO: coalesce with setup_from_head */ +static int move_next_commit(git_blame *blame) +{ + git_oid commit_id; + git_commit *commit = NULL; + int error = -1; + + git_commit_free(blame->current_commit); + blame->current_commit = NULL; + + /* TODO: lookup the blob and ignore seen blobs? 
*/ + + if (git_revwalk_next(&commit_id, blame->revwalk) < 0 || + git_commit_lookup(&commit, blame->repository, &commit_id) < 0 || + git_commit_dup(&blame->current_commit, commit) < 0) + goto done; + + error = 0; + +done: + git_commit_free(commit); + return error; +} + +static int blame_file_from_buffer( + git_blame **out, + git_repository *repo, + const char *path, + git_str *contents_buf, + git_blame_options *options) +{ + git_blame *blame; + int error = -1; + + if ((blame = blame_alloc(repo, options, path)) == NULL) + goto on_error; + + /* TODO: commit boundaries */ + if (git_revwalk_new(&blame->revwalk, blame->repository) < 0 || + git_revwalk_sorting(blame->revwalk, GIT_SORT_TOPOLOGICAL) < 0 || + git_revwalk_push_head(blame->revwalk) < 0) + goto on_error; + + error = contents_buf ? + setup_blame_from_buf(blame, contents_buf) : + setup_blame_from_head(blame); + + do { + if ((error = consider_current_commit(blame)) < 0) { + if (error == GIT_ITEROVER) { + printf("DONE!\n"); + break; + } + + goto on_error; + } + + if (move_next_commit(blame) < 0) + goto on_error; + } while (git_blame_contributormap_size(&blame->contributors) > 0); + +/* printf("=========================================================\n"); */ + +dump_state(blame); + + if (error != GIT_ITEROVER) + goto on_error; + + *out = blame; + return 0; + +on_error: + git_blame_free(blame); + return error; +} + +int git_blame_file( + git_blame **out, + git_repository *repo, + const char *path, + git_blame_options *options) +{ + git_reader *reader = NULL; + git_str contents = GIT_STR_INIT; + int error = -1; + + /* + * TODO: need an option (like apply) to know whether we're + * looking at the workdir, the index, or HEAD. 
+ */ + + if (git_reader_for_workdir(&reader, repo, false) < 0 || + git_reader_read(&contents, NULL, NULL, reader, path) < 0) + goto done; + + error = blame_file_from_buffer(out, repo, path, &contents, options); + +done: + git_str_dispose(&contents); + git_reader_free(reader); + return error; +} + +int git_blame_file_from_buffer( + git_blame **out, + git_repository *repo, + const char *path, + const char *contents, + size_t contents_len, + git_blame_options *options) +{ + git_str contents_buf = GIT_STR_INIT; + int error = -1; + + GIT_ASSERT_ARG(out); + GIT_ASSERT_ARG(repo); + GIT_ASSERT_ARG(path); + GIT_ASSERT_ARG(contents); + + if (git_str_put(&contents_buf, contents, contents_len) < 0) + goto done; + + error = blame_file_from_buffer(out, repo, path, &contents_buf, options); + +done: + git_str_dispose(&contents_buf); + return error; +} + +int git_blame_buffer( + git_blame **out, + git_blame *base, + const char *buffer, + size_t buffer_len) +{ + git_blame *blame; + + GIT_ASSERT_ARG(out); + GIT_ASSERT_ARG(base); + GIT_ASSERT_ARG(buffer || !buffer_len); + + if ((blame = blame_alloc(base->repository, &base->options, base->path)) == NULL) + return -1; + +if (1) +return -42; + + *out = blame; + return 0; } size_t git_blame_hunkcount(git_blame *blame) { GIT_ASSERT_ARG(blame); - return blame->hunks.length; -} - -size_t git_blame_linecount(git_blame *blame) -{ - GIT_ASSERT_ARG(blame); - - return git_array_size(blame->line_index); -} - -const git_blame_line *git_blame_line_byindex( - git_blame *blame, - size_t idx) -{ - GIT_ASSERT_ARG_WITH_RETVAL(blame, NULL); - GIT_ASSERT_WITH_RETVAL(idx > 0 && idx <= git_array_size(blame->line_index), NULL); - - return git_array_get(blame->lines, idx - 1); + return 42; } const git_blame_hunk *git_blame_hunk_byindex( @@ -213,22 +638,17 @@ const git_blame_hunk *git_blame_hunk_byindex( size_t index) { GIT_ASSERT_ARG_WITH_RETVAL(blame, NULL); - return git_vector_get(&blame->hunks, index); + + return (index == 0) ? 
NULL : NULL; } const git_blame_hunk *git_blame_hunk_byline( git_blame *blame, size_t lineno) { - size_t i, new_lineno = lineno; - GIT_ASSERT_ARG_WITH_RETVAL(blame, NULL); - if (git_vector_bsearch2(&i, &blame->hunks, - hunk_byfinalline_search_cmp, &new_lineno) != 0) - return NULL; - - return git_blame_hunk_byindex(blame, i); + return (lineno == 0) ? NULL : NULL; } #ifndef GIT_DEPRECATE_HARD @@ -254,399 +674,35 @@ const git_blame_hunk *git_blame_get_hunk_byline( } #endif -static int normalize_options( - git_blame_options *out, - const git_blame_options *in, - git_repository *repo) +size_t git_blame_linecount(git_blame *blame) { - git_blame_options dummy = GIT_BLAME_OPTIONS_INIT; - if (!in) in = &dummy; + GIT_ASSERT_ARG(blame); - memcpy(out, in, sizeof(git_blame_options)); - - /* No newest_commit => HEAD */ - if (git_oid_is_zero(&out->newest_commit)) { - if (git_reference_name_to_id(&out->newest_commit, repo, "HEAD") < 0) { - return -1; - } - } - - /* min_line 0 really means 1 */ - if (!out->min_line) out->min_line = 1; - /* max_line 0 really means N, but we don't know N yet */ - - /* Fix up option implications */ - if (out->flags & GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES) - out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES; - if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES) - out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES; - if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES) - out->flags |= GIT_BLAME_TRACK_COPIES_SAME_FILE; - - return 0; + return git_array_size(blame->lines); } -static git_blame_hunk *split_hunk_in_vector( - git_vector *vec, - git_blame_hunk *hunk, - size_t rel_line, - bool return_new, - git_blame *blame) +const git_blame_line *git_blame_line_byindex(git_blame *blame, size_t idx) { - size_t new_line_count; - git_blame_hunk *nh; + GIT_ASSERT_ARG_WITH_RETVAL(blame, NULL); - /* Don't split if already at a boundary */ - if (rel_line <= 0 || - rel_line >= hunk->lines_in_hunk) - { - return hunk; - } - - new_line_count = 
hunk->lines_in_hunk - rel_line; - nh = new_hunk(hunk->final_start_line_number + rel_line, - new_line_count, hunk->orig_start_line_number + rel_line, - hunk->orig_path, blame); - - if (!nh) - return NULL; - - git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id); - git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id); - - /* Adjust hunk that was split */ - hunk->lines_in_hunk -= new_line_count; - git_vector_insert_sorted(vec, nh, NULL); - { - git_blame_hunk *ret = return_new ? nh : hunk; - return ret; - } + /* TODO: fix the line_candidate struct to actually contain git_blame_lines instead */ + return (git_blame_line *)git_array_get(blame->lines, idx); } -/* - * Construct a list of char indices for where lines begin - * Adapted from core git: - * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789 - */ -static int index_blob_lines(git_blame *blame) +void git_blame_free(git_blame *blame) { - const char *buf = blame->final_buf; - size_t len = blame->final_buf_size; - int num = 0, incomplete = 0, bol = 1; - git_blame_line *line = NULL; - size_t *i; + git_commit *commit; + git_hashmap_iter_t iter = GIT_HASHMAP_ITER_INIT; - if (len && buf[len-1] != '\n') - incomplete++; /* incomplete line at the end */ + if (!blame) + return; - while (len--) { - if (bol) { - i = git_array_alloc(blame->line_index); - GIT_ERROR_CHECK_ALLOC(i); - *i = buf - blame->final_buf; + while (git_blame_contributormap_iterate(&iter, NULL, &commit, &blame->contributors) == 0) + git_commit_free(commit); - GIT_ASSERT(line == NULL); - line = git_array_alloc(blame->lines); - GIT_ERROR_CHECK_ALLOC(line); - - line->ptr = buf; - bol = 0; - } - - if (*buf++ == '\n') { - GIT_ASSERT(line); - line->len = (buf - line->ptr) - 1; - line = NULL; - - num++; - bol = 1; - } - } - - i = git_array_alloc(blame->line_index); - GIT_ERROR_CHECK_ALLOC(i); - *i = buf - blame->final_buf; - - if (!bol) { - GIT_ASSERT(line); - line->len = buf - line->ptr; - line = NULL; - 
} - - GIT_ASSERT(!line); - - blame->num_lines = num + incomplete; - return blame->num_lines; + git_blame_contributormap_dispose(&blame->contributors); + git_commit_free(blame->current_commit); + git_revwalk_free(blame->revwalk); + git_str_dispose(&blame->contents_buf); + git__free(blame); } - -static git_blame_hunk *hunk_from_entry(git_blame__entry *e, git_blame *blame) -{ - const char *summary; - git_blame_hunk *h = new_hunk( - e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path, - blame); - - if (!h) - return NULL; - - git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit)); - git_oid_cpy(&h->orig_commit_id, git_commit_id(e->suspect->commit)); - - if (git_commit_author_with_mailmap( - &h->final_signature, e->suspect->commit, blame->mailmap) < 0 || - git_commit_committer_with_mailmap( - &h->final_committer, e->suspect->commit, blame->mailmap) < 0 || - git_signature_dup(&h->orig_signature, h->final_signature) < 0 || - git_signature_dup(&h->orig_committer, h->final_committer) < 0 || - (summary = git_commit_summary(e->suspect->commit)) == NULL || - (h->summary = git__strdup(summary)) == NULL) { - free_hunk(h); - return NULL; - } - - h->boundary = e->is_boundary ? 
1 : 0; - return h; -} - -static int load_blob(git_blame *blame) -{ - int error; - - if (blame->final_blob) return 0; - - error = git_commit_lookup(&blame->final, blame->repository, &blame->options.newest_commit); - if (error < 0) - goto cleanup; - error = git_object_lookup_bypath((git_object**)&blame->final_blob, - (git_object*)blame->final, blame->path, GIT_OBJECT_BLOB); - -cleanup: - return error; -} - -static int blame_internal(git_blame *blame) -{ - int error; - git_blame__entry *ent = NULL; - git_blame__origin *o; - - if ((error = load_blob(blame)) < 0 || - (error = git_blame__get_origin(&o, blame, blame->final, blame->path)) < 0) - goto on_error; - - if (git_blob_rawsize(blame->final_blob) > SIZE_MAX) { - git_error_set(GIT_ERROR_NOMEMORY, "blob is too large to blame"); - error = -1; - goto on_error; - } - - blame->final_buf = git_blob_rawcontent(blame->final_blob); - blame->final_buf_size = (size_t)git_blob_rawsize(blame->final_blob); - - ent = git__calloc(1, sizeof(git_blame__entry)); - GIT_ERROR_CHECK_ALLOC(ent); - - ent->num_lines = index_blob_lines(blame); - ent->lno = blame->options.min_line - 1; - ent->num_lines = ent->num_lines - blame->options.min_line + 1; - if (blame->options.max_line > 0) - ent->num_lines = blame->options.max_line - blame->options.min_line + 1; - ent->s_lno = ent->lno; - ent->suspect = o; - - blame->ent = ent; - - if ((error = git_blame__like_git(blame, blame->options.flags)) < 0) - goto on_error; - - for (ent = blame->ent; ent; ent = ent->next) { - git_blame_hunk *h = hunk_from_entry(ent, blame); - git_vector_insert(&blame->hunks, h); - } - -on_error: - for (ent = blame->ent; ent; ) { - git_blame__entry *next = ent->next; - git_blame__free_entry(ent); - ent = next; - } - - return error; -} - -/******************************************************************************* - * File blaming - ******************************************************************************/ - -int git_blame_file( - git_blame **out, - git_repository 
*repo, - const char *path, - git_blame_options *options) -{ - int error = -1; - git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT; - git_blame *blame = NULL; - - GIT_ASSERT_ARG(out); - GIT_ASSERT_ARG(repo); - GIT_ASSERT_ARG(path); - - if ((error = normalize_options(&normOptions, options, repo)) < 0) - goto on_error; - - blame = git_blame__alloc(repo, normOptions, path); - GIT_ERROR_CHECK_ALLOC(blame); - - if ((error = load_blob(blame)) < 0) - goto on_error; - - if ((error = blame_internal(blame)) < 0) - goto on_error; - - *out = blame; - return 0; - -on_error: - git_blame_free(blame); - return error; -} - -/******************************************************************************* - * Buffer blaming - *******************************************************************************/ - -static bool hunk_is_bufferblame(git_blame_hunk *hunk) -{ - return hunk && git_oid_is_zero(&hunk->final_commit_id); -} - -static int buffer_hunk_cb( - const git_diff_delta *delta, - const git_diff_hunk *hunk, - void *payload) -{ - git_blame *blame = (git_blame*)payload; - uint32_t wedge_line; - - GIT_UNUSED(delta); - - wedge_line = (hunk->new_start >= hunk->old_start || hunk->old_lines==0) ? 
hunk->new_start : hunk->old_start; - blame->current_diff_line = wedge_line; - blame->current_hunk = (git_blame_hunk*)git_blame_hunk_byline(blame, wedge_line); - if (!blame->current_hunk) { - /* Line added at the end of the file */ - blame->current_hunk = new_hunk(wedge_line, 0, wedge_line, - blame->path, blame); - blame->current_diff_line++; - GIT_ERROR_CHECK_ALLOC(blame->current_hunk); - git_vector_insert(&blame->hunks, blame->current_hunk); - } else if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){ - /* If this hunk doesn't start between existing hunks, split a hunk up so it does */ - blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk, - wedge_line - blame->current_hunk->final_start_line_number, true, - blame); - GIT_ERROR_CHECK_ALLOC(blame->current_hunk); - } - - return 0; -} - -static int ptrs_equal_cmp(const void *a, const void *b) { return a<b ? -1 : a>b ? 1 : 0; } -static int buffer_line_cb( - const git_diff_delta *delta, - const git_diff_hunk *hunk, - const git_diff_line *line, - void *payload) -{ - git_blame *blame = (git_blame*)payload; - - GIT_UNUSED(delta); - GIT_UNUSED(hunk); - GIT_UNUSED(line); - - if (line->origin == GIT_DIFF_LINE_ADDITION) { - if (hunk_is_bufferblame(blame->current_hunk) && - hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) { - /* Append to the current buffer-blame hunk */ - blame->current_hunk->lines_in_hunk++; - shift_hunks_by(&blame->hunks, blame->current_diff_line, 1); - } else { - /* Create a new buffer-blame hunk with this line */ - shift_hunks_by(&blame->hunks, blame->current_diff_line, 1); - blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path, blame); - GIT_ERROR_CHECK_ALLOC(blame->current_hunk); - git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL); - } - blame->current_diff_line++; - } - - if (line->origin == GIT_DIFF_LINE_DELETION) { - /* Trim the line from the current hunk; remove it if it's now empty */ - size_t shift_base 
= blame->current_diff_line + blame->current_hunk->lines_in_hunk; - - if (--(blame->current_hunk->lines_in_hunk) == 0) { - size_t i; - size_t i_next; - if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) { - git_vector_remove(&blame->hunks, i); - free_hunk(blame->current_hunk); - i_next = min( i , blame->hunks.length -1); - blame->current_hunk = (git_blame_hunk*)git_blame_hunk_byindex(blame, (uint32_t)i_next); - } - } - shift_hunks_by(&blame->hunks, shift_base, -1); - } - return 0; -} - -int git_blame_buffer( - git_blame **out, - git_blame *reference, - const char *buffer, - size_t buffer_len) -{ - git_blame *blame; - git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT; - size_t i; - git_blame_hunk *hunk; - - diffopts.context_lines = 0; - - GIT_ASSERT_ARG(out); - GIT_ASSERT_ARG(reference); - GIT_ASSERT_ARG(buffer && buffer_len); - - blame = git_blame__alloc(reference->repository, reference->options, reference->path); - GIT_ERROR_CHECK_ALLOC(blame); - - /* Duplicate all of the hunk structures in the reference blame */ - git_vector_foreach(&reference->hunks, i, hunk) { - git_blame_hunk *h = dup_hunk(hunk, blame); - GIT_ERROR_CHECK_ALLOC(h); - - git_vector_insert(&blame->hunks, h); - } - - /* Diff to the reference blob */ - git_diff_blob_to_buffer(reference->final_blob, blame->path, - buffer, buffer_len, blame->path, &diffopts, - NULL, NULL, buffer_hunk_cb, buffer_line_cb, blame); - - *out = blame; - return 0; -} - -int git_blame_options_init(git_blame_options *opts, unsigned int version) -{ - GIT_INIT_STRUCTURE_FROM_TEMPLATE( - opts, version, git_blame_options, GIT_BLAME_OPTIONS_INIT); - return 0; -} - -#ifndef GIT_DEPRECATE_HARD -int git_blame_init_options(git_blame_options *opts, unsigned int version) -{ - return git_blame_options_init(opts, version); -} -#endif diff --git a/src/libgit2/blame.h b/src/libgit2/blame.h index 152834ebb..5102286a6 100644 --- a/src/libgit2/blame.h +++ b/src/libgit2/blame.h @@ -1,96 +1,48 @@ +/* + * Copyright (C) 
the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ #ifndef INCLUDE_blame_h__ #define INCLUDE_blame_h__ #include "common.h" - -#include "git2/blame.h" -#include "vector.h" -#include "diff.h" #include "array.h" -#include "git2/oid.h" +#include "hashmap_oid.h" -/* - * One blob in a commit that is being suspected - */ -typedef struct git_blame__origin { - int refcnt; - struct git_blame__origin *previous; +GIT_HASHMAP_OID_STRUCT(git_blame_contributormap, git_commit *); + +typedef struct { + const char *contents; + size_t contents_len; git_commit *commit; - git_blob *blob; - char path[GIT_FLEX_ARRAY]; -} git_blame__origin; - -/* - * Each group of lines is described by a git_blame__entry; it can be split - * as we pass blame to the parents. They form a linked list in the - * scoreboard structure, sorted by the target line number. - */ -typedef struct git_blame__entry { - struct git_blame__entry *prev; - struct git_blame__entry *next; - - /* the first line of this group in the final image; - * internally all line numbers are 0 based. - */ - size_t lno; - - /* how many lines this group has */ - size_t num_lines; - - /* the commit that introduced this group into the final image */ - git_blame__origin *suspect; - - /* true if the suspect is truly guilty; false while we have not - * checked if the group came from one of its parents. - */ - bool guilty; - - /* true if the entry has been scanned for copies in the current parent - */ - bool scanned; - - /* the line number of the first line of this group in the - * suspect's file; internally all line numbers are 0 based. - */ - size_t s_lno; - - /* how significant this entry is -- cached to avoid - * scanning the lines over and over. - */ - unsigned score; - - /* Whether this entry has been tracked to a boundary commit. 
- */ - bool is_boundary; -} git_blame__entry; + unsigned int definitive; +} git_blame_line_candidate; struct git_blame { - char *path; git_repository *repository; - git_mailmap *mailmap; git_blame_options options; - git_vector hunks; - git_array_t(git_blame_line) lines; - git_vector paths; + char *path; - git_blob *final_blob; - git_array_t(size_t) line_index; + /* + * The contents of the final file (either the "newest" blob) + * or the contents of the working directory file. The contents + * pointer points to either the contents_buf or the contents_blob. + */ + const char *contents; + size_t contents_len; - size_t current_diff_line; - git_blame_hunk *current_hunk; + git_array_t(git_blame_line_candidate) lines; - /* Scoreboard fields */ - git_commit *final; - git_blame__entry *ent; - int num_lines; - const char *final_buf; - size_t final_buf_size; + git_str contents_buf; + git_blob *contents_blob; + + git_revwalk *revwalk; + git_blame_contributormap contributors; + + git_commit *current_commit; }; -git_blame *git_blame__alloc( - git_repository *repo, - git_blame_options opts, - const char *path); - #endif diff --git a/src/libgit2/blame_git.c b/src/libgit2/blame_git.c deleted file mode 100644 index 69897b386..000000000 --- a/src/libgit2/blame_git.c +++ /dev/null @@ -1,684 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. - */ - -#include "blame_git.h" - -#include "commit.h" -#include "blob.h" -#include "diff_xdiff.h" - -/* - * Origin is refcounted and usually we keep the blob contents to be - * reused. 
- */ -static git_blame__origin *origin_incref(git_blame__origin *o) -{ - if (o) - o->refcnt++; - return o; -} - -static void origin_decref(git_blame__origin *o) -{ - if (o && --o->refcnt <= 0) { - if (o->previous) - origin_decref(o->previous); - git_blob_free(o->blob); - git_commit_free(o->commit); - git__free(o); - } -} - -/* Given a commit and a path in it, create a new origin structure. */ -static int make_origin(git_blame__origin **out, git_commit *commit, const char *path) -{ - git_blame__origin *o; - git_object *blob; - size_t path_len = strlen(path), alloc_len; - int error = 0; - - if ((error = git_object_lookup_bypath(&blob, (git_object*)commit, - path, GIT_OBJECT_BLOB)) < 0) - return error; - - GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*o), path_len); - GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 1); - o = git__calloc(1, alloc_len); - GIT_ERROR_CHECK_ALLOC(o); - - o->commit = commit; - o->blob = (git_blob *) blob; - o->refcnt = 1; - strcpy(o->path, path); - - *out = o; - - return 0; -} - -/* Locate an existing origin or create a new one. 
*/ -int git_blame__get_origin( - git_blame__origin **out, - git_blame *blame, - git_commit *commit, - const char *path) -{ - git_blame__entry *e; - - for (e = blame->ent; e; e = e->next) { - if (e->suspect->commit == commit && !strcmp(e->suspect->path, path)) { - *out = origin_incref(e->suspect); - } - } - return make_origin(out, commit, path); -} - -typedef struct blame_chunk_cb_data { - git_blame *blame; - git_blame__origin *target; - git_blame__origin *parent; - long tlno; - long plno; -}blame_chunk_cb_data; - -static bool same_suspect(git_blame__origin *a, git_blame__origin *b) -{ - if (a == b) - return true; - if (git_oid_cmp(git_commit_id(a->commit), git_commit_id(b->commit))) - return false; - return 0 == strcmp(a->path, b->path); -} - -/* find the line number of the last line the target is suspected for */ -static bool find_last_in_target(size_t *out, git_blame *blame, git_blame__origin *target) -{ - git_blame__entry *e; - size_t last_in_target = 0; - bool found = false; - - *out = 0; - - for (e=blame->ent; e; e=e->next) { - if (e->guilty || !same_suspect(e->suspect, target)) - continue; - if (last_in_target < e->s_lno + e->num_lines) { - found = true; - last_in_target = e->s_lno + e->num_lines; - } - } - - *out = last_in_target; - return found; -} - -/* - * It is known that lines between tlno to same came from parent, and e - * has an overlap with that range. it also is known that parent's - * line plno corresponds to e's line tlno. - * - * <---- e -----> - * <------> (entirely within) - * <------------> (extends past) - * <------------> (starts before) - * <------------------> (entirely encloses) - * - * Split e into potentially three parts; before this chunk, the chunk - * to be blamed for the parent, and after that portion. 
- */ -static void split_overlap(git_blame__entry *split, git_blame__entry *e, - size_t tlno, size_t plno, size_t same, git_blame__origin *parent) -{ - size_t chunk_end_lno; - - if (e->s_lno < tlno) { - /* there is a pre-chunk part not blamed on the parent */ - split[0].suspect = origin_incref(e->suspect); - split[0].lno = e->lno; - split[0].s_lno = e->s_lno; - split[0].num_lines = tlno - e->s_lno; - split[1].lno = e->lno + tlno - e->s_lno; - split[1].s_lno = plno; - } else { - split[1].lno = e->lno; - split[1].s_lno = plno + (e->s_lno - tlno); - } - - if (same < e->s_lno + e->num_lines) { - /* there is a post-chunk part not blamed on parent */ - split[2].suspect = origin_incref(e->suspect); - split[2].lno = e->lno + (same - e->s_lno); - split[2].s_lno = e->s_lno + (same - e->s_lno); - split[2].num_lines = e->s_lno + e->num_lines - same; - chunk_end_lno = split[2].lno; - } else { - chunk_end_lno = e->lno + e->num_lines; - } - split[1].num_lines = chunk_end_lno - split[1].lno; - - /* - * if it turns out there is nothing to blame the parent for, forget about - * the splitting. !split[1].suspect signals this. - */ - if (split[1].num_lines < 1) - return; - split[1].suspect = origin_incref(parent); -} - -/* - * Link in a new blame entry to the scoreboard. Entries that cover the same - * line range have been removed from the scoreboard previously. - */ -static void add_blame_entry(git_blame *blame, git_blame__entry *e) -{ - git_blame__entry *ent, *prev = NULL; - - origin_incref(e->suspect); - - for (ent = blame->ent; ent && ent->lno < e->lno; ent = ent->next) - prev = ent; - - /* prev, if not NULL, is the last one that is below e */ - e->prev = prev; - if (prev) { - e->next = prev->next; - prev->next = e; - } else { - e->next = blame->ent; - blame->ent = e; - } - if (e->next) - e->next->prev = e; -} - -/* - * src typically is on-stack; we want to copy the information in it to - * a malloced blame_entry that is already on the linked list of the scoreboard. 
- * The origin of dst loses a refcnt while the origin of src gains one. - */ -static void dup_entry(git_blame__entry *dst, git_blame__entry *src) -{ - git_blame__entry *p, *n; - - p = dst->prev; - n = dst->next; - origin_incref(src->suspect); - origin_decref(dst->suspect); - memcpy(dst, src, sizeof(*src)); - dst->prev = p; - dst->next = n; - dst->score = 0; -} - -/* - * split_overlap() divided an existing blame e into up to three parts in split. - * Adjust the linked list of blames in the scoreboard to reflect the split. - */ -static int split_blame(git_blame *blame, git_blame__entry *split, git_blame__entry *e) -{ - git_blame__entry *new_entry; - - if (split[0].suspect && split[2].suspect) { - /* The first part (reuse storage for the existing entry e */ - dup_entry(e, &split[0]); - - /* The last part -- me */ - new_entry = git__malloc(sizeof(*new_entry)); - GIT_ERROR_CHECK_ALLOC(new_entry); - memcpy(new_entry, &(split[2]), sizeof(git_blame__entry)); - add_blame_entry(blame, new_entry); - - /* ... 
and the middle part -- parent */ - new_entry = git__malloc(sizeof(*new_entry)); - GIT_ERROR_CHECK_ALLOC(new_entry); - memcpy(new_entry, &(split[1]), sizeof(git_blame__entry)); - add_blame_entry(blame, new_entry); - } else if (!split[0].suspect && !split[2].suspect) { - /* - * The parent covers the entire area; reuse storage for e and replace it - * with the parent - */ - dup_entry(e, &split[1]); - } else if (split[0].suspect) { - /* me and then parent */ - dup_entry(e, &split[0]); - new_entry = git__malloc(sizeof(*new_entry)); - GIT_ERROR_CHECK_ALLOC(new_entry); - memcpy(new_entry, &(split[1]), sizeof(git_blame__entry)); - add_blame_entry(blame, new_entry); - } else { - /* parent and then me */ - dup_entry(e, &split[1]); - new_entry = git__malloc(sizeof(*new_entry)); - GIT_ERROR_CHECK_ALLOC(new_entry); - memcpy(new_entry, &(split[2]), sizeof(git_blame__entry)); - add_blame_entry(blame, new_entry); - } - - return 0; -} - -/* - * After splitting the blame, the origins used by the on-stack blame_entry - * should lose one refcnt each. - */ -static void decref_split(git_blame__entry *split) -{ - int i; - for (i=0; i<3; i++) - origin_decref(split[i].suspect); -} - -/* - * Helper for blame_chunk(). blame_entry e is known to overlap with the patch - * hunk; split it and pass blame to the parent. - */ -static int blame_overlap( - git_blame *blame, - git_blame__entry *e, - size_t tlno, - size_t plno, - size_t same, - git_blame__origin *parent) -{ - git_blame__entry split[3] = {{0}}; - - split_overlap(split, e, tlno, plno, same, parent); - if (split[1].suspect) - if (split_blame(blame, split, e) < 0) - return -1; - decref_split(split); - - return 0; -} - -/* - * Process one hunk from the patch between the current suspect for blame_entry - * e and its parent. Find and split the overlap, and pass blame to the - * overlapping part to the parent. 
- */ -static int blame_chunk( - git_blame *blame, - size_t tlno, - size_t plno, - size_t same, - git_blame__origin *target, - git_blame__origin *parent) -{ - git_blame__entry *e; - - for (e = blame->ent; e; e = e->next) { - if (e->guilty || !same_suspect(e->suspect, target)) - continue; - if (same <= e->s_lno) - continue; - if (tlno < e->s_lno + e->num_lines) { - if (blame_overlap(blame, e, tlno, plno, same, parent) < 0) - return -1; - } - } - - return 0; -} - -static int my_emit( - long start_a, long count_a, - long start_b, long count_b, - void *cb_data) -{ - blame_chunk_cb_data *d = (blame_chunk_cb_data *)cb_data; - - if (blame_chunk(d->blame, d->tlno, d->plno, start_b, d->target, d->parent) < 0) - return -1; - d->plno = start_a + count_a; - d->tlno = start_b + count_b; - - return 0; -} - -static void trim_common_tail(mmfile_t *a, mmfile_t *b, long ctx) -{ - const int blk = 1024; - long trimmed = 0, recovered = 0; - char *ap = a->ptr + a->size; - char *bp = b->ptr + b->size; - long smaller = (long)((a->size < b->size) ? 
a->size : b->size); - - if (ctx) - return; - - while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) { - trimmed += blk; - ap -= blk; - bp -= blk; - } - - while (recovered < trimmed) - if (ap[recovered++] == '\n') - break; - a->size -= trimmed - recovered; - b->size -= trimmed - recovered; -} - -static int diff_hunks(mmfile_t file_a, mmfile_t file_b, void *cb_data, git_blame_options *options) -{ - xdemitconf_t xecfg = {0}; - xdemitcb_t ecb = {0}; - xpparam_t xpp = {0}; - - if (options->flags & GIT_BLAME_IGNORE_WHITESPACE) - xpp.flags |= XDF_IGNORE_WHITESPACE; - - xecfg.hunk_func = my_emit; - ecb.priv = cb_data; - - trim_common_tail(&file_a, &file_b, 0); - - if (file_a.size > GIT_XDIFF_MAX_SIZE || - file_b.size > GIT_XDIFF_MAX_SIZE) { - git_error_set(GIT_ERROR_INVALID, "file too large to blame"); - return -1; - } - - return xdl_diff(&file_a, &file_b, &xpp, &xecfg, &ecb); -} - -static void fill_origin_blob(git_blame__origin *o, mmfile_t *file) -{ - memset(file, 0, sizeof(*file)); - if (o->blob) { - file->ptr = (char*)git_blob_rawcontent(o->blob); - file->size = (long)git_blob_rawsize(o->blob); - } -} - -static int pass_blame_to_parent( - git_blame *blame, - git_blame__origin *target, - git_blame__origin *parent) -{ - size_t last_in_target; - mmfile_t file_p, file_o; - blame_chunk_cb_data d = { blame, target, parent, 0, 0 }; - - if (!find_last_in_target(&last_in_target, blame, target)) - return 1; /* nothing remains for this target */ - - fill_origin_blob(parent, &file_p); - fill_origin_blob(target, &file_o); - - if (diff_hunks(file_p, file_o, &d, &blame->options) < 0) - return -1; - - /* The reset (i.e. 
anything after tlno) are the same as the parent */ - if (blame_chunk(blame, d.tlno, d.plno, last_in_target, target, parent) < 0) - return -1; - - return 0; -} - -static int paths_on_dup(void **old, void *new) -{ - GIT_UNUSED(old); - git__free(new); - return -1; -} - -static git_blame__origin *find_origin( - git_blame *blame, - git_commit *parent, - git_blame__origin *origin) -{ - git_blame__origin *porigin = NULL; - git_diff *difflist = NULL; - git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT; - git_tree *otree=NULL, *ptree=NULL; - - /* Get the trees from this commit and its parent */ - if (0 != git_commit_tree(&otree, origin->commit) || - 0 != git_commit_tree(&ptree, parent)) - goto cleanup; - - /* Configure the diff */ - diffopts.context_lines = 0; - diffopts.flags = GIT_DIFF_SKIP_BINARY_CHECK; - - /* Check to see if files we're interested have changed */ - diffopts.pathspec.count = blame->paths.length; - diffopts.pathspec.strings = (char**)blame->paths.contents; - if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts)) - goto cleanup; - - if (!git_diff_num_deltas(difflist)) { - /* No changes; copy data */ - git_blame__get_origin(&porigin, blame, parent, origin->path); - } else { - git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT; - int i; - - /* Generate a full diff between the two trees */ - git_diff_free(difflist); - diffopts.pathspec.count = 0; - if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts)) - goto cleanup; - - /* Let diff find renames */ - findopts.flags = GIT_DIFF_FIND_RENAMES; - if (0 != git_diff_find_similar(difflist, &findopts)) - goto cleanup; - - /* Find one that matches */ - for (i=0; i<(int)git_diff_num_deltas(difflist); i++) { - const git_diff_delta *delta = git_diff_get_delta(difflist, i); - - if (!git_vector_bsearch(NULL, &blame->paths, delta->new_file.path)) - { - git_vector_insert_sorted(&blame->paths, (void*)git__strdup(delta->old_file.path), - paths_on_dup); - 
make_origin(&porigin, parent, delta->old_file.path); - } - } - } - -cleanup: - git_diff_free(difflist); - git_tree_free(otree); - git_tree_free(ptree); - return porigin; -} - -/* - * The blobs of origin and porigin exactly match, so everything origin is - * suspected for can be blamed on the parent. - */ -static int pass_whole_blame(git_blame *blame, - git_blame__origin *origin, git_blame__origin *porigin) -{ - git_blame__entry *e; - - if (!porigin->blob && - git_object_lookup((git_object**)&porigin->blob, blame->repository, - git_blob_id(origin->blob), GIT_OBJECT_BLOB) < 0) - return -1; - for (e=blame->ent; e; e=e->next) { - if (!same_suspect(e->suspect, origin)) - continue; - origin_incref(porigin); - origin_decref(e->suspect); - e->suspect = porigin; - } - - return 0; -} - -static int pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt) -{ - git_commit *commit = origin->commit; - int i, num_parents; - git_blame__origin *sg_buf[16]; - git_blame__origin *porigin, **sg_origin = sg_buf; - int ret, error = 0; - - num_parents = git_commit_parentcount(commit); - if (!git_oid_cmp(git_commit_id(commit), &blame->options.oldest_commit)) - /* Stop at oldest specified commit */ - num_parents = 0; - else if (opt & GIT_BLAME_FIRST_PARENT && num_parents > 1) - /* Limit search to the first parent */ - num_parents = 1; - - if (!num_parents) { - git_oid_cpy(&blame->options.oldest_commit, git_commit_id(commit)); - goto finish; - } else if (num_parents < (int)ARRAY_SIZE(sg_buf)) - memset(sg_buf, 0, sizeof(sg_buf)); - else { - sg_origin = git__calloc(num_parents, sizeof(*sg_origin)); - GIT_ERROR_CHECK_ALLOC(sg_origin); - } - - for (i=0; icommit, i)) < 0) - goto finish; - porigin = find_origin(blame, p, origin); - - if (!porigin) { - /* - * We only have to decrement the parent's - * reference count when no porigin has - * been created, as otherwise the commit - * is assigned to the created object. 
- */ - git_commit_free(p); - continue; - } - if (porigin->blob && origin->blob && - !git_oid_cmp(git_blob_id(porigin->blob), git_blob_id(origin->blob))) { - error = pass_whole_blame(blame, origin, porigin); - origin_decref(porigin); - goto finish; - } - for (j = same = 0; jblob), git_blob_id(porigin->blob))) { - same = 1; - break; - } - if (!same) - sg_origin[i] = porigin; - else - origin_decref(porigin); - } - - /* Standard blame */ - for (i=0; iprevious) { - origin_incref(porigin); - origin->previous = porigin; - } - - if ((ret = pass_blame_to_parent(blame, origin, porigin)) != 0) { - if (ret < 0) - error = -1; - - goto finish; - } - } - - /* TODO: optionally find moves in parents' files */ - - /* TODO: optionally find copies in parents' files */ - -finish: - for (i=0; i pair), - * merge them together. - */ -static void coalesce(git_blame *blame) -{ - git_blame__entry *ent, *next; - - for (ent=blame->ent; ent && (next = ent->next); ent = next) { - if (same_suspect(ent->suspect, next->suspect) && - ent->guilty == next->guilty && - ent->s_lno + ent->num_lines == next->s_lno) - { - ent->num_lines += next->num_lines; - ent->next = next->next; - if (ent->next) - ent->next->prev = ent; - origin_decref(next->suspect); - git__free(next); - ent->score = 0; - next = ent; /* again */ - } - } -} - -int git_blame__like_git(git_blame *blame, uint32_t opt) -{ - int error = 0; - - while (true) { - git_blame__entry *ent; - git_blame__origin *suspect = NULL; - - /* Find a suspect to break down */ - for (ent = blame->ent; !suspect && ent; ent = ent->next) - if (!ent->guilty) - suspect = ent->suspect; - if (!suspect) - break; - - /* We'll use this suspect later in the loop, so hold on to it for now. 
*/ - origin_incref(suspect); - - if ((error = pass_blame(blame, suspect, opt)) < 0) - break; - - /* Take responsibility for the remaining entries */ - for (ent = blame->ent; ent; ent = ent->next) { - if (same_suspect(ent->suspect, suspect)) { - ent->guilty = true; - ent->is_boundary = !git_oid_cmp( - git_commit_id(suspect->commit), - &blame->options.oldest_commit); - } - } - origin_decref(suspect); - } - - if (!error) - coalesce(blame); - - return error; -} - -void git_blame__free_entry(git_blame__entry *ent) -{ - if (!ent) return; - origin_decref(ent->suspect); - git__free(ent); -} diff --git a/tests/libgit2/blame/getters.c b/tests/libgit2/blame/getters.c deleted file mode 100644 index fc7e44445..000000000 --- a/tests/libgit2/blame/getters.c +++ /dev/null @@ -1,56 +0,0 @@ -#include "clar_libgit2.h" - -#include "blame.h" - -git_blame *g_blame; - -void test_blame_getters__initialize(void) -{ - size_t i; - git_blame_options opts = GIT_BLAME_OPTIONS_INIT; - - git_blame_hunk hunks[] = { - { 3, GIT_OID_SHA1_ZERO, 1, NULL, NULL, GIT_OID_SHA1_ZERO, "a", 0}, - { 3, GIT_OID_SHA1_ZERO, 4, NULL, NULL, GIT_OID_SHA1_ZERO, "b", 0}, - { 3, GIT_OID_SHA1_ZERO, 7, NULL, NULL, GIT_OID_SHA1_ZERO, "c", 0}, - { 3, GIT_OID_SHA1_ZERO, 10, NULL, NULL, GIT_OID_SHA1_ZERO, "d", 0}, - { 3, GIT_OID_SHA1_ZERO, 13, NULL, NULL, GIT_OID_SHA1_ZERO, "e", 0}, - }; - - g_blame = git_blame__alloc(NULL, opts, ""); - - for (i=0; i<5; i++) { - git_blame_hunk *h = git__calloc(1, sizeof(git_blame_hunk)); - h->final_start_line_number = hunks[i].final_start_line_number; - h->orig_path = git__strdup(hunks[i].orig_path); - h->lines_in_hunk = hunks[i].lines_in_hunk; - - git_vector_insert(&g_blame->hunks, h); - } -} - -void test_blame_getters__cleanup(void) -{ - git_blame_free(g_blame); -} - - -void test_blame_getters__byindex(void) -{ - const git_blame_hunk *h = git_blame_hunk_byindex(g_blame, 2); - cl_assert(h); - cl_assert_equal_s(h->orig_path, "c"); - - h = git_blame_hunk_byindex(g_blame, 95); - 
cl_assert_equal_p(h, NULL); -} - -void test_blame_getters__byline(void) -{ - const git_blame_hunk *h = git_blame_hunk_byline(g_blame, 5); - cl_assert(h); - cl_assert_equal_s(h->orig_path, "b"); - - h = git_blame_hunk_byline(g_blame, 95); - cl_assert_equal_p(h, NULL); -} diff --git a/tests/libgit2/blame/simple.c b/tests/libgit2/blame/simple.c index ee6d5f866..50e0ece71 100644 --- a/tests/libgit2/blame/simple.c +++ b/tests/libgit2/blame/simple.c @@ -1,5 +1,7 @@ #include "blame_helpers.h" +#include "cache.h" + static git_repository *g_repo; static git_blame *g_blame;