git-pickaxe: improve "best match" heuristics
Instead of comparing number of lines matched, look at the matched characters and count alnums, so that we do not pass blame on not-so-interesting lines, such as an empty line and a line that is indentation followed by a closing brace.

Add an option --score-debug to show the score of each blame_entry while we cook this further on the "next" branch.

Signed-off-by: Junio C Hamano <junkio@cox.net>

maint
parent
1ca6ca876e
commit
5ff62c3002
|
@ -34,8 +34,7 @@ static int longest_file;
|
||||||
static int longest_author;
|
static int longest_author;
|
||||||
static int max_orig_digits;
|
static int max_orig_digits;
|
||||||
static int max_digits;
|
static int max_digits;
|
||||||
|
static int max_score_digits;
|
||||||
#define DEBUG 0
|
|
||||||
|
|
||||||
#define PICKAXE_BLAME_MOVE 01
|
#define PICKAXE_BLAME_MOVE 01
|
||||||
#define PICKAXE_BLAME_COPY 02
|
#define PICKAXE_BLAME_COPY 02
|
||||||
|
@ -78,6 +77,11 @@ struct blame_entry {
|
||||||
* suspect's file; internally all line numbers are 0 based.
|
* suspect's file; internally all line numbers are 0 based.
|
||||||
*/
|
*/
|
||||||
int s_lno;
|
int s_lno;
|
||||||
|
|
||||||
|
/* how significant this entry is -- cached to avoid
|
||||||
|
* scanning the lines over and over
|
||||||
|
*/
|
||||||
|
unsigned score;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct scoreboard {
|
struct scoreboard {
|
||||||
|
@ -215,9 +219,6 @@ static void process_u_diff(void *state_, char *line, unsigned long len)
|
||||||
struct chunk *chunk;
|
struct chunk *chunk;
|
||||||
int off1, off2, len1, len2, num;
|
int off1, off2, len1, len2, num;
|
||||||
|
|
||||||
if (DEBUG)
|
|
||||||
fprintf(stderr, "%.*s", (int) len, line);
|
|
||||||
|
|
||||||
num = state->ret->num;
|
num = state->ret->num;
|
||||||
if (len < 4 || line[0] != '@' || line[1] != '@') {
|
if (len < 4 || line[0] != '@' || line[1] != '@') {
|
||||||
if (state->hunk_in_pre_context && line[0] == ' ')
|
if (state->hunk_in_pre_context && line[0] == ' ')
|
||||||
|
@ -295,10 +296,6 @@ static struct patch *get_patch(struct origin *parent, struct origin *origin)
|
||||||
char *blob_p, *blob_o;
|
char *blob_p, *blob_o;
|
||||||
struct patch *patch;
|
struct patch *patch;
|
||||||
|
|
||||||
if (DEBUG) fprintf(stderr, "get patch %.8s %.8s\n",
|
|
||||||
sha1_to_hex(parent->commit->object.sha1),
|
|
||||||
sha1_to_hex(origin->commit->object.sha1));
|
|
||||||
|
|
||||||
blob_p = read_sha1_file(parent->blob_sha1, type,
|
blob_p = read_sha1_file(parent->blob_sha1, type,
|
||||||
(unsigned long *) &file_p.size);
|
(unsigned long *) &file_p.size);
|
||||||
blob_o = read_sha1_file(origin->blob_sha1, type,
|
blob_o = read_sha1_file(origin->blob_sha1, type,
|
||||||
|
@ -352,6 +349,7 @@ static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
|
||||||
memcpy(dst, src, sizeof(*src));
|
memcpy(dst, src, sizeof(*src));
|
||||||
dst->prev = p;
|
dst->prev = p;
|
||||||
dst->next = n;
|
dst->next = n;
|
||||||
|
dst->score = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *nth_line(struct scoreboard *sb, int lno)
|
static const char *nth_line(struct scoreboard *sb, int lno)
|
||||||
|
@ -448,7 +446,7 @@ static void split_blame(struct scoreboard *sb,
|
||||||
add_blame_entry(sb, new_entry);
|
add_blame_entry(sb, new_entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DEBUG) {
|
if (1) { /* sanity */
|
||||||
struct blame_entry *ent;
|
struct blame_entry *ent;
|
||||||
int lno = 0, corrupt = 0;
|
int lno = 0, corrupt = 0;
|
||||||
|
|
||||||
|
@ -530,12 +528,6 @@ static int pass_blame_to_parent(struct scoreboard *sb,
|
||||||
for (i = 0; i < patch->num; i++) {
|
for (i = 0; i < patch->num; i++) {
|
||||||
struct chunk *chunk = &patch->chunks[i];
|
struct chunk *chunk = &patch->chunks[i];
|
||||||
|
|
||||||
if (DEBUG)
|
|
||||||
fprintf(stderr,
|
|
||||||
"plno = %d, tlno = %d, "
|
|
||||||
"same as parent up to %d, resync %d and %d\n",
|
|
||||||
plno, tlno,
|
|
||||||
chunk->same, chunk->p_next, chunk->t_next);
|
|
||||||
blame_chunk(sb, tlno, plno, chunk->same, target, parent);
|
blame_chunk(sb, tlno, plno, chunk->same, target, parent);
|
||||||
plno = chunk->p_next;
|
plno = chunk->p_next;
|
||||||
tlno = chunk->t_next;
|
tlno = chunk->t_next;
|
||||||
|
@ -547,14 +539,37 @@ static int pass_blame_to_parent(struct scoreboard *sb,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void copy_split_if_better(struct blame_entry best_so_far[3],
|
static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
|
||||||
|
{
|
||||||
|
unsigned score;
|
||||||
|
const char *cp, *ep;
|
||||||
|
|
||||||
|
if (e->score)
|
||||||
|
return e->score;
|
||||||
|
|
||||||
|
score = 0;
|
||||||
|
cp = nth_line(sb, e->lno);
|
||||||
|
ep = nth_line(sb, e->lno + e->num_lines);
|
||||||
|
while (cp < ep) {
|
||||||
|
unsigned ch = *((unsigned char *)cp);
|
||||||
|
if (isalnum(ch))
|
||||||
|
score++;
|
||||||
|
cp++;
|
||||||
|
}
|
||||||
|
e->score = score;
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void copy_split_if_better(struct scoreboard *sb,
|
||||||
|
struct blame_entry best_so_far[3],
|
||||||
struct blame_entry this[3])
|
struct blame_entry this[3])
|
||||||
{
|
{
|
||||||
if (!this[1].suspect)
|
if (!this[1].suspect)
|
||||||
return;
|
return;
|
||||||
if (best_so_far[1].suspect &&
|
if (best_so_far[1].suspect) {
|
||||||
(this[1].num_lines < best_so_far[1].num_lines))
|
if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
|
memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -596,7 +611,7 @@ static void find_copy_in_blob(struct scoreboard *sb,
|
||||||
tlno + ent->s_lno, plno,
|
tlno + ent->s_lno, plno,
|
||||||
chunk->same + ent->s_lno,
|
chunk->same + ent->s_lno,
|
||||||
parent);
|
parent);
|
||||||
copy_split_if_better(split, this);
|
copy_split_if_better(sb, split, this);
|
||||||
}
|
}
|
||||||
plno = chunk->p_next;
|
plno = chunk->p_next;
|
||||||
tlno = chunk->t_next;
|
tlno = chunk->t_next;
|
||||||
|
@ -699,7 +714,7 @@ static int find_copy_in_parent(struct scoreboard *sb,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
find_copy_in_blob(sb, ent, norigin, this, &file_p);
|
find_copy_in_blob(sb, ent, norigin, this, &file_p);
|
||||||
copy_split_if_better(split, this);
|
copy_split_if_better(sb, split, this);
|
||||||
}
|
}
|
||||||
if (split[1].suspect)
|
if (split[1].suspect)
|
||||||
split_blame(sb, split, ent);
|
split_blame(sb, split, ent);
|
||||||
|
@ -944,6 +959,7 @@ static void get_commit_info(struct commit *commit,
|
||||||
#define OUTPUT_PORCELAIN 010
|
#define OUTPUT_PORCELAIN 010
|
||||||
#define OUTPUT_SHOW_NAME 020
|
#define OUTPUT_SHOW_NAME 020
|
||||||
#define OUTPUT_SHOW_NUMBER 040
|
#define OUTPUT_SHOW_NUMBER 040
|
||||||
|
#define OUTPUT_SHOW_SCORE 0100
|
||||||
|
|
||||||
static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
|
static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
|
||||||
{
|
{
|
||||||
|
@ -1016,6 +1032,8 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
|
||||||
show_raw_time),
|
show_raw_time),
|
||||||
ent->lno + 1 + cnt);
|
ent->lno + 1 + cnt);
|
||||||
else {
|
else {
|
||||||
|
if (opt & OUTPUT_SHOW_SCORE)
|
||||||
|
printf(" %*d", max_score_digits, ent->score);
|
||||||
if (opt & OUTPUT_SHOW_NAME)
|
if (opt & OUTPUT_SHOW_NAME)
|
||||||
printf(" %-*.*s", longest_file, longest_file,
|
printf(" %-*.*s", longest_file, longest_file,
|
||||||
suspect->path);
|
suspect->path);
|
||||||
|
@ -1060,9 +1078,10 @@ static void output(struct scoreboard *sb, int option)
|
||||||
for (ent = sb->ent; ent; ent = ent->next) {
|
for (ent = sb->ent; ent; ent = ent->next) {
|
||||||
if (option & OUTPUT_PORCELAIN)
|
if (option & OUTPUT_PORCELAIN)
|
||||||
emit_porcelain(sb, ent);
|
emit_porcelain(sb, ent);
|
||||||
else
|
else {
|
||||||
emit_other(sb, ent, option);
|
emit_other(sb, ent, option);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int prepare_lines(struct scoreboard *sb)
|
static int prepare_lines(struct scoreboard *sb)
|
||||||
|
@ -1121,6 +1140,7 @@ static void find_alignment(struct scoreboard *sb, int *option)
|
||||||
{
|
{
|
||||||
int longest_src_lines = 0;
|
int longest_src_lines = 0;
|
||||||
int longest_dst_lines = 0;
|
int longest_dst_lines = 0;
|
||||||
|
unsigned largest_score = 0;
|
||||||
struct blame_entry *e;
|
struct blame_entry *e;
|
||||||
|
|
||||||
for (e = sb->ent; e; e = e->next) {
|
for (e = sb->ent; e; e = e->next) {
|
||||||
|
@ -1146,9 +1166,12 @@ static void find_alignment(struct scoreboard *sb, int *option)
|
||||||
num = e->lno + e->num_lines;
|
num = e->lno + e->num_lines;
|
||||||
if (longest_dst_lines < num)
|
if (longest_dst_lines < num)
|
||||||
longest_dst_lines = num;
|
longest_dst_lines = num;
|
||||||
|
if (largest_score < ent_score(sb, e))
|
||||||
|
largest_score = ent_score(sb, e);
|
||||||
}
|
}
|
||||||
max_orig_digits = lineno_width(longest_src_lines);
|
max_orig_digits = lineno_width(longest_src_lines);
|
||||||
max_digits = lineno_width(longest_dst_lines);
|
max_digits = lineno_width(longest_dst_lines);
|
||||||
|
max_score_digits = lineno_width(largest_score);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int has_path_in_work_tree(const char *path)
|
static int has_path_in_work_tree(const char *path)
|
||||||
|
@ -1209,6 +1232,8 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix)
|
||||||
tmp = top; top = bottom; bottom = tmp;
|
tmp = top; top = bottom; bottom = tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (!strcmp("--score-debug", arg))
|
||||||
|
output_option |= OUTPUT_SHOW_SCORE;
|
||||||
else if (!strcmp("-f", arg) ||
|
else if (!strcmp("-f", arg) ||
|
||||||
!strcmp("--show-name", arg))
|
!strcmp("--show-name", arg))
|
||||||
output_option |= OUTPUT_SHOW_NAME;
|
output_option |= OUTPUT_SHOW_NAME;
|
||||||
|
|
Loading…
Reference in New Issue