|
|
|
/*
|
|
|
|
* Blame
|
|
|
|
*
|
|
|
|
* Copyright (c) 2006, 2014 by its authors
|
|
|
|
* See COPYING for licensing conditions
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "git-compat-util.h"
|
|
|
|
#include "alloc.h"
|
|
|
|
#include "config.h"
|
|
|
|
#include "color.h"
|
|
|
|
#include "builtin.h"
|
|
|
|
#include "environment.h"
|
|
|
|
#include "gettext.h"
|
|
|
|
#include "hex.h"
|
|
|
|
#include "repository.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "diff.h"
|
|
|
|
#include "revision.h"
|
|
|
|
#include "quote.h"
|
|
|
|
#include "string-list.h"
|
|
|
|
#include "mailmap.h"
|
|
|
|
#include "parse-options.h"
|
|
|
|
#include "prio-queue.h"
|
|
|
|
#include "utf8.h"
|
|
|
|
#include "userdiff.h"
|
|
|
|
#include "line-range.h"
|
|
|
|
#include "line-log.h"
|
|
|
|
#include "dir.h"
|
|
|
|
#include "progress.h"
|
|
|
|
#include "object-name.h"
|
|
|
|
#include "object-store.h"
|
|
|
|
#include "pager.h"
|
|
|
|
#include "blame.h"
|
blame: default to HEAD in a bare repo when no start commit is given
When 'git blame' is invoked without specifying the commit to start
blaming from, it starts from the given file's state in the work tree.
However, when invoked in a bare repository without a start commit,
then there is no work tree state to start from, and it dies with the
following error message:
$ git rev-parse --is-bare-repository
true
$ git blame file.c
fatal: this operation must be run in a work tree
This is misleading, because it implies that 'git blame' doesn't work
in bare repositories at all, but it does, in fact, work just fine when
it is given a commit to start from.
We could improve the error message, of course, but let's just default
to HEAD in a bare repository instead, as most likely that is what the
user wanted anyway (if they wanted to start from an other commit, then
they would have specified that in the first place).
'git annotate' is just a thin wrapper around 'git blame', so in the
same situation it printed the same misleading error message, and this
patch fixes it, too.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
#include "refs.h"
|
|
|
|
#include "setup.h"
|
|
|
|
#include "tag.h"
|
|
|
|
#include "write-or-die.h"
|
|
|
|
|
|
|
|
static char blame_usage[] = N_("git blame [<options>] [<rev-opts>] [<rev>] [--] <file>");
|
|
|
|
static char annotate_usage[] = N_("git annotate [<options>] [<rev-opts>] [<rev>] [--] <file>");
|
|
|
|
|
|
|
|
static const char *blame_opt_usage[] = {
|
|
|
|
blame_usage,
|
|
|
|
"",
|
|
|
|
N_("<rev-opts> are documented in git-rev-list(1)"),
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char *annotate_opt_usage[] = {
|
|
|
|
annotate_usage,
|
|
|
|
"",
|
|
|
|
N_("<rev-opts> are documented in git-rev-list(1)"),
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static int longest_file;
|
|
|
|
static int longest_author;
|
|
|
|
static int max_orig_digits;
|
|
|
|
static int max_digits;
|
|
|
|
static int max_score_digits;
|
|
|
|
static int show_root;
|
|
|
|
static int reverse;
|
|
|
|
static int blank_boundary;
|
|
|
|
static int incremental;
|
|
|
|
static int xdl_opts;
|
|
|
|
static int abbrev = -1;
|
|
|
|
static int no_whole_file_rename;
|
|
|
|
static int show_progress;
|
|
|
|
static char repeated_meta_color[COLOR_MAXLEN];
|
|
|
|
static int coloring_mode;
|
blame: add the ability to ignore commits and their changes
Commits that make formatting changes or function renames are often not
interesting when blaming a file. A user may deem such a commit as 'not
interesting' and want to ignore and its changes it when assigning blame.
For example, say a file has the following git history / rev-list:
---O---A---X---B---C---D---Y---E---F
Commits X and Y both touch a particular line, and the other commits do
not:
X: "Take a third parameter"
-MyFunc(1, 2);
+MyFunc(1, 2, 3);
Y: "Remove camelcase"
-MyFunc(1, 2, 3);
+my_func(1, 2, 3);
git-blame will blame Y for the change. I'd like to be able to ignore Y:
both the existence of the commit as well as any changes it made. This
differs from -S rev-list, which specifies the list of commits to
process for the blame. We would still process Y, but just don't let the
blame 'stick.'
This patch adds the ability for users to ignore a revision with
--ignore-rev=rev, which may be repeated. They can specify a set of
files of full object names of revs, e.g. SHA-1 hashes, one per line. A
single file may be specified with the blame.ignoreRevFile config option
or with --ignore-rev-file=file. Both the config option and the command
line option may be repeated multiple times. An empty file name "" will
clear the list of revs from previously processed files. Config options
are processed before command line options.
For a typical use case, projects will maintain the file containing
revisions for commits that perform mass reformatting, and their users
have the option to ignore all of the commits in that file.
Additionally, a user can use the --ignore-rev option for one-off
investigation. To go back to the example above, X was a substantive
change to the function, but not the change the user is interested in.
The user inspected X, but wanted to find the previous change to that
line - perhaps a commit that introduced that function call.
To make this work, we can't simply remove all ignored commits from the
rev-list. We need to diff the changes introduced by Y so that we can
ignore them. We let the blames get passed to Y, just like when
processing normally. When Y is the target, we make sure that Y does not
*keep* any blames. Any changes that Y is responsible for get passed to
its parent. Note we make one pass through all of the scapegoats
(parents) to attempt to pass blame normally; we don't know if we *need*
to ignore the commit until we've checked all of the parents.
The blame_entry will get passed up the tree until we find a commit that
has a diff chunk that affects those lines.
One issue is that the ignored commit *did* make some change, and there is
no general solution to finding the line in the parent commit that
corresponds to a given line in the ignored commit. That makes it hard
to attribute a particular line within an ignored commit's diff
correctly.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) #include "a.h"
commit-b 12) #include "b.h"
Commit X, which we will ignore, swaps these lines:
commit-X 11) #include "b.h"
commit-X 12) #include "a.h"
We can pass that blame entry to the parent, but line 11 will be
attributed to commit A, even though "include b.h" came from commit B.
The blame mechanism will be looking at the parent's view of the file at
line number 11.
ignore_blame_entry() is set up to allow alternative algorithms for
guessing per-line blames. Any line that is not attributed to the parent
will continue to be blamed on the ignored commit as if that commit was
not ignored. Upcoming patches have the ability to detect these lines
and mark them in the blame output.
The existing algorithm is simple: blame each line on the corresponding
line in the parent's diff chunk. Any lines beyond that stay with the
target.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) void new_func_1(void *x, void *y);
commit-b 12) void new_func_2(void *x, void *y);
commit-c 13) some_line_c
commit-d 14) some_line_d
After a commit 'X', we have:
commit-X 11) void new_func_1(void *x,
commit-X 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Commit X nets two additionally lines: 13 and 14. The current
guess_line_blames() algorithm will not attribute these to the parent,
whose diff chunk is only two lines - not four.
When we ignore with the current algorithm, we get:
commit-a 11) void new_func_1(void *x,
commit-b 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Note that line 12 was blamed on B, though B was the commit for
new_func_2(), not new_func_1(). Even when guess_line_blames() finds a
line in the parent, it may still be incorrect.
Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
static struct string_list ignore_revs_file_list = STRING_LIST_INIT_NODUP;
|
blame: add config options for the output of ignored or unblamable lines
When ignoring commits, the commit that is blamed might not be
responsible for the change, due to the inaccuracy of our heuristic.
Users might want to know when a particular line has a potentially
inaccurate blame.
Furthermore, guess_line_blames() may fail to find any parent commit for
a given line touched by an ignored commit. Those 'unblamable' lines
remain blamed on an ignored commit. Users might want to know if a line
is unblamable so that they do not spend time investigating a commit they
know is uninteresting.
This patch adds two config options to mark these two types of lines in
the output of blame.
The first option can identify ignored lines by specifying
blame.markIgnoredLines. When this option is set, each blame line that
was blamed on a commit other than the ignored commit is marked with a
'?'.
For example:
278b6158d6fdb (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
appears as:
?278b6158d6fd (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
where the '?' is placed before the commit, and the hash has one fewer
characters.
Sometimes we are unable to even guess at what ancestor commit touched a
line. These lines are 'unblamable.' The second option,
blame.markUnblamableLines, will mark the line with '*'.
For example, say we ignore e5e8d36d04cbe, yet we are unable to blame
this line on another commit:
e5e8d36d04cbe (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
appears as:
*e5e8d36d04cb (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
When these config options are used together, every line touched by an
ignored commit will be marked with either a '?' or a '*'.
Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
static int mark_unblamable_lines;
|
|
|
|
static int mark_ignored_lines;
|
|
|
|
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
static struct date_mode blame_date_mode = { DATE_ISO8601 };
|
|
|
|
static size_t blame_date_width;
|
|
|
|
|
|
|
|
static struct string_list mailmap = STRING_LIST_INIT_NODUP;
|
|
|
|
|
|
|
|
#ifndef DEBUG_BLAME
|
|
|
|
#define DEBUG_BLAME 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static unsigned blame_move_score;
|
|
|
|
static unsigned blame_copy_score;
|
|
|
|
|
|
|
|
/* Remember to update object flag allocation in object.h */
|
|
|
|
#define METAINFO_SHOWN (1u<<12)
|
|
|
|
#define MORE_THAN_ONE_PATH (1u<<13)
|
|
|
|
|
|
|
|
struct progress_info {
|
|
|
|
struct progress *progress;
|
|
|
|
int blamed_lines;
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char *nth_line_cb(void *data, long lno)
|
|
|
|
{
|
|
|
|
return blame_nth_line((struct blame_scoreboard *)data, lno);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Information on commits, used for output.
|
|
|
|
*/
|
|
|
|
struct commit_info {
|
|
|
|
struct strbuf author;
|
|
|
|
struct strbuf author_mail;
|
|
|
|
timestamp_t author_time;
|
|
|
|
struct strbuf author_tz;
|
|
|
|
|
|
|
|
/* filled only when asked for details */
|
|
|
|
struct strbuf committer;
|
|
|
|
struct strbuf committer_mail;
|
|
|
|
timestamp_t committer_time;
|
|
|
|
struct strbuf committer_tz;
|
|
|
|
|
|
|
|
struct strbuf summary;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define COMMIT_INFO_INIT { \
|
|
|
|
.author = STRBUF_INIT, \
|
|
|
|
.author_mail = STRBUF_INIT, \
|
|
|
|
.author_tz = STRBUF_INIT, \
|
|
|
|
.committer = STRBUF_INIT, \
|
|
|
|
.committer_mail = STRBUF_INIT, \
|
|
|
|
.committer_tz = STRBUF_INIT, \
|
|
|
|
.summary = STRBUF_INIT, \
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Parse author/committer line in the commit object buffer
|
|
|
|
*/
|
|
|
|
static void get_ac_line(const char *inbuf, const char *what,
|
|
|
|
struct strbuf *name, struct strbuf *mail,
|
|
|
|
timestamp_t *time, struct strbuf *tz)
|
|
|
|
{
|
|
|
|
struct ident_split ident;
|
|
|
|
size_t len, maillen, namelen;
|
|
|
|
char *tmp, *endp;
|
|
|
|
const char *namebuf, *mailbuf;
|
|
|
|
|
|
|
|
tmp = strstr(inbuf, what);
|
|
|
|
if (!tmp)
|
|
|
|
goto error_out;
|
|
|
|
tmp += strlen(what);
|
|
|
|
endp = strchr(tmp, '\n');
|
|
|
|
if (!endp)
|
|
|
|
len = strlen(tmp);
|
|
|
|
else
|
|
|
|
len = endp - tmp;
|
|
|
|
|
|
|
|
if (split_ident_line(&ident, tmp, len)) {
|
|
|
|
error_out:
|
|
|
|
/* Ugh */
|
|
|
|
tmp = "(unknown)";
|
|
|
|
strbuf_addstr(name, tmp);
|
|
|
|
strbuf_addstr(mail, tmp);
|
|
|
|
strbuf_addstr(tz, tmp);
|
|
|
|
*time = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
namelen = ident.name_end - ident.name_begin;
|
|
|
|
namebuf = ident.name_begin;
|
|
|
|
|
|
|
|
maillen = ident.mail_end - ident.mail_begin;
|
|
|
|
mailbuf = ident.mail_begin;
|
|
|
|
|
|
|
|
if (ident.date_begin && ident.date_end)
|
|
|
|
*time = strtoul(ident.date_begin, NULL, 10);
|
|
|
|
else
|
|
|
|
*time = 0;
|
|
|
|
|
|
|
|
if (ident.tz_begin && ident.tz_end)
|
|
|
|
strbuf_add(tz, ident.tz_begin, ident.tz_end - ident.tz_begin);
|
|
|
|
else
|
|
|
|
strbuf_addstr(tz, "(unknown)");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now, convert both name and e-mail using mailmap
|
|
|
|
*/
|
|
|
|
map_user(&mailmap, &mailbuf, &maillen,
|
|
|
|
&namebuf, &namelen);
|
|
|
|
|
|
|
|
strbuf_addf(mail, "<%.*s>", (int)maillen, mailbuf);
|
|
|
|
strbuf_add(name, namebuf, namelen);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void commit_info_destroy(struct commit_info *ci)
|
|
|
|
{
|
|
|
|
|
|
|
|
strbuf_release(&ci->author);
|
|
|
|
strbuf_release(&ci->author_mail);
|
|
|
|
strbuf_release(&ci->author_tz);
|
|
|
|
strbuf_release(&ci->committer);
|
|
|
|
strbuf_release(&ci->committer_mail);
|
|
|
|
strbuf_release(&ci->committer_tz);
|
|
|
|
strbuf_release(&ci->summary);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void get_commit_info(struct commit *commit,
|
|
|
|
struct commit_info *ret,
|
|
|
|
int detailed)
|
|
|
|
{
|
|
|
|
int len;
|
|
|
|
const char *subject, *encoding;
|
|
|
|
const char *message;
|
|
|
|
|
|
|
|
encoding = get_log_output_encoding();
|
|
|
|
message = repo_logmsg_reencode(the_repository, commit, NULL, encoding);
|
|
|
|
get_ac_line(message, "\nauthor ",
|
|
|
|
&ret->author, &ret->author_mail,
|
|
|
|
&ret->author_time, &ret->author_tz);
|
|
|
|
|
|
|
|
if (!detailed) {
|
|
|
|
repo_unuse_commit_buffer(the_repository, commit, message);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
get_ac_line(message, "\ncommitter ",
|
|
|
|
&ret->committer, &ret->committer_mail,
|
|
|
|
&ret->committer_time, &ret->committer_tz);
|
|
|
|
|
|
|
|
len = find_commit_subject(message, &subject);
|
|
|
|
if (len)
|
|
|
|
strbuf_add(&ret->summary, subject, len);
|
|
|
|
else
|
|
|
|
strbuf_addf(&ret->summary, "(%s)", oid_to_hex(&commit->object.oid));
|
|
|
|
|
|
|
|
repo_unuse_commit_buffer(the_repository, commit, message);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
blame: output porcelain "previous" header for each file
It's possible for content currently found in one file to
have originated in two separate files, each of which may
have been modified in some single older commit. The
--porcelain output generates an incorrect "previous" header
in this case, whereas --line-porcelain gets it right. The
problem is that the porcelain output tries to omit repeated
details of commits, and treats "previous" as a property of
the commit, when it is really a property of the blamed block
of lines.
Let's look at an example. In a case like this, you might see
this output from --line-porcelain:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
author ...
committer ...
previous SOME_SHA1^ file_two
filename file_two
...some different content....
The "filename" fields tell us that the two lines are from
two different files. But notice that the filename also
appears in the "previous" field, which tells us where to
start a re-blame. The second content line never appeared in
file_one at all, so we would obviously need to re-blame from
file_two (or possibly even some other file, if had just been
renamed to file_two in SOME_SHA1).
So far so good. Now here's what --porcelain looks like:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
filename file_two
...some different content....
We've dropped the author and committer fields from the
second line, as they would just be repeats. But we can't
omit "filename", because it depends on the actual block of
blamed lines, not just the commit. This is handled by
emit_porcelain_details(), which will show the filename
either if it is the first mention of the commit _or_ if the
commit has multiple paths in it.
But we don't give "previous" the same handling. It's written
inside emit_one_suspect_detail(), which bails early if we've
already seen that commit. And so the output above is wrong;
a reader would assume that the correct place to re-blame
line two is from file_one, but that's obviously nonsense.
Let's treat "previous" the same as "filename", and show it
fresh whenever we know we are in a confusing case like this.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
* Write out any suspect information which depends on the path. This must be
|
|
|
|
* handled separately from emit_one_suspect_detail(), because a given commit
|
|
|
|
* may have changes in multiple paths. So this needs to appear each time
|
|
|
|
* we mention a new group.
|
|
|
|
*
|
|
|
|
* To allow LF and other nonportable characters in pathnames,
|
|
|
|
* they are c-style quoted as needed.
|
|
|
|
*/
|
|
|
|
static void write_filename_info(struct blame_origin *suspect)
|
|
|
|
{
|
blame: output porcelain "previous" header for each file
It's possible for content currently found in one file to
have originated in two separate files, each of which may
have been modified in some single older commit. The
--porcelain output generates an incorrect "previous" header
in this case, whereas --line-porcelain gets it right. The
problem is that the porcelain output tries to omit repeated
details of commits, and treats "previous" as a property of
the commit, when it is really a property of the blamed block
of lines.
Let's look at an example. In a case like this, you might see
this output from --line-porcelain:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
author ...
committer ...
previous SOME_SHA1^ file_two
filename file_two
...some different content....
The "filename" fields tell us that the two lines are from
two different files. But notice that the filename also
appears in the "previous" field, which tells us where to
start a re-blame. The second content line never appeared in
file_one at all, so we would obviously need to re-blame from
file_two (or possibly even some other file, if had just been
renamed to file_two in SOME_SHA1).
So far so good. Now here's what --porcelain looks like:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
filename file_two
...some different content....
We've dropped the author and committer fields from the
second line, as they would just be repeats. But we can't
omit "filename", because it depends on the actual block of
blamed lines, not just the commit. This is handled by
emit_porcelain_details(), which will show the filename
either if it is the first mention of the commit _or_ if the
commit has multiple paths in it.
But we don't give "previous" the same handling. It's written
inside emit_one_suspect_detail(), which bails early if we've
already seen that commit. And so the output above is wrong;
a reader would assume that the correct place to re-blame
line two is from file_one, but that's obviously nonsense.
Let's treat "previous" the same as "filename", and show it
fresh whenever we know we are in a confusing case like this.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
if (suspect->previous) {
|
|
|
|
struct blame_origin *prev = suspect->previous;
|
blame: output porcelain "previous" header for each file
It's possible for content currently found in one file to
have originated in two separate files, each of which may
have been modified in some single older commit. The
--porcelain output generates an incorrect "previous" header
in this case, whereas --line-porcelain gets it right. The
problem is that the porcelain output tries to omit repeated
details of commits, and treats "previous" as a property of
the commit, when it is really a property of the blamed block
of lines.
Let's look at an example. In a case like this, you might see
this output from --line-porcelain:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
author ...
committer ...
previous SOME_SHA1^ file_two
filename file_two
...some different content....
The "filename" fields tell us that the two lines are from
two different files. But notice that the filename also
appears in the "previous" field, which tells us where to
start a re-blame. The second content line never appeared in
file_one at all, so we would obviously need to re-blame from
file_two (or possibly even some other file, if had just been
renamed to file_two in SOME_SHA1).
So far so good. Now here's what --porcelain looks like:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
filename file_two
...some different content....
We've dropped the author and committer fields from the
second line, as they would just be repeats. But we can't
omit "filename", because it depends on the actual block of
blamed lines, not just the commit. This is handled by
emit_porcelain_details(), which will show the filename
either if it is the first mention of the commit _or_ if the
commit has multiple paths in it.
But we don't give "previous" the same handling. It's written
inside emit_one_suspect_detail(), which bails early if we've
already seen that commit. And so the output above is wrong;
a reader would assume that the correct place to re-blame
line two is from file_one, but that's obviously nonsense.
Let's treat "previous" the same as "filename", and show it
fresh whenever we know we are in a confusing case like this.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
printf("previous %s ", oid_to_hex(&prev->commit->object.oid));
|
|
|
|
write_name_quoted(prev->path, stdout, '\n');
|
|
|
|
}
|
|
|
|
printf("filename ");
|
blame: output porcelain "previous" header for each file
It's possible for content currently found in one file to
have originated in two separate files, each of which may
have been modified in some single older commit. The
--porcelain output generates an incorrect "previous" header
in this case, whereas --line-porcelain gets it right. The
problem is that the porcelain output tries to omit repeated
details of commits, and treats "previous" as a property of
the commit, when it is really a property of the blamed block
of lines.
Let's look at an example. In a case like this, you might see
this output from --line-porcelain:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
author ...
committer ...
previous SOME_SHA1^ file_two
filename file_two
...some different content....
The "filename" fields tell us that the two lines are from
two different files. But notice that the filename also
appears in the "previous" field, which tells us where to
start a re-blame. The second content line never appeared in
file_one at all, so we would obviously need to re-blame from
file_two (or possibly even some other file, if had just been
renamed to file_two in SOME_SHA1).
So far so good. Now here's what --porcelain looks like:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
filename file_two
...some different content....
We've dropped the author and committer fields from the
second line, as they would just be repeats. But we can't
omit "filename", because it depends on the actual block of
blamed lines, not just the commit. This is handled by
emit_porcelain_details(), which will show the filename
either if it is the first mention of the commit _or_ if the
commit has multiple paths in it.
But we don't give "previous" the same handling. It's written
inside emit_one_suspect_detail(), which bails early if we've
already seen that commit. And so the output above is wrong;
a reader would assume that the correct place to re-blame
line two is from file_one, but that's obviously nonsense.
Let's treat "previous" the same as "filename", and show it
fresh whenever we know we are in a confusing case like this.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
write_name_quoted(suspect->path, stdout, '\n');
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Porcelain/Incremental format wants to show a lot of details per
|
|
|
|
* commit. Instead of repeating this every line, emit it only once,
|
|
|
|
* the first time each commit appears in the output (unless the
|
|
|
|
* user has specifically asked for us to repeat).
|
|
|
|
*/
|
|
|
|
static int emit_one_suspect_detail(struct blame_origin *suspect, int repeat)
|
|
|
|
{
|
|
|
|
struct commit_info ci = COMMIT_INFO_INIT;
|
|
|
|
|
|
|
|
if (!repeat && (suspect->commit->object.flags & METAINFO_SHOWN))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
suspect->commit->object.flags |= METAINFO_SHOWN;
|
|
|
|
get_commit_info(suspect->commit, &ci, 1);
|
|
|
|
printf("author %s\n", ci.author.buf);
|
|
|
|
printf("author-mail %s\n", ci.author_mail.buf);
|
|
|
|
printf("author-time %"PRItime"\n", ci.author_time);
|
|
|
|
printf("author-tz %s\n", ci.author_tz.buf);
|
|
|
|
printf("committer %s\n", ci.committer.buf);
|
|
|
|
printf("committer-mail %s\n", ci.committer_mail.buf);
|
|
|
|
printf("committer-time %"PRItime"\n", ci.committer_time);
|
|
|
|
printf("committer-tz %s\n", ci.committer_tz.buf);
|
|
|
|
printf("summary %s\n", ci.summary.buf);
|
|
|
|
if (suspect->commit->object.flags & UNINTERESTING)
|
|
|
|
printf("boundary\n");
|
|
|
|
|
|
|
|
commit_info_destroy(&ci);
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The blame_entry is found to be guilty for the range.
|
|
|
|
* Show it in incremental output.
|
|
|
|
*/
|
|
|
|
static void found_guilty_entry(struct blame_entry *ent, void *data)
|
|
|
|
{
|
|
|
|
struct progress_info *pi = (struct progress_info *)data;
|
|
|
|
|
|
|
|
if (incremental) {
|
|
|
|
struct blame_origin *suspect = ent->suspect;
|
|
|
|
|
|
|
|
printf("%s %d %d %d\n",
|
|
|
|
oid_to_hex(&suspect->commit->object.oid),
|
|
|
|
ent->s_lno + 1, ent->lno + 1, ent->num_lines);
|
|
|
|
emit_one_suspect_detail(suspect, 0);
|
blame: output porcelain "previous" header for each file
It's possible for content currently found in one file to
have originated in two separate files, each of which may
have been modified in some single older commit. The
--porcelain output generates an incorrect "previous" header
in this case, whereas --line-porcelain gets it right. The
problem is that the porcelain output tries to omit repeated
details of commits, and treats "previous" as a property of
the commit, when it is really a property of the blamed block
of lines.
Let's look at an example. In a case like this, you might see
this output from --line-porcelain:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
author ...
committer ...
previous SOME_SHA1^ file_two
filename file_two
...some different content....
The "filename" fields tell us that the two lines are from
two different files. But notice that the filename also
appears in the "previous" field, which tells us where to
start a re-blame. The second content line never appeared in
file_one at all, so we would obviously need to re-blame from
file_two (or possibly even some other file, if had just been
renamed to file_two in SOME_SHA1).
So far so good. Now here's what --porcelain looks like:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
filename file_two
...some different content....
We've dropped the author and committer fields from the
second line, as they would just be repeats. But we can't
omit "filename", because it depends on the actual block of
blamed lines, not just the commit. This is handled by
emit_porcelain_details(), which will show the filename
either if it is the first mention of the commit _or_ if the
commit has multiple paths in it.
But we don't give "previous" the same handling. It's written
inside emit_one_suspect_detail(), which bails early if we've
already seen that commit. And so the output above is wrong;
a reader would assume that the correct place to re-blame
line two is from file_one, but that's obviously nonsense.
Let's treat "previous" the same as "filename", and show it
fresh whenever we know we are in a confusing case like this.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
write_filename_info(suspect);
|
|
|
|
maybe_flush_or_die(stdout, "stdout");
|
|
|
|
}
|
|
|
|
pi->blamed_lines += ent->num_lines;
|
|
|
|
display_progress(pi->progress, pi->blamed_lines);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *format_time(timestamp_t time, const char *tz_str,
|
|
|
|
int show_raw_time)
|
|
|
|
{
|
blame: fix broken time_buf paddings in relative timestamp
Command `git blame --date relative` aligns the date field with a
fixed-width (defined by blame_date_width), and if time_str is shorter
than that, it adds spaces for padding. But there are two bugs in the
following codes:
time_len = strlen(time_str);
...
memset(time_buf + time_len, ' ', blame_date_width - time_len);
1. The type of blame_date_width is size_t, which is unsigned. If
time_len is greater than blame_date_width, the result of
"blame_date_width - time_len" will never be a negative number, but a
really big positive number, and will cause memory overwrite.
This bug can be triggered if either l10n message for function
show_date_relative() in date.c is longer than 30 characters, then
`git blame --date relative` may exit abnormally.
2. When show blame information with relative time, the UTF-8 characters
in time_str will break the alignment of columns after the date field.
This is because the time_buf padding with spaces should have a
constant display width, not a fixed strlen size. So we should call
utf8_strwidth() instead of strlen() for width calibration.
Helped-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
static struct strbuf time_buf = STRBUF_INIT;
|
|
|
|
|
blame: fix broken time_buf paddings in relative timestamp
Command `git blame --date relative` aligns the date field with a
fixed-width (defined by blame_date_width), and if time_str is shorter
than that, it adds spaces for padding. But there are two bugs in the
following codes:
time_len = strlen(time_str);
...
memset(time_buf + time_len, ' ', blame_date_width - time_len);
1. The type of blame_date_width is size_t, which is unsigned. If
time_len is greater than blame_date_width, the result of
"blame_date_width - time_len" will never be a negative number, but a
really big positive number, and will cause memory overwrite.
This bug can be triggered if either l10n message for function
show_date_relative() in date.c is longer than 30 characters, then
`git blame --date relative` may exit abnormally.
2. When show blame information with relative time, the UTF-8 characters
in time_str will break the alignment of columns after the date field.
This is because the time_buf padding with spaces should have a
constant display width, not a fixed strlen size. So we should call
utf8_strwidth() instead of strlen() for width calibration.
Helped-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
strbuf_reset(&time_buf);
|
|
|
|
if (show_raw_time) {
|
|
|
|
strbuf_addf(&time_buf, "%"PRItime" %s", time, tz_str);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
const char *time_str;
|
blame: fix broken time_buf paddings in relative timestamp
Command `git blame --date relative` aligns the date field with a
fixed-width (defined by blame_date_width), and if time_str is shorter
than that, it adds spaces for padding. But there are two bugs in the
following codes:
time_len = strlen(time_str);
...
memset(time_buf + time_len, ' ', blame_date_width - time_len);
1. The type of blame_date_width is size_t, which is unsigned. If
time_len is greater than blame_date_width, the result of
"blame_date_width - time_len" will never be a negative number, but a
really big positive number, and will cause memory overwrite.
This bug can be triggered if either l10n message for function
show_date_relative() in date.c is longer than 30 characters, then
`git blame --date relative` may exit abnormally.
2. When show blame information with relative time, the UTF-8 characters
in time_str will break the alignment of columns after the date field.
This is because the time_buf padding with spaces should have a
constant display width, not a fixed strlen size. So we should call
utf8_strwidth() instead of strlen() for width calibration.
Helped-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
size_t time_width;
|
|
|
|
int tz;
|
|
|
|
tz = atoi(tz_str);
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
time_str = show_date(time, tz, &blame_date_mode);
|
blame: fix broken time_buf paddings in relative timestamp
Command `git blame --date relative` aligns the date field with a
fixed-width (defined by blame_date_width), and if time_str is shorter
than that, it adds spaces for padding. But there are two bugs in the
following codes:
time_len = strlen(time_str);
...
memset(time_buf + time_len, ' ', blame_date_width - time_len);
1. The type of blame_date_width is size_t, which is unsigned. If
time_len is greater than blame_date_width, the result of
"blame_date_width - time_len" will never be a negative number, but a
really big positive number, and will cause memory overwrite.
This bug can be triggered if either l10n message for function
show_date_relative() in date.c is longer than 30 characters, then
`git blame --date relative` may exit abnormally.
2. When show blame information with relative time, the UTF-8 characters
in time_str will break the alignment of columns after the date field.
This is because the time_buf padding with spaces should have a
constant display width, not a fixed strlen size. So we should call
utf8_strwidth() instead of strlen() for width calibration.
Helped-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
strbuf_addstr(&time_buf, time_str);
|
|
|
|
/*
|
|
|
|
* Add space paddings to time_buf to display a fixed width
|
|
|
|
* string, and use time_width for display width calibration.
|
|
|
|
*/
|
|
|
|
for (time_width = utf8_strwidth(time_str);
|
|
|
|
time_width < blame_date_width;
|
|
|
|
time_width++)
|
|
|
|
strbuf_addch(&time_buf, ' ');
|
|
|
|
}
|
blame: fix broken time_buf paddings in relative timestamp
Command `git blame --date relative` aligns the date field with a
fixed-width (defined by blame_date_width), and if time_str is shorter
than that, it adds spaces for padding. But there are two bugs in the
following codes:
time_len = strlen(time_str);
...
memset(time_buf + time_len, ' ', blame_date_width - time_len);
1. The type of blame_date_width is size_t, which is unsigned. If
time_len is greater than blame_date_width, the result of
"blame_date_width - time_len" will never be a negative number, but a
really big positive number, and will cause memory overwrite.
This bug can be triggered if either l10n message for function
show_date_relative() in date.c is longer than 30 characters, then
`git blame --date relative` may exit abnormally.
2. When show blame information with relative time, the UTF-8 characters
in time_str will break the alignment of columns after the date field.
This is because the time_buf padding with spaces should have a
constant display width, not a fixed strlen size. So we should call
utf8_strwidth() instead of strlen() for width calibration.
Helped-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
return time_buf.buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define OUTPUT_ANNOTATE_COMPAT (1U<<0)
|
|
|
|
#define OUTPUT_LONG_OBJECT_NAME (1U<<1)
|
|
|
|
#define OUTPUT_RAW_TIMESTAMP (1U<<2)
|
|
|
|
#define OUTPUT_PORCELAIN (1U<<3)
|
|
|
|
#define OUTPUT_SHOW_NAME (1U<<4)
|
|
|
|
#define OUTPUT_SHOW_NUMBER (1U<<5)
|
|
|
|
#define OUTPUT_SHOW_SCORE (1U<<6)
|
|
|
|
#define OUTPUT_NO_AUTHOR (1U<<7)
|
|
|
|
#define OUTPUT_SHOW_EMAIL (1U<<8)
|
|
|
|
#define OUTPUT_LINE_PORCELAIN (1U<<9)
|
|
|
|
#define OUTPUT_COLOR_LINE (1U<<10)
|
|
|
|
#define OUTPUT_SHOW_AGE_WITH_COLOR (1U<<11)
|
|
|
|
|
|
|
|
static void emit_porcelain_details(struct blame_origin *suspect, int repeat)
|
|
|
|
{
|
|
|
|
if (emit_one_suspect_detail(suspect, repeat) ||
|
|
|
|
(suspect->commit->object.flags & MORE_THAN_ONE_PATH))
|
blame: output porcelain "previous" header for each file
It's possible for content currently found in one file to
have originated in two separate files, each of which may
have been modified in some single older commit. The
--porcelain output generates an incorrect "previous" header
in this case, whereas --line-porcelain gets it right. The
problem is that the porcelain output tries to omit repeated
details of commits, and treats "previous" as a property of
the commit, when it is really a property of the blamed block
of lines.
Let's look at an example. In a case like this, you might see
this output from --line-porcelain:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
author ...
committer ...
previous SOME_SHA1^ file_two
filename file_two
...some different content....
The "filename" fields tell us that the two lines are from
two different files. But notice that the filename also
appears in the "previous" field, which tells us where to
start a re-blame. The second content line never appeared in
file_one at all, so we would obviously need to re-blame from
file_two (or possibly even some other file, if had just been
renamed to file_two in SOME_SHA1).
So far so good. Now here's what --porcelain looks like:
SOME_SHA1 1 1 1
author ...
committer ...
previous SOME_SHA1^ file_one
filename file_one
...some line content...
SOME_SHA1 2 1 1
filename file_two
...some different content....
We've dropped the author and committer fields from the
second line, as they would just be repeats. But we can't
omit "filename", because it depends on the actual block of
blamed lines, not just the commit. This is handled by
emit_porcelain_details(), which will show the filename
either if it is the first mention of the commit _or_ if the
commit has multiple paths in it.
But we don't give "previous" the same handling. It's written
inside emit_one_suspect_detail(), which bails early if we've
already seen that commit. And so the output above is wrong;
a reader would assume that the correct place to re-blame
line two is from file_one, but that's obviously nonsense.
Let's treat "previous" the same as "filename", and show it
fresh whenever we know we are in a confusing case like this.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
write_filename_info(suspect);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void emit_porcelain(struct blame_scoreboard *sb, struct blame_entry *ent,
|
|
|
|
int opt)
|
|
|
|
{
|
|
|
|
int repeat = opt & OUTPUT_LINE_PORCELAIN;
|
|
|
|
int cnt;
|
|
|
|
const char *cp;
|
|
|
|
struct blame_origin *suspect = ent->suspect;
|
|
|
|
char hex[GIT_MAX_HEXSZ + 1];
|
|
|
|
|
|
|
|
oid_to_hex_r(hex, &suspect->commit->object.oid);
|
|
|
|
printf("%s %d %d %d\n",
|
|
|
|
hex,
|
|
|
|
ent->s_lno + 1,
|
|
|
|
ent->lno + 1,
|
|
|
|
ent->num_lines);
|
|
|
|
emit_porcelain_details(suspect, repeat);
|
|
|
|
|
|
|
|
cp = blame_nth_line(sb, ent->lno);
|
|
|
|
for (cnt = 0; cnt < ent->num_lines; cnt++) {
|
|
|
|
char ch;
|
|
|
|
if (cnt) {
|
|
|
|
printf("%s %d %d\n", hex,
|
|
|
|
ent->s_lno + 1 + cnt,
|
|
|
|
ent->lno + 1 + cnt);
|
|
|
|
if (repeat)
|
|
|
|
emit_porcelain_details(suspect, 1);
|
|
|
|
}
|
|
|
|
putchar('\t');
|
|
|
|
do {
|
|
|
|
ch = *cp++;
|
|
|
|
putchar(ch);
|
|
|
|
} while (ch != '\n' &&
|
|
|
|
cp < sb->final_buf + sb->final_buf_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sb->final_buf_size && cp[-1] != '\n')
|
|
|
|
putchar('\n');
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct color_field {
|
|
|
|
timestamp_t hop;
|
|
|
|
char col[COLOR_MAXLEN];
|
|
|
|
} *colorfield;
|
|
|
|
static int colorfield_nr, colorfield_alloc;
|
|
|
|
|
|
|
|
static void parse_color_fields(const char *s)
|
|
|
|
{
|
|
|
|
struct string_list l = STRING_LIST_INIT_DUP;
|
|
|
|
struct string_list_item *item;
|
|
|
|
enum { EXPECT_DATE, EXPECT_COLOR } next = EXPECT_COLOR;
|
|
|
|
|
|
|
|
colorfield_nr = 0;
|
|
|
|
|
|
|
|
/* Ideally this would be stripped and split at the same time? */
|
|
|
|
string_list_split(&l, s, ',', -1);
|
|
|
|
ALLOC_GROW(colorfield, colorfield_nr + 1, colorfield_alloc);
|
|
|
|
|
|
|
|
for_each_string_list_item(item, &l) {
|
|
|
|
switch (next) {
|
|
|
|
case EXPECT_DATE:
|
|
|
|
colorfield[colorfield_nr].hop = approxidate(item->string);
|
|
|
|
next = EXPECT_COLOR;
|
|
|
|
colorfield_nr++;
|
|
|
|
ALLOC_GROW(colorfield, colorfield_nr + 1, colorfield_alloc);
|
|
|
|
break;
|
|
|
|
case EXPECT_COLOR:
|
|
|
|
if (color_parse(item->string, colorfield[colorfield_nr].col))
|
|
|
|
die(_("expecting a color: %s"), item->string);
|
|
|
|
next = EXPECT_DATE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (next == EXPECT_COLOR)
|
|
|
|
die(_("must end with a color"));
|
|
|
|
|
|
|
|
colorfield[colorfield_nr].hop = TIME_MAX;
|
|
|
|
string_list_clear(&l, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void setup_default_color_by_age(void)
|
|
|
|
{
|
|
|
|
parse_color_fields("blue,12 month ago,white,1 month ago,red");
|
|
|
|
}
|
|
|
|
|
blame: remove unnecessary use of get_commit_info()
When `git blame --color-by-age`, the determine_line_heat() is called to
select how to color the output based on the commit's author date. It
uses the get_commit_info() to parse the information into a `commit_info`
structure, however, this is actually unnecessary because the
determine_line_heat() caller also does the same.
Instead, let's change the determine_line_heat() to take a `commit_info`
structure and remove the internal call to get_commit_info() thus
cleaning up and optimizing the code path.
Enabling Git's trace2 API in order to record the execution time for
every call to determine_line_heat() function:
+ trace2_region_enter("blame", "determine_line_heat", the_repository);
determine_line_heat(ent, &default_color);
+ trace2_region_enter("blame", "determine_line_heat", the_repository);
Then, running `git blame` for "kernel/fork.c" in linux.git and summing
all the execution time for every call (around 1.3k calls) resulted in
2.6x faster execution (best out 3):
git built from 328c109303 (The eighth batch, 2021-02-12) = 42ms
git built from 328c109303 + this change = 16ms
Signed-off-by: Rafael Silva <rafaeloliveira.cs@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
static void determine_line_heat(struct commit_info *ci, const char **dest_color)
|
|
|
|
{
|
|
|
|
int i = 0;
|
|
|
|
|
blame: remove unnecessary use of get_commit_info()
When `git blame --color-by-age`, the determine_line_heat() is called to
select how to color the output based on the commit's author date. It
uses the get_commit_info() to parse the information into a `commit_info`
structure, however, this is actually unnecessary because the
determine_line_heat() caller also does the same.
Instead, let's change the determine_line_heat() to take a `commit_info`
structure and remove the internal call to get_commit_info() thus
cleaning up and optimizing the code path.
Enabling Git's trace2 API in order to record the execution time for
every call to determine_line_heat() function:
+ trace2_region_enter("blame", "determine_line_heat", the_repository);
determine_line_heat(ent, &default_color);
+ trace2_region_enter("blame", "determine_line_heat", the_repository);
Then, running `git blame` for "kernel/fork.c" in linux.git and summing
all the execution time for every call (around 1.3k calls) resulted in
2.6x faster execution (best out 3):
git built from 328c109303 (The eighth batch, 2021-02-12) = 42ms
git built from 328c109303 + this change = 16ms
Signed-off-by: Rafael Silva <rafaeloliveira.cs@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
while (i < colorfield_nr && ci->author_time > colorfield[i].hop)
|
|
|
|
i++;
|
|
|
|
|
|
|
|
*dest_color = colorfield[i].col;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void emit_other(struct blame_scoreboard *sb, struct blame_entry *ent, int opt)
|
|
|
|
{
|
|
|
|
int cnt;
|
|
|
|
const char *cp;
|
|
|
|
struct blame_origin *suspect = ent->suspect;
|
|
|
|
struct commit_info ci = COMMIT_INFO_INIT;
|
|
|
|
char hex[GIT_MAX_HEXSZ + 1];
|
|
|
|
int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
|
|
|
|
const char *default_color = NULL, *color = NULL, *reset = NULL;
|
|
|
|
|
|
|
|
get_commit_info(suspect->commit, &ci, 1);
|
|
|
|
oid_to_hex_r(hex, &suspect->commit->object.oid);
|
|
|
|
|
|
|
|
cp = blame_nth_line(sb, ent->lno);
|
|
|
|
|
|
|
|
if (opt & OUTPUT_SHOW_AGE_WITH_COLOR) {
|
blame: remove unnecessary use of get_commit_info()
When `git blame --color-by-age`, the determine_line_heat() is called to
select how to color the output based on the commit's author date. It
uses the get_commit_info() to parse the information into a `commit_info`
structure, however, this is actually unnecessary because the
determine_line_heat() caller also does the same.
Instead, let's change the determine_line_heat() to take a `commit_info`
structure and remove the internal call to get_commit_info() thus
cleaning up and optimizing the code path.
Enabling Git's trace2 API in order to record the execution time for
every call to determine_line_heat() function:
+ trace2_region_enter("blame", "determine_line_heat", the_repository);
determine_line_heat(ent, &default_color);
+ trace2_region_enter("blame", "determine_line_heat", the_repository);
Then, running `git blame` for "kernel/fork.c" in linux.git and summing
all the execution time for every call (around 1.3k calls) resulted in
2.6x faster execution (best out 3):
git built from 328c109303 (The eighth batch, 2021-02-12) = 42ms
git built from 328c109303 + this change = 16ms
Signed-off-by: Rafael Silva <rafaeloliveira.cs@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
determine_line_heat(&ci, &default_color);
|
|
|
|
color = default_color;
|
|
|
|
reset = GIT_COLOR_RESET;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (cnt = 0; cnt < ent->num_lines; cnt++) {
|
|
|
|
char ch;
|
|
|
|
int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? the_hash_algo->hexsz : abbrev;
|
|
|
|
|
|
|
|
if (opt & OUTPUT_COLOR_LINE) {
|
|
|
|
if (cnt > 0) {
|
|
|
|
color = repeated_meta_color;
|
|
|
|
reset = GIT_COLOR_RESET;
|
|
|
|
} else {
|
|
|
|
color = default_color ? default_color : NULL;
|
|
|
|
reset = default_color ? GIT_COLOR_RESET : NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (color)
|
|
|
|
fputs(color, stdout);
|
|
|
|
|
|
|
|
if (suspect->commit->object.flags & UNINTERESTING) {
|
|
|
|
if (blank_boundary)
|
|
|
|
memset(hex, ' ', length);
|
|
|
|
else if (!(opt & OUTPUT_ANNOTATE_COMPAT)) {
|
|
|
|
length--;
|
|
|
|
putchar('^');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
blame: add config options for the output of ignored or unblamable lines
When ignoring commits, the commit that is blamed might not be
responsible for the change, due to the inaccuracy of our heuristic.
Users might want to know when a particular line has a potentially
inaccurate blame.
Furthermore, guess_line_blames() may fail to find any parent commit for
a given line touched by an ignored commit. Those 'unblamable' lines
remain blamed on an ignored commit. Users might want to know if a line
is unblamable so that they do not spend time investigating a commit they
know is uninteresting.
This patch adds two config options to mark these two types of lines in
the output of blame.
The first option can identify ignored lines by specifying
blame.markIgnoredLines. When this option is set, each blame line that
was blamed on a commit other than the ignored commit is marked with a
'?'.
For example:
278b6158d6fdb (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
appears as:
?278b6158d6fd (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
where the '?' is placed before the commit, and the hash has one fewer
characters.
Sometimes we are unable to even guess at what ancestor commit touched a
line. These lines are 'unblamable.' The second option,
blame.markUnblamableLines, will mark the line with '*'.
For example, say we ignore e5e8d36d04cbe, yet we are unable to blame
this line on another commit:
e5e8d36d04cbe (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
appears as:
*e5e8d36d04cb (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
When these config options are used together, every line touched by an
ignored commit will be marked with either a '?' or a '*'.
Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
if (mark_unblamable_lines && ent->unblamable) {
|
|
|
|
length--;
|
|
|
|
putchar('*');
|
|
|
|
}
|
|
|
|
if (mark_ignored_lines && ent->ignored) {
|
|
|
|
length--;
|
|
|
|
putchar('?');
|
|
|
|
}
|
|
|
|
printf("%.*s", length, hex);
|
|
|
|
if (opt & OUTPUT_ANNOTATE_COMPAT) {
|
|
|
|
const char *name;
|
|
|
|
if (opt & OUTPUT_SHOW_EMAIL)
|
|
|
|
name = ci.author_mail.buf;
|
|
|
|
else
|
|
|
|
name = ci.author.buf;
|
|
|
|
printf("\t(%10s\t%10s\t%d)", name,
|
|
|
|
format_time(ci.author_time, ci.author_tz.buf,
|
|
|
|
show_raw_time),
|
|
|
|
ent->lno + 1 + cnt);
|
|
|
|
} else {
|
|
|
|
if (opt & OUTPUT_SHOW_SCORE)
|
|
|
|
printf(" %*d %02d",
|
|
|
|
max_score_digits, ent->score,
|
|
|
|
ent->suspect->refcnt);
|
|
|
|
if (opt & OUTPUT_SHOW_NAME)
|
|
|
|
printf(" %-*.*s", longest_file, longest_file,
|
|
|
|
suspect->path);
|
|
|
|
if (opt & OUTPUT_SHOW_NUMBER)
|
|
|
|
printf(" %*d", max_orig_digits,
|
|
|
|
ent->s_lno + 1 + cnt);
|
|
|
|
|
|
|
|
if (!(opt & OUTPUT_NO_AUTHOR)) {
|
|
|
|
const char *name;
|
|
|
|
int pad;
|
|
|
|
if (opt & OUTPUT_SHOW_EMAIL)
|
|
|
|
name = ci.author_mail.buf;
|
|
|
|
else
|
|
|
|
name = ci.author.buf;
|
|
|
|
pad = longest_author - utf8_strwidth(name);
|
|
|
|
printf(" (%s%*s %10s",
|
|
|
|
name, pad, "",
|
|
|
|
format_time(ci.author_time,
|
|
|
|
ci.author_tz.buf,
|
|
|
|
show_raw_time));
|
|
|
|
}
|
|
|
|
printf(" %*d) ",
|
|
|
|
max_digits, ent->lno + 1 + cnt);
|
|
|
|
}
|
|
|
|
if (reset)
|
|
|
|
fputs(reset, stdout);
|
|
|
|
do {
|
|
|
|
ch = *cp++;
|
|
|
|
putchar(ch);
|
|
|
|
} while (ch != '\n' &&
|
|
|
|
cp < sb->final_buf + sb->final_buf_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sb->final_buf_size && cp[-1] != '\n')
|
|
|
|
putchar('\n');
|
|
|
|
|
|
|
|
commit_info_destroy(&ci);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void output(struct blame_scoreboard *sb, int option)
|
|
|
|
{
|
|
|
|
struct blame_entry *ent;
|
|
|
|
|
|
|
|
if (option & OUTPUT_PORCELAIN) {
|
|
|
|
for (ent = sb->ent; ent; ent = ent->next) {
|
|
|
|
int count = 0;
|
|
|
|
struct blame_origin *suspect;
|
|
|
|
struct commit *commit = ent->suspect->commit;
|
|
|
|
if (commit->object.flags & MORE_THAN_ONE_PATH)
|
|
|
|
continue;
|
|
|
|
for (suspect = get_blame_suspects(commit); suspect; suspect = suspect->next) {
|
|
|
|
if (suspect->guilty && count++) {
|
|
|
|
commit->object.flags |= MORE_THAN_ONE_PATH;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (ent = sb->ent; ent; ent = ent->next) {
|
|
|
|
if (option & OUTPUT_PORCELAIN)
|
|
|
|
emit_porcelain(sb, ent, option);
|
|
|
|
else {
|
|
|
|
emit_other(sb, ent, option);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add phony grafts for use with -S; this is primarily to
|
|
|
|
* support git's cvsserver that wants to give a linear history
|
|
|
|
* to its clients.
|
|
|
|
*/
|
|
|
|
static int read_ancestry(const char *graft_file)
|
|
|
|
{
|
|
|
|
FILE *fp = fopen_or_warn(graft_file, "r");
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
if (!fp)
|
|
|
|
return -1;
|
|
|
|
while (!strbuf_getwholeline(&buf, fp, '\n')) {
|
|
|
|
/* The format is just "Commit Parent1 Parent2 ...\n" */
|
|
|
|
struct commit_graft *graft = read_graft_line(&buf);
|
|
|
|
if (graft)
|
|
|
|
register_commit_graft(the_repository, graft, 0);
|
|
|
|
}
|
|
|
|
fclose(fp);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int update_auto_abbrev(int auto_abbrev, struct blame_origin *suspect)
|
|
|
|
{
|
|
|
|
const char *uniq = repo_find_unique_abbrev(the_repository,
|
|
|
|
&suspect->commit->object.oid,
|
|
|
|
auto_abbrev);
|
|
|
|
int len = strlen(uniq);
|
|
|
|
if (auto_abbrev < len)
|
|
|
|
return len;
|
|
|
|
return auto_abbrev;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* How many columns do we need to show line numbers, authors,
|
|
|
|
* and filenames?
|
|
|
|
*/
|
|
|
|
static void find_alignment(struct blame_scoreboard *sb, int *option)
|
|
|
|
{
|
|
|
|
int longest_src_lines = 0;
|
|
|
|
int longest_dst_lines = 0;
|
|
|
|
unsigned largest_score = 0;
|
|
|
|
struct blame_entry *e;
|
|
|
|
int compute_auto_abbrev = (abbrev < 0);
|
|
|
|
int auto_abbrev = DEFAULT_ABBREV;
|
|
|
|
|
|
|
|
for (e = sb->ent; e; e = e->next) {
|
|
|
|
struct blame_origin *suspect = e->suspect;
|
|
|
|
int num;
|
|
|
|
|
|
|
|
if (compute_auto_abbrev)
|
|
|
|
auto_abbrev = update_auto_abbrev(auto_abbrev, suspect);
|
|
|
|
if (strcmp(suspect->path, sb->path))
|
|
|
|
*option |= OUTPUT_SHOW_NAME;
|
|
|
|
num = strlen(suspect->path);
|
|
|
|
if (longest_file < num)
|
|
|
|
longest_file = num;
|
|
|
|
if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
|
|
|
|
struct commit_info ci = COMMIT_INFO_INIT;
|
|
|
|
suspect->commit->object.flags |= METAINFO_SHOWN;
|
|
|
|
get_commit_info(suspect->commit, &ci, 1);
|
|
|
|
if (*option & OUTPUT_SHOW_EMAIL)
|
|
|
|
num = utf8_strwidth(ci.author_mail.buf);
|
|
|
|
else
|
|
|
|
num = utf8_strwidth(ci.author.buf);
|
|
|
|
if (longest_author < num)
|
|
|
|
longest_author = num;
|
|
|
|
commit_info_destroy(&ci);
|
|
|
|
}
|
|
|
|
num = e->s_lno + e->num_lines;
|
|
|
|
if (longest_src_lines < num)
|
|
|
|
longest_src_lines = num;
|
|
|
|
num = e->lno + e->num_lines;
|
|
|
|
if (longest_dst_lines < num)
|
|
|
|
longest_dst_lines = num;
|
|
|
|
if (largest_score < blame_entry_score(sb, e))
|
|
|
|
largest_score = blame_entry_score(sb, e);
|
|
|
|
}
|
|
|
|
max_orig_digits = decimal_width(longest_src_lines);
|
|
|
|
max_digits = decimal_width(longest_dst_lines);
|
|
|
|
max_score_digits = decimal_width(largest_score);
|
|
|
|
|
|
|
|
if (compute_auto_abbrev)
|
|
|
|
/* one more abbrev length is needed for the boundary commit */
|
|
|
|
abbrev = auto_abbrev + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sanity_check_on_fail(struct blame_scoreboard *sb, int baa)
|
|
|
|
{
|
|
|
|
int opt = OUTPUT_SHOW_SCORE | OUTPUT_SHOW_NUMBER | OUTPUT_SHOW_NAME;
|
|
|
|
find_alignment(sb, &opt);
|
|
|
|
output(sb, opt);
|
|
|
|
die("Baa %d!", baa);
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned parse_score(const char *arg)
|
|
|
|
{
|
|
|
|
char *end;
|
|
|
|
unsigned long score = strtoul(arg, &end, 10);
|
|
|
|
if (*end)
|
|
|
|
return 0;
|
|
|
|
return score;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *add_prefix(const char *prefix, const char *path)
|
|
|
|
{
|
|
|
|
return prefix_path(prefix, prefix ? strlen(prefix) : 0, path);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int git_blame_config(const char *var, const char *value, void *cb)
|
|
|
|
{
|
|
|
|
if (!strcmp(var, "blame.showroot")) {
|
|
|
|
show_root = git_config_bool(var, value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(var, "blame.blankboundary")) {
|
|
|
|
blank_boundary = git_config_bool(var, value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(var, "blame.showemail")) {
|
|
|
|
int *output_option = cb;
|
|
|
|
if (git_config_bool(var, value))
|
|
|
|
*output_option |= OUTPUT_SHOW_EMAIL;
|
|
|
|
else
|
|
|
|
*output_option &= ~OUTPUT_SHOW_EMAIL;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(var, "blame.date")) {
|
|
|
|
if (!value)
|
|
|
|
return config_error_nonbool(var);
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
parse_date_format(value, &blame_date_mode);
|
|
|
|
return 0;
|
|
|
|
}
|
blame: add the ability to ignore commits and their changes
Commits that make formatting changes or function renames are often not
interesting when blaming a file. A user may deem such a commit as 'not
interesting' and want to ignore and its changes it when assigning blame.
For example, say a file has the following git history / rev-list:
---O---A---X---B---C---D---Y---E---F
Commits X and Y both touch a particular line, and the other commits do
not:
X: "Take a third parameter"
-MyFunc(1, 2);
+MyFunc(1, 2, 3);
Y: "Remove camelcase"
-MyFunc(1, 2, 3);
+my_func(1, 2, 3);
git-blame will blame Y for the change. I'd like to be able to ignore Y:
both the existence of the commit as well as any changes it made. This
differs from -S rev-list, which specifies the list of commits to
process for the blame. We would still process Y, but just don't let the
blame 'stick.'
This patch adds the ability for users to ignore a revision with
--ignore-rev=rev, which may be repeated. They can specify a set of
files of full object names of revs, e.g. SHA-1 hashes, one per line. A
single file may be specified with the blame.ignoreRevFile config option
or with --ignore-rev-file=file. Both the config option and the command
line option may be repeated multiple times. An empty file name "" will
clear the list of revs from previously processed files. Config options
are processed before command line options.
For a typical use case, projects will maintain the file containing
revisions for commits that perform mass reformatting, and their users
have the option to ignore all of the commits in that file.
Additionally, a user can use the --ignore-rev option for one-off
investigation. To go back to the example above, X was a substantive
change to the function, but not the change the user is interested in.
The user inspected X, but wanted to find the previous change to that
line - perhaps a commit that introduced that function call.
To make this work, we can't simply remove all ignored commits from the
rev-list. We need to diff the changes introduced by Y so that we can
ignore them. We let the blames get passed to Y, just like when
processing normally. When Y is the target, we make sure that Y does not
*keep* any blames. Any changes that Y is responsible for get passed to
its parent. Note we make one pass through all of the scapegoats
(parents) to attempt to pass blame normally; we don't know if we *need*
to ignore the commit until we've checked all of the parents.
The blame_entry will get passed up the tree until we find a commit that
has a diff chunk that affects those lines.
One issue is that the ignored commit *did* make some change, and there is
no general solution to finding the line in the parent commit that
corresponds to a given line in the ignored commit. That makes it hard
to attribute a particular line within an ignored commit's diff
correctly.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) #include "a.h"
commit-b 12) #include "b.h"
Commit X, which we will ignore, swaps these lines:
commit-X 11) #include "b.h"
commit-X 12) #include "a.h"
We can pass that blame entry to the parent, but line 11 will be
attributed to commit A, even though "include b.h" came from commit B.
The blame mechanism will be looking at the parent's view of the file at
line number 11.
ignore_blame_entry() is set up to allow alternative algorithms for
guessing per-line blames. Any line that is not attributed to the parent
will continue to be blamed on the ignored commit as if that commit was
not ignored. Upcoming patches have the ability to detect these lines
and mark them in the blame output.
The existing algorithm is simple: blame each line on the corresponding
line in the parent's diff chunk. Any lines beyond that stay with the
target.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) void new_func_1(void *x, void *y);
commit-b 12) void new_func_2(void *x, void *y);
commit-c 13) some_line_c
commit-d 14) some_line_d
After a commit 'X', we have:
commit-X 11) void new_func_1(void *x,
commit-X 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Commit X nets two additionally lines: 13 and 14. The current
guess_line_blames() algorithm will not attribute these to the parent,
whose diff chunk is only two lines - not four.
When we ignore with the current algorithm, we get:
commit-a 11) void new_func_1(void *x,
commit-b 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Note that line 12 was blamed on B, though B was the commit for
new_func_2(), not new_func_1(). Even when guess_line_blames() finds a
line in the parent, it may still be incorrect.
Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
if (!strcmp(var, "blame.ignorerevsfile")) {
|
|
|
|
const char *str;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = git_config_pathname(&str, var, value);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
string_list_insert(&ignore_revs_file_list, str);
|
|
|
|
return 0;
|
|
|
|
}
|
blame: add config options for the output of ignored or unblamable lines
When ignoring commits, the commit that is blamed might not be
responsible for the change, due to the inaccuracy of our heuristic.
Users might want to know when a particular line has a potentially
inaccurate blame.
Furthermore, guess_line_blames() may fail to find any parent commit for
a given line touched by an ignored commit. Those 'unblamable' lines
remain blamed on an ignored commit. Users might want to know if a line
is unblamable so that they do not spend time investigating a commit they
know is uninteresting.
This patch adds two config options to mark these two types of lines in
the output of blame.
The first option can identify ignored lines by specifying
blame.markIgnoredLines. When this option is set, each blame line that
was blamed on a commit other than the ignored commit is marked with a
'?'.
For example:
278b6158d6fdb (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
appears as:
?278b6158d6fd (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
where the '?' is placed before the commit, and the hash has one fewer
characters.
Sometimes we are unable to even guess at what ancestor commit touched a
line. These lines are 'unblamable.' The second option,
blame.markUnblamableLines, will mark the line with '*'.
For example, say we ignore e5e8d36d04cbe, yet we are unable to blame
this line on another commit:
e5e8d36d04cbe (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
appears as:
*e5e8d36d04cb (Barret Rhoden 2016-04-11 13:57:54 -0400 26)
When these config options are used together, every line touched by an
ignored commit will be marked with either a '?' or a '*'.
Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
if (!strcmp(var, "blame.markunblamablelines")) {
|
|
|
|
mark_unblamable_lines = git_config_bool(var, value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(var, "blame.markignoredlines")) {
|
|
|
|
mark_ignored_lines = git_config_bool(var, value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(var, "color.blame.repeatedlines")) {
|
|
|
|
if (color_parse_mem(value, strlen(value), repeated_meta_color))
|
|
|
|
warning(_("invalid value for '%s': '%s'"),
|
|
|
|
"color.blame.repeatedLines", value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(var, "color.blame.highlightrecent")) {
|
|
|
|
parse_color_fields(value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!strcmp(var, "blame.coloring")) {
|
|
|
|
if (!strcmp(value, "repeatedLines")) {
|
|
|
|
coloring_mode |= OUTPUT_COLOR_LINE;
|
|
|
|
} else if (!strcmp(value, "highlightRecent")) {
|
|
|
|
coloring_mode |= OUTPUT_SHOW_AGE_WITH_COLOR;
|
|
|
|
} else if (!strcmp(value, "none")) {
|
|
|
|
coloring_mode &= ~(OUTPUT_COLOR_LINE |
|
|
|
|
OUTPUT_SHOW_AGE_WITH_COLOR);
|
|
|
|
} else {
|
|
|
|
warning(_("invalid value for '%s': '%s'"),
|
|
|
|
"blame.coloring", value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (git_diff_heuristic_config(var, value, cb) < 0)
|
|
|
|
return -1;
|
drop odd return value semantics from userdiff_config
When the userdiff_config function was introduced in be58e70
(diff: unify external diff and funcname parsing code,
2008-10-05), it used a return value convention unlike any
other config callback. Like other callbacks, it used "-1" to
signal error. But it returned "1" to indicate that it found
something, and "0" otherwise; other callbacks simply
returned "0" to indicate that no error occurred.
This distinction was necessary at the time, because the
userdiff namespace overlapped slightly with the color
configuration namespace. So "diff.color.foo" could mean "the
'foo' slot of diff coloring" or "the 'foo' component of the
"color" userdiff driver". Because the color-parsing code
would die on an unknown color slot, we needed the userdiff
code to indicate that it had matched the variable, letting
us bypass the color-parsing code entirely.
Later, in 8b8e862 (ignore unknown color configuration,
2009-12-12), the color-parsing code learned to silently
ignore unknown slots. This means we no longer need to
protect userdiff-matched variables from reaching the
color-parsing code.
We can therefore change the userdiff_config calling
convention to a more normal one. This drops some code from
each caller, which is nice. But more importantly, it reduces
the cognitive load for readers who may wonder why
userdiff_config is unlike every other config callback.
There's no need to add a new test confirming that this
works; t4020 already contains a test that sets
diff.color.external.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
if (userdiff_config(var, value) < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
return git_default_config(var, value, cb);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int blame_copy_callback(const struct option *option, const char *arg, int unset)
|
|
|
|
{
|
|
|
|
int *opt = option->value;
|
|
|
|
|
assert NOARG/NONEG behavior of parse-options callbacks
When we define a parse-options callback, the flags we put in the option
struct must match what the callback expects. For example, a callback
which does not handle the "unset" parameter should only be used with
PARSE_OPT_NONEG. But since the callback and the option struct are not
defined next to each other, it's easy to get this wrong (as earlier
patches in this series show).
Fortunately, the compiler can help us here: compiling with
-Wunused-parameters can show us which callbacks ignore their "unset"
parameters (and likewise, ones that ignore "arg" expect to be triggered
with PARSE_OPT_NOARG).
But after we've inspected a callback and determined that all of its
callers use the right flags, what do we do next? We'd like to silence
the compiler warning, but do so in a way that will catch any wrong calls
in the future.
We can do that by actually checking those variables and asserting that
they match our expectations. Because this is such a common pattern,
we'll introduce some helper macros. The resulting messages aren't
as descriptive as we could make them, but the file/line information from
BUG() is enough to identify the problem (and anyway, the point is that
these should never be seen).
Each of the annotated callbacks in this patch triggers
-Wunused-parameters, and was manually inspected to make sure all callers
use the correct options (so none of these BUGs should be triggerable).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
BUG_ON_OPT_NEG(unset);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* -C enables copy from removed files;
|
|
|
|
* -C -C enables copy from existing files, but only
|
|
|
|
* when blaming a new file;
|
|
|
|
* -C -C -C enables copy from existing files for
|
|
|
|
* everybody
|
|
|
|
*/
|
|
|
|
if (*opt & PICKAXE_BLAME_COPY_HARDER)
|
|
|
|
*opt |= PICKAXE_BLAME_COPY_HARDEST;
|
|
|
|
if (*opt & PICKAXE_BLAME_COPY)
|
|
|
|
*opt |= PICKAXE_BLAME_COPY_HARDER;
|
|
|
|
*opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
|
|
|
|
|
|
|
|
if (arg)
|
|
|
|
blame_copy_score = parse_score(arg);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int blame_move_callback(const struct option *option, const char *arg, int unset)
|
|
|
|
{
|
|
|
|
int *opt = option->value;
|
|
|
|
|
assert NOARG/NONEG behavior of parse-options callbacks
When we define a parse-options callback, the flags we put in the option
struct must match what the callback expects. For example, a callback
which does not handle the "unset" parameter should only be used with
PARSE_OPT_NONEG. But since the callback and the option struct are not
defined next to each other, it's easy to get this wrong (as earlier
patches in this series show).
Fortunately, the compiler can help us here: compiling with
-Wunused-parameters can show us which callbacks ignore their "unset"
parameters (and likewise, ones that ignore "arg" expect to be triggered
with PARSE_OPT_NOARG).
But after we've inspected a callback and determined that all of its
callers use the right flags, what do we do next? We'd like to silence
the compiler warning, but do so in a way that will catch any wrong calls
in the future.
We can do that by actually checking those variables and asserting that
they match our expectations. Because this is such a common pattern,
we'll introduce some helper macros. The resulting messages aren't
as descriptive as we could make them, but the file/line information from
BUG() is enough to identify the problem (and anyway, the point is that
these should never be seen).
Each of the annotated callbacks in this patch triggers
-Wunused-parameters, and was manually inspected to make sure all callers
use the correct options (so none of these BUGs should be triggerable).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
BUG_ON_OPT_NEG(unset);
|
|
|
|
|
|
|
|
*opt |= PICKAXE_BLAME_MOVE;
|
|
|
|
|
|
|
|
if (arg)
|
|
|
|
blame_move_score = parse_score(arg);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
blame: tighten command line parser
The command line parser of "git blame" is prepared to take an
ancient odd argument order "blame <path> <rev>" in addition to the
usual "blame [<rev>] <path>". It has at least two negative
ramifications:
- In order to tell these two apart, it checks if the last command
line argument names a path in the working tree, using
file_exists(). However, "blame <rev> <path>" is a request to
explain each and every line in the contents of <path> stored in
revision <rev> and does not need to have a working tree version
of the file. A check with file_exists() is simply wrong.
- To coerce that mistaken file_exists() check to work, the code
calls setup_work_tree() before doing so, because the path it has
is relative to the top-level of the project tree. However,
"blame <rev> <path>" MUST be usable even in a bare repository,
and there is no reason for letting setup_work_tree() complain
and die with "This operation must be run in a work tree".
To correct the former, switch to check if the last token is a
revision (and if so, parse arguments using "blame <path> <rev>"
rule). Correct the latter by getting rid of setup_work_tree() and
file_exists() check--the only case the call to this function matters
is when we are running "blame <path>" (i.e. no starting revision and
asking to blame the working tree file at <path>, digging through the
HEAD revision), but there is a call in setup_scoreboard() just
before it calls fake_working_tree_commit().
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
static int is_a_rev(const char *name)
|
|
|
|
{
|
|
|
|
struct object_id oid;
|
|
|
|
|
|
|
|
if (repo_get_oid(the_repository, name, &oid))
|
blame: tighten command line parser
The command line parser of "git blame" is prepared to take an
ancient odd argument order "blame <path> <rev>" in addition to the
usual "blame [<rev>] <path>". It has at least two negative
ramifications:
- In order to tell these two apart, it checks if the last command
line argument names a path in the working tree, using
file_exists(). However, "blame <rev> <path>" is a request to
explain each and every line in the contents of <path> stored in
revision <rev> and does not need to have a working tree version
of the file. A check with file_exists() is simply wrong.
- To coerce that mistaken file_exists() check to work, the code
calls setup_work_tree() before doing so, because the path it has
is relative to the top-level of the project tree. However,
"blame <rev> <path>" MUST be usable even in a bare repository,
and there is no reason for letting setup_work_tree() complain
and die with "This operation must be run in a work tree".
To correct the former, switch to check if the last token is a
revision (and if so, parse arguments using "blame <path> <rev>"
rule). Correct the latter by getting rid of setup_work_tree() and
file_exists() check--the only case the call to this function matters
is when we are running "blame <path>" (i.e. no starting revision and
asking to blame the working tree file at <path>, digging through the
HEAD revision), but there is a call in setup_scoreboard() just
before it calls fake_working_tree_commit().
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
return 0;
|
|
|
|
return OBJ_NONE < oid_object_info(the_repository, &oid, NULL);
|
blame: tighten command line parser
The command line parser of "git blame" is prepared to take an
ancient odd argument order "blame <path> <rev>" in addition to the
usual "blame [<rev>] <path>". It has at least two negative
ramifications:
- In order to tell these two apart, it checks if the last command
line argument names a path in the working tree, using
file_exists(). However, "blame <rev> <path>" is a request to
explain each and every line in the contents of <path> stored in
revision <rev> and does not need to have a working tree version
of the file. A check with file_exists() is simply wrong.
- To coerce that mistaken file_exists() check to work, the code
calls setup_work_tree() before doing so, because the path it has
is relative to the top-level of the project tree. However,
"blame <rev> <path>" MUST be usable even in a bare repository,
and there is no reason for letting setup_work_tree() complain
and die with "This operation must be run in a work tree".
To correct the former, switch to check if the last token is a
revision (and if so, parse arguments using "blame <path> <rev>"
rule). Correct the latter by getting rid of setup_work_tree() and
file_exists() check--the only case the call to this function matters
is when we are running "blame <path>" (i.e. no starting revision and
asking to blame the working tree file at <path>, digging through the
HEAD revision), but there is a call in setup_scoreboard() just
before it calls fake_working_tree_commit().
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
}
|
|
|
|
|
|
|
|
static int peel_to_commit_oid(struct object_id *oid_ret, void *cbdata)
|
|
|
|
{
|
|
|
|
struct repository *r = ((struct blame_scoreboard *)cbdata)->repo;
|
|
|
|
struct object_id oid;
|
|
|
|
|
|
|
|
oidcpy(&oid, oid_ret);
|
|
|
|
while (1) {
|
|
|
|
struct object *obj;
|
|
|
|
int kind = oid_object_info(r, &oid, NULL);
|
|
|
|
if (kind == OBJ_COMMIT) {
|
|
|
|
oidcpy(oid_ret, &oid);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (kind != OBJ_TAG)
|
|
|
|
return -1;
|
|
|
|
obj = deref_tag(r, parse_object(r, &oid), NULL, 0);
|
|
|
|
if (!obj)
|
|
|
|
return -1;
|
|
|
|
oidcpy(&oid, &obj->oid);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
blame: add the ability to ignore commits and their changes
Commits that make formatting changes or function renames are often not
interesting when blaming a file. A user may deem such a commit as 'not
interesting' and want to ignore and its changes it when assigning blame.
For example, say a file has the following git history / rev-list:
---O---A---X---B---C---D---Y---E---F
Commits X and Y both touch a particular line, and the other commits do
not:
X: "Take a third parameter"
-MyFunc(1, 2);
+MyFunc(1, 2, 3);
Y: "Remove camelcase"
-MyFunc(1, 2, 3);
+my_func(1, 2, 3);
git-blame will blame Y for the change. I'd like to be able to ignore Y:
both the existence of the commit as well as any changes it made. This
differs from -S rev-list, which specifies the list of commits to
process for the blame. We would still process Y, but just don't let the
blame 'stick.'
This patch adds the ability for users to ignore a revision with
--ignore-rev=rev, which may be repeated. They can specify a set of
files of full object names of revs, e.g. SHA-1 hashes, one per line. A
single file may be specified with the blame.ignoreRevFile config option
or with --ignore-rev-file=file. Both the config option and the command
line option may be repeated multiple times. An empty file name "" will
clear the list of revs from previously processed files. Config options
are processed before command line options.
For a typical use case, projects will maintain the file containing
revisions for commits that perform mass reformatting, and their users
have the option to ignore all of the commits in that file.
Additionally, a user can use the --ignore-rev option for one-off
investigation. To go back to the example above, X was a substantive
change to the function, but not the change the user is interested in.
The user inspected X, but wanted to find the previous change to that
line - perhaps a commit that introduced that function call.
To make this work, we can't simply remove all ignored commits from the
rev-list. We need to diff the changes introduced by Y so that we can
ignore them. We let the blames get passed to Y, just like when
processing normally. When Y is the target, we make sure that Y does not
*keep* any blames. Any changes that Y is responsible for get passed to
its parent. Note we make one pass through all of the scapegoats
(parents) to attempt to pass blame normally; we don't know if we *need*
to ignore the commit until we've checked all of the parents.
The blame_entry will get passed up the tree until we find a commit that
has a diff chunk that affects those lines.
One issue is that the ignored commit *did* make some change, and there is
no general solution to finding the line in the parent commit that
corresponds to a given line in the ignored commit. That makes it hard
to attribute a particular line within an ignored commit's diff
correctly.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) #include "a.h"
commit-b 12) #include "b.h"
Commit X, which we will ignore, swaps these lines:
commit-X 11) #include "b.h"
commit-X 12) #include "a.h"
We can pass that blame entry to the parent, but line 11 will be
attributed to commit A, even though "include b.h" came from commit B.
The blame mechanism will be looking at the parent's view of the file at
line number 11.
ignore_blame_entry() is set up to allow alternative algorithms for
guessing per-line blames. Any line that is not attributed to the parent
will continue to be blamed on the ignored commit as if that commit was
not ignored. Upcoming patches have the ability to detect these lines
and mark them in the blame output.
The existing algorithm is simple: blame each line on the corresponding
line in the parent's diff chunk. Any lines beyond that stay with the
target.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) void new_func_1(void *x, void *y);
commit-b 12) void new_func_2(void *x, void *y);
commit-c 13) some_line_c
commit-d 14) some_line_d
After a commit 'X', we have:
commit-X 11) void new_func_1(void *x,
commit-X 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Commit X nets two additionally lines: 13 and 14. The current
guess_line_blames() algorithm will not attribute these to the parent,
whose diff chunk is only two lines - not four.
When we ignore with the current algorithm, we get:
commit-a 11) void new_func_1(void *x,
commit-b 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Note that line 12 was blamed on B, though B was the commit for
new_func_2(), not new_func_1(). Even when guess_line_blames() finds a
line in the parent, it may still be incorrect.
Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
static void build_ignorelist(struct blame_scoreboard *sb,
|
|
|
|
struct string_list *ignore_revs_file_list,
|
|
|
|
struct string_list *ignore_rev_list)
|
|
|
|
{
|
|
|
|
struct string_list_item *i;
|
|
|
|
struct object_id oid;
|
|
|
|
|
|
|
|
oidset_init(&sb->ignore_list, 0);
|
|
|
|
for_each_string_list_item(i, ignore_revs_file_list) {
|
|
|
|
if (!strcmp(i->string, ""))
|
|
|
|
oidset_clear(&sb->ignore_list);
|
|
|
|
else
|
|
|
|
oidset_parse_file_carefully(&sb->ignore_list, i->string,
|
|
|
|
peel_to_commit_oid, sb);
|
blame: add the ability to ignore commits and their changes
Commits that make formatting changes or function renames are often not
interesting when blaming a file. A user may deem such a commit as 'not
interesting' and want to ignore and its changes it when assigning blame.
For example, say a file has the following git history / rev-list:
---O---A---X---B---C---D---Y---E---F
Commits X and Y both touch a particular line, and the other commits do
not:
X: "Take a third parameter"
-MyFunc(1, 2);
+MyFunc(1, 2, 3);
Y: "Remove camelcase"
-MyFunc(1, 2, 3);
+my_func(1, 2, 3);
git-blame will blame Y for the change. I'd like to be able to ignore Y:
both the existence of the commit as well as any changes it made. This
differs from -S rev-list, which specifies the list of commits to
process for the blame. We would still process Y, but just don't let the
blame 'stick.'
This patch adds the ability for users to ignore a revision with
--ignore-rev=rev, which may be repeated. They can specify a set of
files of full object names of revs, e.g. SHA-1 hashes, one per line. A
single file may be specified with the blame.ignoreRevFile config option
or with --ignore-rev-file=file. Both the config option and the command
line option may be repeated multiple times. An empty file name "" will
clear the list of revs from previously processed files. Config options
are processed before command line options.
For a typical use case, projects will maintain the file containing
revisions for commits that perform mass reformatting, and their users
have the option to ignore all of the commits in that file.
Additionally, a user can use the --ignore-rev option for one-off
investigation. To go back to the example above, X was a substantive
change to the function, but not the change the user is interested in.
The user inspected X, but wanted to find the previous change to that
line - perhaps a commit that introduced that function call.
To make this work, we can't simply remove all ignored commits from the
rev-list. We need to diff the changes introduced by Y so that we can
ignore them. We let the blames get passed to Y, just like when
processing normally. When Y is the target, we make sure that Y does not
*keep* any blames. Any changes that Y is responsible for get passed to
its parent. Note we make one pass through all of the scapegoats
(parents) to attempt to pass blame normally; we don't know if we *need*
to ignore the commit until we've checked all of the parents.
The blame_entry will get passed up the tree until we find a commit that
has a diff chunk that affects those lines.
One issue is that the ignored commit *did* make some change, and there is
no general solution to finding the line in the parent commit that
corresponds to a given line in the ignored commit. That makes it hard
to attribute a particular line within an ignored commit's diff
correctly.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) #include "a.h"
commit-b 12) #include "b.h"
Commit X, which we will ignore, swaps these lines:
commit-X 11) #include "b.h"
commit-X 12) #include "a.h"
We can pass that blame entry to the parent, but line 11 will be
attributed to commit A, even though "include b.h" came from commit B.
The blame mechanism will be looking at the parent's view of the file at
line number 11.
ignore_blame_entry() is set up to allow alternative algorithms for
guessing per-line blames. Any line that is not attributed to the parent
will continue to be blamed on the ignored commit as if that commit was
not ignored. Upcoming patches have the ability to detect these lines
and mark them in the blame output.
The existing algorithm is simple: blame each line on the corresponding
line in the parent's diff chunk. Any lines beyond that stay with the
target.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) void new_func_1(void *x, void *y);
commit-b 12) void new_func_2(void *x, void *y);
commit-c 13) some_line_c
commit-d 14) some_line_d
After a commit 'X', we have:
commit-X 11) void new_func_1(void *x,
commit-X 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Commit X nets two additionally lines: 13 and 14. The current
guess_line_blames() algorithm will not attribute these to the parent,
whose diff chunk is only two lines - not four.
When we ignore with the current algorithm, we get:
commit-a 11) void new_func_1(void *x,
commit-b 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Note that line 12 was blamed on B, though B was the commit for
new_func_2(), not new_func_1(). Even when guess_line_blames() finds a
line in the parent, it may still be incorrect.
Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
}
|
|
|
|
for_each_string_list_item(i, ignore_rev_list) {
|
|
|
|
if (repo_get_oid_committish(the_repository, i->string, &oid) ||
|
|
|
|
peel_to_commit_oid(&oid, sb))
|
blame: add the ability to ignore commits and their changes
Commits that make formatting changes or function renames are often not
interesting when blaming a file. A user may deem such a commit as 'not
interesting' and want to ignore and its changes it when assigning blame.
For example, say a file has the following git history / rev-list:
---O---A---X---B---C---D---Y---E---F
Commits X and Y both touch a particular line, and the other commits do
not:
X: "Take a third parameter"
-MyFunc(1, 2);
+MyFunc(1, 2, 3);
Y: "Remove camelcase"
-MyFunc(1, 2, 3);
+my_func(1, 2, 3);
git-blame will blame Y for the change. I'd like to be able to ignore Y:
both the existence of the commit as well as any changes it made. This
differs from -S rev-list, which specifies the list of commits to
process for the blame. We would still process Y, but just don't let the
blame 'stick.'
This patch adds the ability for users to ignore a revision with
--ignore-rev=rev, which may be repeated. They can specify a set of
files of full object names of revs, e.g. SHA-1 hashes, one per line. A
single file may be specified with the blame.ignoreRevFile config option
or with --ignore-rev-file=file. Both the config option and the command
line option may be repeated multiple times. An empty file name "" will
clear the list of revs from previously processed files. Config options
are processed before command line options.
For a typical use case, projects will maintain the file containing
revisions for commits that perform mass reformatting, and their users
have the option to ignore all of the commits in that file.
Additionally, a user can use the --ignore-rev option for one-off
investigation. To go back to the example above, X was a substantive
change to the function, but not the change the user is interested in.
The user inspected X, but wanted to find the previous change to that
line - perhaps a commit that introduced that function call.
To make this work, we can't simply remove all ignored commits from the
rev-list. We need to diff the changes introduced by Y so that we can
ignore them. We let the blames get passed to Y, just like when
processing normally. When Y is the target, we make sure that Y does not
*keep* any blames. Any changes that Y is responsible for get passed to
its parent. Note we make one pass through all of the scapegoats
(parents) to attempt to pass blame normally; we don't know if we *need*
to ignore the commit until we've checked all of the parents.
The blame_entry will get passed up the tree until we find a commit that
has a diff chunk that affects those lines.
One issue is that the ignored commit *did* make some change, and there is
no general solution to finding the line in the parent commit that
corresponds to a given line in the ignored commit. That makes it hard
to attribute a particular line within an ignored commit's diff
correctly.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) #include "a.h"
commit-b 12) #include "b.h"
Commit X, which we will ignore, swaps these lines:
commit-X 11) #include "b.h"
commit-X 12) #include "a.h"
We can pass that blame entry to the parent, but line 11 will be
attributed to commit A, even though "include b.h" came from commit B.
The blame mechanism will be looking at the parent's view of the file at
line number 11.
ignore_blame_entry() is set up to allow alternative algorithms for
guessing per-line blames. Any line that is not attributed to the parent
will continue to be blamed on the ignored commit as if that commit was
not ignored. Upcoming patches have the ability to detect these lines
and mark them in the blame output.
The existing algorithm is simple: blame each line on the corresponding
line in the parent's diff chunk. Any lines beyond that stay with the
target.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) void new_func_1(void *x, void *y);
commit-b 12) void new_func_2(void *x, void *y);
commit-c 13) some_line_c
commit-d 14) some_line_d
After a commit 'X', we have:
commit-X 11) void new_func_1(void *x,
commit-X 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Commit X nets two additionally lines: 13 and 14. The current
guess_line_blames() algorithm will not attribute these to the parent,
whose diff chunk is only two lines - not four.
When we ignore with the current algorithm, we get:
commit-a 11) void new_func_1(void *x,
commit-b 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Note that line 12 was blamed on B, though B was the commit for
new_func_2(), not new_func_1(). Even when guess_line_blames() finds a
line in the parent, it may still be incorrect.
Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
die(_("cannot find revision %s to ignore"), i->string);
|
|
|
|
oidset_insert(&sb->ignore_list, &oid);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int cmd_blame(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
|
|
|
struct rev_info revs;
|
|
|
|
const char *path;
|
|
|
|
struct blame_scoreboard sb;
|
|
|
|
struct blame_origin *o;
|
|
|
|
struct blame_entry *ent = NULL;
|
|
|
|
long dashdash_pos, lno;
|
|
|
|
struct progress_info pi = { NULL, 0 };
|
|
|
|
|
|
|
|
struct string_list range_list = STRING_LIST_INIT_NODUP;
|
blame: add the ability to ignore commits and their changes
Commits that make formatting changes or function renames are often not
interesting when blaming a file. A user may deem such a commit as 'not
interesting' and want to ignore and its changes it when assigning blame.
For example, say a file has the following git history / rev-list:
---O---A---X---B---C---D---Y---E---F
Commits X and Y both touch a particular line, and the other commits do
not:
X: "Take a third parameter"
-MyFunc(1, 2);
+MyFunc(1, 2, 3);
Y: "Remove camelcase"
-MyFunc(1, 2, 3);
+my_func(1, 2, 3);
git-blame will blame Y for the change. I'd like to be able to ignore Y:
both the existence of the commit as well as any changes it made. This
differs from -S rev-list, which specifies the list of commits to
process for the blame. We would still process Y, but just don't let the
blame 'stick.'
This patch adds the ability for users to ignore a revision with
--ignore-rev=rev, which may be repeated. They can specify a set of
files of full object names of revs, e.g. SHA-1 hashes, one per line. A
single file may be specified with the blame.ignoreRevFile config option
or with --ignore-rev-file=file. Both the config option and the command
line option may be repeated multiple times. An empty file name "" will
clear the list of revs from previously processed files. Config options
are processed before command line options.
For a typical use case, projects will maintain the file containing
revisions for commits that perform mass reformatting, and their users
have the option to ignore all of the commits in that file.
Additionally, a user can use the --ignore-rev option for one-off
investigation. To go back to the example above, X was a substantive
change to the function, but not the change the user is interested in.
The user inspected X, but wanted to find the previous change to that
line - perhaps a commit that introduced that function call.
To make this work, we can't simply remove all ignored commits from the
rev-list. We need to diff the changes introduced by Y so that we can
ignore them. We let the blames get passed to Y, just like when
processing normally. When Y is the target, we make sure that Y does not
*keep* any blames. Any changes that Y is responsible for get passed to
its parent. Note we make one pass through all of the scapegoats
(parents) to attempt to pass blame normally; we don't know if we *need*
to ignore the commit until we've checked all of the parents.
The blame_entry will get passed up the tree until we find a commit that
has a diff chunk that affects those lines.
One issue is that the ignored commit *did* make some change, and there is
no general solution to finding the line in the parent commit that
corresponds to a given line in the ignored commit. That makes it hard
to attribute a particular line within an ignored commit's diff
correctly.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) #include "a.h"
commit-b 12) #include "b.h"
Commit X, which we will ignore, swaps these lines:
commit-X 11) #include "b.h"
commit-X 12) #include "a.h"
We can pass that blame entry to the parent, but line 11 will be
attributed to commit A, even though "include b.h" came from commit B.
The blame mechanism will be looking at the parent's view of the file at
line number 11.
ignore_blame_entry() is set up to allow alternative algorithms for
guessing per-line blames. Any line that is not attributed to the parent
will continue to be blamed on the ignored commit as if that commit was
not ignored. Upcoming patches have the ability to detect these lines
and mark them in the blame output.
The existing algorithm is simple: blame each line on the corresponding
line in the parent's diff chunk. Any lines beyond that stay with the
target.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) void new_func_1(void *x, void *y);
commit-b 12) void new_func_2(void *x, void *y);
commit-c 13) some_line_c
commit-d 14) some_line_d
After a commit 'X', we have:
commit-X 11) void new_func_1(void *x,
commit-X 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Commit X nets two additionally lines: 13 and 14. The current
guess_line_blames() algorithm will not attribute these to the parent,
whose diff chunk is only two lines - not four.
When we ignore with the current algorithm, we get:
commit-a 11) void new_func_1(void *x,
commit-b 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Note that line 12 was blamed on B, though B was the commit for
new_func_2(), not new_func_1(). Even when guess_line_blames() finds a
line in the parent, it may still be incorrect.
Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
struct string_list ignore_rev_list = STRING_LIST_INIT_NODUP;
|
|
|
|
int output_option = 0, opt = 0;
|
|
|
|
int show_stats = 0;
|
|
|
|
const char *revs_file = NULL;
|
|
|
|
const char *contents_from = NULL;
|
|
|
|
const struct option options[] = {
|
|
|
|
OPT_BOOL(0, "incremental", &incremental, N_("show blame entries as we find them, incrementally")),
|
|
|
|
OPT_BOOL('b', NULL, &blank_boundary, N_("do not show object names of boundary commits (Default: off)")),
|
|
|
|
OPT_BOOL(0, "root", &show_root, N_("do not treat root commits as boundaries (Default: off)")),
|
|
|
|
OPT_BOOL(0, "show-stats", &show_stats, N_("show work cost statistics")),
|
|
|
|
OPT_BOOL(0, "progress", &show_progress, N_("force progress reporting")),
|
|
|
|
OPT_BIT(0, "score-debug", &output_option, N_("show output score for blame entries"), OUTPUT_SHOW_SCORE),
|
|
|
|
OPT_BIT('f', "show-name", &output_option, N_("show original filename (Default: auto)"), OUTPUT_SHOW_NAME),
|
|
|
|
OPT_BIT('n', "show-number", &output_option, N_("show original linenumber (Default: off)"), OUTPUT_SHOW_NUMBER),
|
|
|
|
OPT_BIT('p', "porcelain", &output_option, N_("show in a format designed for machine consumption"), OUTPUT_PORCELAIN),
|
|
|
|
OPT_BIT(0, "line-porcelain", &output_option, N_("show porcelain format with per-line commit information"), OUTPUT_PORCELAIN|OUTPUT_LINE_PORCELAIN),
|
|
|
|
OPT_BIT('c', NULL, &output_option, N_("use the same output mode as git-annotate (Default: off)"), OUTPUT_ANNOTATE_COMPAT),
|
|
|
|
OPT_BIT('t', NULL, &output_option, N_("show raw timestamp (Default: off)"), OUTPUT_RAW_TIMESTAMP),
|
|
|
|
OPT_BIT('l', NULL, &output_option, N_("show long commit SHA1 (Default: off)"), OUTPUT_LONG_OBJECT_NAME),
|
|
|
|
OPT_BIT('s', NULL, &output_option, N_("suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
|
|
|
|
OPT_BIT('e', "show-email", &output_option, N_("show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
|
|
|
|
OPT_BIT('w', NULL, &xdl_opts, N_("ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
|
|
|
|
OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("ignore <rev> when blaming")),
|
|
|
|
OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("ignore revisions from <file>")),
|
|
|
|
OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
|
|
|
|
OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
|
|
|
|
OPT_BIT(0, "minimal", &xdl_opts, N_("spend extra cycles to find better match"), XDF_NEED_MINIMAL),
|
|
|
|
OPT_STRING('S', NULL, &revs_file, N_("file"), N_("use revisions from <file> instead of calling git-rev-list")),
|
|
|
|
OPT_STRING(0, "contents", &contents_from, N_("file"), N_("use <file>'s contents as the final image")),
|
|
|
|
OPT_CALLBACK_F('C', NULL, &opt, N_("score"), N_("find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback),
|
|
|
|
OPT_CALLBACK_F('M', NULL, &opt, N_("score"), N_("find line movements within and across files"), PARSE_OPT_OPTARG, blame_move_callback),
|
|
|
|
OPT_STRING_LIST('L', NULL, &range_list, N_("range"),
|
|
|
|
N_("process only line range <start>,<end> or function :<funcname>")),
|
|
|
|
OPT__ABBREV(&abbrev),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
|
|
|
struct parse_opt_ctx_t ctx;
|
|
|
|
int cmd_is_annotate = !strcmp(argv[0], "annotate");
|
|
|
|
struct range_set ranges;
|
|
|
|
unsigned int range_i;
|
|
|
|
long anchor;
|
|
|
|
const int hexsz = the_hash_algo->hexsz;
|
|
|
|
long num_lines = 0;
|
|
|
|
const char *str_usage = cmd_is_annotate ? annotate_usage : blame_usage;
|
|
|
|
const char **opt_usage = cmd_is_annotate ? annotate_opt_usage : blame_opt_usage;
|
|
|
|
|
|
|
|
setup_default_color_by_age();
|
|
|
|
git_config(git_blame_config, &output_option);
|
|
|
|
repo_init_revisions(the_repository, &revs, NULL);
|
|
|
|
revs.date_mode = blame_date_mode;
|
|
|
|
revs.diffopt.flags.allow_textconv = 1;
|
|
|
|
revs.diffopt.flags.follow_renames = 1;
|
|
|
|
|
|
|
|
save_commit_buffer = 0;
|
|
|
|
dashdash_pos = 0;
|
|
|
|
show_progress = -1;
|
|
|
|
|
|
|
|
parse_options_start(&ctx, argc, argv, prefix, options,
|
|
|
|
PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_ARGV0);
|
|
|
|
for (;;) {
|
|
|
|
switch (parse_options_step(&ctx, options, opt_usage)) {
|
|
|
|
case PARSE_OPT_NON_OPTION:
|
|
|
|
case PARSE_OPT_UNKNOWN:
|
|
|
|
break;
|
|
|
|
case PARSE_OPT_HELP:
|
|
|
|
case PARSE_OPT_ERROR:
|
parse-options: add support for parsing subcommands
Several Git commands have subcommands to implement mutually exclusive
"operation modes", and they usually parse their subcommand argument
with a bunch of if-else if statements.
Teach parse-options to handle subcommands as well, which will result
in shorter and simpler code with consistent error handling and error
messages on unknown or missing subcommand, and it will also make
possible for our Bash completion script to handle subcommands
programmatically.
The approach is guided by the following observations:
- Most subcommands [1] are implemented in dedicated functions, and
most of those functions [2] either have a signature matching the
'int cmd_foo(int argc, const char **argc, const char *prefix)'
signature of builtin commands or can be trivially converted to
that signature, because they miss only that last prefix parameter
or have no parameters at all.
- Subcommand arguments only have long form, and they have no double
dash prefix, no negated form, and no description, and they don't
take any arguments, and can't be abbreviated.
- There must be exactly one subcommand among the arguments, or zero
if the command has a default operation mode.
- All arguments following the subcommand are considered to be
arguments of the subcommand, and, conversely, arguments meant for
the subcommand may not preceed the subcommand.
So in the end subcommand declaration and parsing would look something
like this:
parse_opt_subcommand_fn *fn = NULL;
struct option builtin_commit_graph_options[] = {
OPT_STRING(0, "object-dir", &opts.obj_dir, N_("dir"),
N_("the object directory to store the graph")),
OPT_SUBCOMMAND("verify", &fn, graph_verify),
OPT_SUBCOMMAND("write", &fn, graph_write),
OPT_END(),
};
argc = parse_options(argc, argv, prefix, options,
builtin_commit_graph_usage, 0);
return fn(argc, argv, prefix);
Here each OPT_SUBCOMMAND specifies the name of the subcommand and the
function implementing it, and the address of the same 'fn' subcommand
function pointer. parse_options() then processes the arguments until
it finds the first argument matching one of the subcommands, sets 'fn'
to the function associated with that subcommand, and returns, leaving
the rest of the arguments unprocessed. If none of the listed
subcommands is found among the arguments, parse_options() will show
usage and abort.
If a command has a default operation mode, 'fn' should be initialized
to the function implementing that mode, and parse_options() should be
invoked with the PARSE_OPT_SUBCOMMAND_OPTIONAL flag. In this case
parse_options() won't error out when not finding any subcommands, but
will return leaving 'fn' unchanged. Note that if that default
operation mode has any --options, then the PARSE_OPT_KEEP_UNKNOWN_OPT
flag is necessary as well (otherwise parse_options() would error out
upon seeing the unknown option meant to the default operation mode).
Some thoughts about the implementation:
- The same pointer to 'fn' must be specified as 'value' for each
OPT_SUBCOMMAND, because there can be only one set of mutually
exclusive subcommands; parse_options() will BUG() otherwise.
There are other ways to tell parse_options() where to put the
function associated with the subcommand given on the command line,
but I didn't like them:
- Change parse_options()'s signature by adding a pointer to
subcommand function to be set to the function associated with
the given subcommand, affecting all callsites, even those that
don't have subcommands.
- Introduce a specific parse_options_and_subcommand() variant
with that extra funcion parameter.
- I decided against automatically calling the subcommand function
from within parse_options(), because:
- There are commands that have to perform additional actions
after option parsing but before calling the function
implementing the specified subcommand.
- The return code of the subcommand is usually the return code
of the git command, but preserving the return code of the
automatically called subcommand function would have made the
API awkward.
- Also add a OPT_SUBCOMMAND_F() variant to allow specifying an
option flag: we have two subcommands that are purposefully
excluded from completion ('git remote rm' and 'git stash save'),
so they'll have to be specified with the PARSE_OPT_NOCOMPLETE
flag.
- Some of the 'parse_opt_flags' don't make sense with subcommands,
and using them is probably just an oversight or misunderstanding.
Therefore parse_options() will BUG() when invoked with any of the
following flags while the options array contains at least one
OPT_SUBCOMMAND:
- PARSE_OPT_KEEP_DASHDASH: parse_options() stops parsing
arguments when encountering a "--" argument, so it doesn't
make sense to expect and keep one before a subcommand, because
it would prevent the parsing of the subcommand.
However, this flag is allowed in combination with the
PARSE_OPT_SUBCOMMAND_OPTIONAL flag, because the double dash
might be meaningful for the command's default operation mode,
e.g. to disambiguate refs and pathspecs.
- PARSE_OPT_STOP_AT_NON_OPTION: As its name suggests, this flag
tells parse_options() to stop as soon as it encouners a
non-option argument, but subcommands are by definition not
options... so how could they be parsed, then?!
- PARSE_OPT_KEEP_UNKNOWN: This flag can be used to collect any
unknown --options and then pass them to a different command or
subsystem. Surely if a command has subcommands, then this
functionality should rather be delegated to one of those
subcommands, and not performed by the command itself.
However, this flag is allowed in combination with the
PARSE_OPT_SUBCOMMAND_OPTIONAL flag, making possible to pass
--options to the default operation mode.
- If the command with subcommands has a default operation mode, then
all arguments to the command must preceed the arguments of the
subcommand.
AFAICT we don't have any commands where this makes a difference,
because in those commands either only the command accepts any
arguments ('notes' and 'remote'), or only the default subcommand
('reflog' and 'stash'), but never both.
- The 'argv' array passed to subcommand functions currently starts
with the name of the subcommand. Keep this behavior. AFAICT no
subcommand functions depend on the actual content of 'argv[0]',
but the parse_options() call handling their options expects that
the options start at argv[1].
- To support handling subcommands programmatically in our Bash
completion script, 'git cmd --git-completion-helper' will now list
both subcommands and regular --options, if any. This means that
the completion script will have to separate subcommands (i.e.
words without a double dash prefix) from --options on its own, but
that's rather easy to do, and it's not much work either, because
the number of subcommands a command might have is rather low, and
those commands accept only a single --option or none at all. An
alternative would be to introduce a separate option that lists
only subcommands, but then the completion script would need not
one but two git invocations and command substitutions for commands
with subcommands.
Note that this change doesn't affect the behavior of our Bash
completion script, because when completing the --option of a
command with subcommands, e.g. for 'git notes --<TAB>', then all
subcommands will be filtered out anyway, as none of them will
match the word to be completed starting with that double dash
prefix.
[1] Except 'git rerere', because many of its subcommands are
implemented in the bodies of the if-else if statements parsing the
command's subcommand argument.
[2] Except 'credential', 'credential-store' and 'fsmonitor--daemon',
because some of the functions implementing their subcommands take
special parameters.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
|
|
|
case PARSE_OPT_SUBCOMMAND:
|
|
|
|
exit(129);
|
|
|
|
case PARSE_OPT_COMPLETE:
|
|
|
|
exit(0);
|
|
|
|
case PARSE_OPT_DONE:
|
|
|
|
if (ctx.argv[0])
|
|
|
|
dashdash_pos = ctx.cpidx;
|
|
|
|
goto parse_done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!strcmp(ctx.argv[0], "--reverse")) {
|
|
|
|
ctx.argv[0] = "--children";
|
|
|
|
reverse = 1;
|
|
|
|
}
|
|
|
|
parse_revision_opt(&revs, &ctx, options, opt_usage);
|
|
|
|
}
|
|
|
|
parse_done:
|
|
|
|
revision_opts_finish(&revs);
|
|
|
|
no_whole_file_rename = !revs.diffopt.flags.follow_renames;
|
|
|
|
xdl_opts |= revs.diffopt.xdl_opts & XDF_INDENT_HEURISTIC;
|
|
|
|
revs.diffopt.flags.follow_renames = 0;
|
|
|
|
argc = parse_options_end(&ctx);
|
|
|
|
|
|
|
|
prepare_repo_settings(the_repository);
|
|
|
|
the_repository->settings.command_requires_full_index = 0;
|
|
|
|
|
|
|
|
if (incremental || (output_option & OUTPUT_PORCELAIN)) {
|
|
|
|
if (show_progress > 0)
|
|
|
|
die(_("--progress can't be used with --incremental or porcelain formats"));
|
|
|
|
show_progress = 0;
|
|
|
|
} else if (show_progress < 0)
|
|
|
|
show_progress = isatty(2);
|
|
|
|
|
|
|
|
if (0 < abbrev && abbrev < hexsz)
|
|
|
|
/* one more abbrev length is needed for the boundary commit */
|
|
|
|
abbrev++;
|
|
|
|
else if (!abbrev)
|
|
|
|
abbrev = hexsz;
|
|
|
|
|
|
|
|
if (revs_file && read_ancestry(revs_file))
|
|
|
|
die_errno("reading graft file '%s' failed", revs_file);
|
|
|
|
|
|
|
|
if (cmd_is_annotate) {
|
|
|
|
output_option |= OUTPUT_ANNOTATE_COMPAT;
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
blame_date_mode.type = DATE_ISO8601;
|
|
|
|
} else {
|
|
|
|
blame_date_mode = revs.date_mode;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The maximum width used to show the dates */
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
switch (blame_date_mode.type) {
|
|
|
|
case DATE_RFC2822:
|
|
|
|
blame_date_width = sizeof("Thu, 19 Oct 2006 16:00:04 -0700");
|
|
|
|
break;
|
|
|
|
case DATE_ISO8601_STRICT:
|
|
|
|
blame_date_width = sizeof("2006-10-19T16:00:04-07:00");
|
|
|
|
break;
|
|
|
|
case DATE_ISO8601:
|
|
|
|
blame_date_width = sizeof("2006-10-19 16:00:04 -0700");
|
|
|
|
break;
|
|
|
|
case DATE_RAW:
|
|
|
|
blame_date_width = sizeof("1161298804 -0700");
|
|
|
|
break;
|
|
|
|
case DATE_UNIX:
|
|
|
|
blame_date_width = sizeof("1161298804");
|
|
|
|
break;
|
|
|
|
case DATE_SHORT:
|
|
|
|
blame_date_width = sizeof("2006-10-19");
|
|
|
|
break;
|
|
|
|
case DATE_RELATIVE:
|
C style: use standard style for "TRANSLATORS" comments
Change all the "TRANSLATORS: [...]" comments in the C code to use the
regular Git coding style, and amend the style guide so that the
example there uses that style.
This custom style was necessary back in 2010 when the gettext support
was initially added, and was subsequently documented in commit
cbcfd4e3ea ("i18n: mention "TRANSLATORS:" marker in
Documentation/CodingGuidelines", 2014-04-18).
GNU xgettext hasn't had the parsing limitation that necessitated this
exception for almost 3 years. Since its 0.19 release on 2014-06-02
it's been able to recognize TRANSLATOR comments in the standard Git
comment syntax[1].
Usually we'd like to keep compatibility with software that's that
young, but in this case literally the only person who needs to be
using a gettext newer than 3 years old is Jiang Xin (the only person
who runs & commits "make pot" results), so I think in this case we can
make an exception.
This xgettext parsing feature was added after a thread on the Git
mailing list[2] which continued on the bug-gettext[3] list, but we
never subsequently changed our style & styleguide, do so.
There are already longstanding changes in git that use the standard
comment style & have their TRANSLATORS comments extracted properly
without getting the literal "*"'s mixed up in the text, as would
happen before xgettext 0.19.
Commit 7ff2683253 ("builtin-am: implement -i/--interactive",
2015-08-04) added one such comment, which in commit df0617bfa7 ("l10n:
git.pot: v2.6.0 round 1 (123 new, 41 removed)", 2015-09-05) got picked
up in the po/git.pot file with the right format, showing that Jiang
already runs a modern xgettext.
The xgettext parser does not handle the sort of non-standard comment
style that I'm amending here in sequencer.c, but that isn't standard
Git comment syntax anyway. With this change to sequencer.c & "make
pot" the comment in the pot file is now correct:
#. TRANSLATORS: %s will be "revert", "cherry-pick" or
-#. * "rebase -i".
+#. "rebase -i".
1. http://git.savannah.gnu.org/cgit/gettext.git/commit/?id=10af7fe6bd
2. <2ce9ec406501d112e032c8208417f8100bed04c6.1397712142.git.worldhello.net@gmail.com>
(https://public-inbox.org/git/2ce9ec406501d112e032c8208417f8100bed04c6.1397712142.git.worldhello.net@gmail.com/)
3. https://lists.gnu.org/archive/html/bug-gettext/2014-04/msg00016.html
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Acked-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
/*
|
|
|
|
* TRANSLATORS: This string is used to tell us the
|
|
|
|
* maximum display width for a relative timestamp in
|
|
|
|
* "git blame" output. For C locale, "4 years, 11
|
|
|
|
* months ago", which takes 22 places, is the longest
|
|
|
|
* among various forms of relative timestamps, but
|
|
|
|
* your language may need more or fewer display
|
|
|
|
* columns.
|
|
|
|
*/
|
|
|
|
blame_date_width = utf8_strwidth(_("4 years, 11 months ago")) + 1; /* add the null */
|
|
|
|
break;
|
|
|
|
case DATE_HUMAN:
|
|
|
|
/* If the year is shown, no time is shown */
|
|
|
|
blame_date_width = sizeof("Thu Oct 19 16:00");
|
|
|
|
break;
|
|
|
|
case DATE_NORMAL:
|
|
|
|
blame_date_width = sizeof("Thu Oct 19 16:00:04 2006 -0700");
|
|
|
|
break;
|
|
|
|
case DATE_STRFTIME:
|
|
|
|
blame_date_width = strlen(show_date(0, 0, &blame_date_mode)) + 1; /* add the null */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
blame_date_width -= 1; /* strip the null */
|
|
|
|
|
|
|
|
if (revs.diffopt.flags.find_copies_harder)
|
|
|
|
opt |= (PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE |
|
|
|
|
PICKAXE_BLAME_COPY_HARDER);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We have collected options unknown to us in argv[1..unk]
|
|
|
|
* which are to be passed to revision machinery if we are
|
|
|
|
* going to do the "bottom" processing.
|
|
|
|
*
|
|
|
|
* The remaining are:
|
|
|
|
*
|
|
|
|
* (1) if dashdash_pos != 0, it is either
|
|
|
|
* "blame [revisions] -- <path>" or
|
|
|
|
* "blame -- <path> <rev>"
|
|
|
|
*
|
|
|
|
* (2) otherwise, it is one of the two:
|
|
|
|
* "blame [revisions] <path>"
|
|
|
|
* "blame <path> <rev>"
|
|
|
|
*
|
|
|
|
* Note that we must strip out <path> from the arguments: we do not
|
|
|
|
* want the path pruning but we may want "bottom" processing.
|
|
|
|
*/
|
|
|
|
if (dashdash_pos) {
|
|
|
|
switch (argc - dashdash_pos - 1) {
|
|
|
|
case 2: /* (1b) */
|
|
|
|
if (argc != 4)
|
|
|
|
usage_with_options(opt_usage, options);
|
|
|
|
/* reorder for the new way: <rev> -- <path> */
|
|
|
|
argv[1] = argv[3];
|
|
|
|
argv[3] = argv[2];
|
|
|
|
argv[2] = "--";
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
case 1: /* (1a) */
|
|
|
|
path = add_prefix(prefix, argv[--argc]);
|
|
|
|
argv[argc] = NULL;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
usage_with_options(opt_usage, options);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (argc < 2)
|
|
|
|
usage_with_options(opt_usage, options);
|
blame: tighten command line parser
The command line parser of "git blame" is prepared to take an
ancient odd argument order "blame <path> <rev>" in addition to the
usual "blame [<rev>] <path>". It has at least two negative
ramifications:
- In order to tell these two apart, it checks if the last command
line argument names a path in the working tree, using
file_exists(). However, "blame <rev> <path>" is a request to
explain each and every line in the contents of <path> stored in
revision <rev> and does not need to have a working tree version
of the file. A check with file_exists() is simply wrong.
- To coerce that mistaken file_exists() check to work, the code
calls setup_work_tree() before doing so, because the path it has
is relative to the top-level of the project tree. However,
"blame <rev> <path>" MUST be usable even in a bare repository,
and there is no reason for letting setup_work_tree() complain
and die with "This operation must be run in a work tree".
To correct the former, switch to check if the last token is a
revision (and if so, parse arguments using "blame <path> <rev>"
rule). Correct the latter by getting rid of setup_work_tree() and
file_exists() check--the only case the call to this function matters
is when we are running "blame <path>" (i.e. no starting revision and
asking to blame the working tree file at <path>, digging through the
HEAD revision), but there is a call in setup_scoreboard() just
before it calls fake_working_tree_commit().
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
if (argc == 3 && is_a_rev(argv[argc - 1])) { /* (2b) */
|
|
|
|
path = add_prefix(prefix, argv[1]);
|
|
|
|
argv[1] = argv[2];
|
blame: tighten command line parser
The command line parser of "git blame" is prepared to take an
ancient odd argument order "blame <path> <rev>" in addition to the
usual "blame [<rev>] <path>". It has at least two negative
ramifications:
- In order to tell these two apart, it checks if the last command
line argument names a path in the working tree, using
file_exists(). However, "blame <rev> <path>" is a request to
explain each and every line in the contents of <path> stored in
revision <rev> and does not need to have a working tree version
of the file. A check with file_exists() is simply wrong.
- To coerce that mistaken file_exists() check to work, the code
calls setup_work_tree() before doing so, because the path it has
is relative to the top-level of the project tree. However,
"blame <rev> <path>" MUST be usable even in a bare repository,
and there is no reason for letting setup_work_tree() complain
and die with "This operation must be run in a work tree".
To correct the former, switch to check if the last token is a
revision (and if so, parse arguments using "blame <path> <rev>"
rule). Correct the latter by getting rid of setup_work_tree() and
file_exists() check--the only case the call to this function matters
is when we are running "blame <path>" (i.e. no starting revision and
asking to blame the working tree file at <path>, digging through the
HEAD revision), but there is a call in setup_scoreboard() just
before it calls fake_working_tree_commit().
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
} else { /* (2a) */
|
|
|
|
if (argc == 2 && is_a_rev(argv[1]) && !get_git_work_tree())
|
|
|
|
die("missing <path> to blame");
|
|
|
|
path = add_prefix(prefix, argv[argc - 1]);
|
|
|
|
}
|
|
|
|
argv[argc - 1] = "--";
|
|
|
|
}
|
|
|
|
|
|
|
|
revs.disable_stdin = 1;
|
|
|
|
setup_revisions(argc, argv, &revs, NULL);
|
blame: default to HEAD in a bare repo when no start commit is given
When 'git blame' is invoked without specifying the commit to start
blaming from, it starts from the given file's state in the work tree.
However, when invoked in a bare repository without a start commit,
then there is no work tree state to start from, and it dies with the
following error message:
$ git rev-parse --is-bare-repository
true
$ git blame file.c
fatal: this operation must be run in a work tree
This is misleading, because it implies that 'git blame' doesn't work
in bare repositories at all, but it does, in fact, work just fine when
it is given a commit to start from.
We could improve the error message, of course, but let's just default
to HEAD in a bare repository instead, as most likely that is what the
user wanted anyway (if they wanted to start from an other commit, then
they would have specified that in the first place).
'git annotate' is just a thin wrapper around 'git blame', so in the
same situation it printed the same misleading error message, and this
patch fixes it, too.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
if (!revs.pending.nr && is_bare_repository()) {
|
|
|
|
struct commit *head_commit;
|
|
|
|
struct object_id head_oid;
|
|
|
|
|
|
|
|
if (!resolve_ref_unsafe("HEAD", RESOLVE_REF_READING,
|
|
|
|
&head_oid, NULL) ||
|
|
|
|
!(head_commit = lookup_commit_reference_gently(revs.repo,
|
|
|
|
&head_oid, 1)))
|
|
|
|
die("no such ref: HEAD");
|
|
|
|
|
|
|
|
add_pending_object(&revs, &head_commit->object, "HEAD");
|
|
|
|
}
|
|
|
|
|
|
|
|
init_scoreboard(&sb);
|
|
|
|
sb.revs = &revs;
|
|
|
|
sb.contents_from = contents_from;
|
|
|
|
sb.reverse = reverse;
|
|
|
|
sb.repo = the_repository;
|
blame: enable funcname blaming with userdiff driver
In blame.c::cmd_blame, we send the 'path' field of the 'sb' 'struct
blame_scoreboard' as the 'path' argument to
'line-range.c::parse_range_arg', but 'sb.path' is not set yet; it's set
to the local variable 'path' a few lines later at line 1137.
This 'path' argument is only used in 'parse_range_arg' if we are blaming
a funcname, i.e. `git blame -L :<funcname> <path>`, and in that case it
is sent to 'parse_range_funcname', where it is used to determine if a
userdiff driver should be used for said <path> to match the given
funcname.
Since 'path' is yet unset, the userdiff driver is never used, so we fall
back to the default funcname regex, which is usually not appropriate for
paths that are set to use a specific userdiff driver, and thus either we
match some unrelated lines, or we die with
fatal: -L parameter '<funcname>' starting at line 1: no match
This has been the case ever since `git blame` learned to blame a
funcname in 13b8f68c1f (log -L: :pattern:file syntax to find by
funcname, 2013-03-28).
Enable funcname blaming for paths using specific userdiff drivers by
initializing 'sb.path' earlier in 'cmd_blame', when some of its other
fields are initialized, so that it is set when passed to
'parse_range_arg'.
Add a regression test in 'annotate-tests.sh', which is sourced in
t8001-annotate.sh and t8002-blame.sh, leveraging an existing file used
to test the userdiff patterns in t4018-diff-funcname.
Also, use 'sb.path' instead of 'path' when constructing the error
message at line 1114, for consistency.
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
sb.path = path;
|
blame: add the ability to ignore commits and their changes
Commits that make formatting changes or function renames are often not
interesting when blaming a file. A user may deem such a commit as 'not
interesting' and want to ignore and its changes it when assigning blame.
For example, say a file has the following git history / rev-list:
---O---A---X---B---C---D---Y---E---F
Commits X and Y both touch a particular line, and the other commits do
not:
X: "Take a third parameter"
-MyFunc(1, 2);
+MyFunc(1, 2, 3);
Y: "Remove camelcase"
-MyFunc(1, 2, 3);
+my_func(1, 2, 3);
git-blame will blame Y for the change. I'd like to be able to ignore Y:
both the existence of the commit as well as any changes it made. This
differs from -S rev-list, which specifies the list of commits to
process for the blame. We would still process Y, but just don't let the
blame 'stick.'
This patch adds the ability for users to ignore a revision with
--ignore-rev=rev, which may be repeated. They can specify a set of
files of full object names of revs, e.g. SHA-1 hashes, one per line. A
single file may be specified with the blame.ignoreRevFile config option
or with --ignore-rev-file=file. Both the config option and the command
line option may be repeated multiple times. An empty file name "" will
clear the list of revs from previously processed files. Config options
are processed before command line options.
For a typical use case, projects will maintain the file containing
revisions for commits that perform mass reformatting, and their users
have the option to ignore all of the commits in that file.
Additionally, a user can use the --ignore-rev option for one-off
investigation. To go back to the example above, X was a substantive
change to the function, but not the change the user is interested in.
The user inspected X, but wanted to find the previous change to that
line - perhaps a commit that introduced that function call.
To make this work, we can't simply remove all ignored commits from the
rev-list. We need to diff the changes introduced by Y so that we can
ignore them. We let the blames get passed to Y, just like when
processing normally. When Y is the target, we make sure that Y does not
*keep* any blames. Any changes that Y is responsible for get passed to
its parent. Note we make one pass through all of the scapegoats
(parents) to attempt to pass blame normally; we don't know if we *need*
to ignore the commit until we've checked all of the parents.
The blame_entry will get passed up the tree until we find a commit that
has a diff chunk that affects those lines.
One issue is that the ignored commit *did* make some change, and there is
no general solution to finding the line in the parent commit that
corresponds to a given line in the ignored commit. That makes it hard
to attribute a particular line within an ignored commit's diff
correctly.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) #include "a.h"
commit-b 12) #include "b.h"
Commit X, which we will ignore, swaps these lines:
commit-X 11) #include "b.h"
commit-X 12) #include "a.h"
We can pass that blame entry to the parent, but line 11 will be
attributed to commit A, even though "include b.h" came from commit B.
The blame mechanism will be looking at the parent's view of the file at
line number 11.
ignore_blame_entry() is set up to allow alternative algorithms for
guessing per-line blames. Any line that is not attributed to the parent
will continue to be blamed on the ignored commit as if that commit was
not ignored. Upcoming patches have the ability to detect these lines
and mark them in the blame output.
The existing algorithm is simple: blame each line on the corresponding
line in the parent's diff chunk. Any lines beyond that stay with the
target.
For example, the parent of an ignored commit has this, say at line 11:
commit-a 11) void new_func_1(void *x, void *y);
commit-b 12) void new_func_2(void *x, void *y);
commit-c 13) some_line_c
commit-d 14) some_line_d
After a commit 'X', we have:
commit-X 11) void new_func_1(void *x,
commit-X 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Commit X nets two additionally lines: 13 and 14. The current
guess_line_blames() algorithm will not attribute these to the parent,
whose diff chunk is only two lines - not four.
When we ignore with the current algorithm, we get:
commit-a 11) void new_func_1(void *x,
commit-b 12) void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14) void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d
Note that line 12 was blamed on B, though B was the commit for
new_func_2(), not new_func_1(). Even when guess_line_blames() finds a
line in the parent, it may still be incorrect.
Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
build_ignorelist(&sb, &ignore_revs_file_list, &ignore_rev_list);
|
|
|
|
string_list_clear(&ignore_revs_file_list, 0);
|
|
|
|
string_list_clear(&ignore_rev_list, 0);
|
|
|
|
setup_scoreboard(&sb, &o);
|
blame: use changed-path Bloom filters
The changed-path Bloom filters help reduce the amount of tree
parsing required during history queries. Before calculating a
diff, we can ask the filter if a path changed between a commit
and its first parent. If the filter says "no" then we can move
on without parsing trees. If the filter says "maybe" then we
parse trees to discover if the answer is actually "yes" or "no".
When computing a blame, there is a section in find_origin() that
computes a diff between a commit and one of its parents. When this
is the first parent, we can check the Bloom filters before calling
diff_tree_oid().
In order to make this work with the blame machinery, we need to
initialize a struct bloom_key with the initial path. But also, we
need to add more keys to a list if a rename is detected. We then
check to see if _any_ of these keys answer "maybe" in the diff.
During development, I purposefully left out this "add a new key
when a rename is detected" to see if the test suite would catch
my error. That is how I discovered the issues with
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS from the previous change.
With that change, we can feel some confidence in the coverage of
this change.
If a user requests copy detection using "git blame -C", then there
are more places where the set of "important" files can expand. I
do not know enough about how this happens in the blame machinery.
Thus, the Bloom filter integration is explicitly disabled in this
mode. A later change could expand the bloom_key data with an
appropriate call (or calls) to add_bloom_key().
If we did not disable this mode, then the following tests would
fail:
t8003-blame-corner-cases.sh
t8011-blame-split-file.sh
Generally, this is a performance enhancement and should not
change the behavior of 'git blame' in any way. If a repo has a
commit-graph file with computed changed-path Bloom filters, then
they should notice improved performance for their 'git blame'
commands.
Here are some example timings that I found by blaming some paths
in the Linux kernel repository:
git blame arch/x86/kernel/topology.c >/dev/null
Before: 0.83s
After: 0.24s
git blame kernel/time/time.c >/dev/null
Before: 0.72s
After: 0.24s
git blame tools/perf/ui/stdio/hist.c >/dev/null
Before: 0.27s
After: 0.11s
I specifically looked for "deep" paths that were also edited many
times. As a counterpoint, the MAINTAINERS file was edited many
times but is located in the root tree. This means that the cost of
computing a diff relative to the pathspec is very small. Here are
the timings for that command:
git blame MAINTAINERS >/dev/null
Before: 20.1s
After: 18.0s
These timings are the best of five. The worst-case runs were on the
order of 2.5 minutes for both cases. Note that the MAINTAINERS file
has 18,740 lines across 17,000+ commits. This happens to be one of
the cases where this change provides the least improvement.
The lack of improvement for the MAINTAINERS file and the relatively
modest improvement for the other examples can be easily explained.
The blame machinery needs to compute line-level diffs to determine
which lines were changed by each commit. That makes up a large
proportion of the computation time, and this change does not
attempt to improve on that section of the algorithm. The
MAINTAINERS file is large and changed often, so it takes time to
determine which lines were updated by which commit. In contrast,
the code files are much smaller, and it takes longer to comute
the line-by-line diff for a single patch on the Linux mailing
lists.
Outside of the "-C" integration, I believe there is little more to
gain from the changed-path Bloom filters for 'git blame' after this
patch.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
|
|
|
|
/*
|
|
|
|
* Changed-path Bloom filters are disabled when looking
|
|
|
|
* for copies.
|
|
|
|
*/
|
|
|
|
if (!(opt & PICKAXE_BLAME_COPY))
|
|
|
|
setup_blame_bloom_data(&sb);
|
blame: use changed-path Bloom filters
The changed-path Bloom filters help reduce the amount of tree
parsing required during history queries. Before calculating a
diff, we can ask the filter if a path changed between a commit
and its first parent. If the filter says "no" then we can move
on without parsing trees. If the filter says "maybe" then we
parse trees to discover if the answer is actually "yes" or "no".
When computing a blame, there is a section in find_origin() that
computes a diff between a commit and one of its parents. When this
is the first parent, we can check the Bloom filters before calling
diff_tree_oid().
In order to make this work with the blame machinery, we need to
initialize a struct bloom_key with the initial path. But also, we
need to add more keys to a list if a rename is detected. We then
check to see if _any_ of these keys answer "maybe" in the diff.
During development, I purposefully left out this "add a new key
when a rename is detected" to see if the test suite would catch
my error. That is how I discovered the issues with
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS from the previous change.
With that change, we can feel some confidence in the coverage of
this change.
If a user requests copy detection using "git blame -C", then there
are more places where the set of "important" files can expand. I
do not know enough about how this happens in the blame machinery.
Thus, the Bloom filter integration is explicitly disabled in this
mode. A later change could expand the bloom_key data with an
appropriate call (or calls) to add_bloom_key().
If we did not disable this mode, then the following tests would
fail:
t8003-blame-corner-cases.sh
t8011-blame-split-file.sh
Generally, this is a performance enhancement and should not
change the behavior of 'git blame' in any way. If a repo has a
commit-graph file with computed changed-path Bloom filters, then
they should notice improved performance for their 'git blame'
commands.
Here are some example timings that I found by blaming some paths
in the Linux kernel repository:
git blame arch/x86/kernel/topology.c >/dev/null
Before: 0.83s
After: 0.24s
git blame kernel/time/time.c >/dev/null
Before: 0.72s
After: 0.24s
git blame tools/perf/ui/stdio/hist.c >/dev/null
Before: 0.27s
After: 0.11s
I specifically looked for "deep" paths that were also edited many
times. As a counterpoint, the MAINTAINERS file was edited many
times but is located in the root tree. This means that the cost of
computing a diff relative to the pathspec is very small. Here are
the timings for that command:
git blame MAINTAINERS >/dev/null
Before: 20.1s
After: 18.0s
These timings are the best of five. The worst-case runs were on the
order of 2.5 minutes for both cases. Note that the MAINTAINERS file
has 18,740 lines across 17,000+ commits. This happens to be one of
the cases where this change provides the least improvement.
The lack of improvement for the MAINTAINERS file and the relatively
modest improvement for the other examples can be easily explained.
The blame machinery needs to compute line-level diffs to determine
which lines were changed by each commit. That makes up a large
proportion of the computation time, and this change does not
attempt to improve on that section of the algorithm. The
MAINTAINERS file is large and changed often, so it takes time to
determine which lines were updated by which commit. In contrast,
the code files are much smaller, and it takes longer to comute
the line-by-line diff for a single patch on the Linux mailing
lists.
Outside of the "-C" integration, I believe there is little more to
gain from the changed-path Bloom filters for 'git blame' after this
patch.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
|
|
|
|
lno = sb.num_lines;
|
|
|
|
|
|
|
|
if (lno && !range_list.nr)
|
|
|
|
string_list_append(&range_list, "1");
|
|
|
|
|
|
|
|
anchor = 1;
|
|
|
|
range_set_init(&ranges, range_list.nr);
|
|
|
|
for (range_i = 0; range_i < range_list.nr; ++range_i) {
|
|
|
|
long bottom, top;
|
|
|
|
if (parse_range_arg(range_list.items[range_i].string,
|
|
|
|
nth_line_cb, &sb, lno, anchor,
|
|
|
|
&bottom, &top, sb.path,
|
|
|
|
the_repository->index))
|
|
|
|
usage(str_usage);
|
|
|
|
if ((!lno && (top || bottom)) || lno < bottom)
|
|
|
|
die(Q_("file %s has only %lu line",
|
|
|
|
"file %s has only %lu lines",
|
blame: enable funcname blaming with userdiff driver
In blame.c::cmd_blame, we send the 'path' field of the 'sb' 'struct
blame_scoreboard' as the 'path' argument to
'line-range.c::parse_range_arg', but 'sb.path' is not set yet; it's set
to the local variable 'path' a few lines later at line 1137.
This 'path' argument is only used in 'parse_range_arg' if we are blaming
a funcname, i.e. `git blame -L :<funcname> <path>`, and in that case it
is sent to 'parse_range_funcname', where it is used to determine if a
userdiff driver should be used for said <path> to match the given
funcname.
Since 'path' is yet unset, the userdiff driver is never used, so we fall
back to the default funcname regex, which is usually not appropriate for
paths that are set to use a specific userdiff driver, and thus either we
match some unrelated lines, or we die with
fatal: -L parameter '<funcname>' starting at line 1: no match
This has been the case ever since `git blame` learned to blame a
funcname in 13b8f68c1f (log -L: :pattern:file syntax to find by
funcname, 2013-03-28).
Enable funcname blaming for paths using specific userdiff drivers by
initializing 'sb.path' earlier in 'cmd_blame', when some of its other
fields are initialized, so that it is set when passed to
'parse_range_arg'.
Add a regression test in 'annotate-tests.sh', which is sourced in
t8001-annotate.sh and t8002-blame.sh, leveraging an existing file used
to test the userdiff patterns in t4018-diff-funcname.
Also, use 'sb.path' instead of 'path' when constructing the error
message at line 1114, for consistency.
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
lno), sb.path, lno);
|
|
|
|
if (bottom < 1)
|
|
|
|
bottom = 1;
|
|
|
|
if (top < 1 || lno < top)
|
|
|
|
top = lno;
|
|
|
|
bottom--;
|
|
|
|
range_set_append_unsafe(&ranges, bottom, top);
|
|
|
|
anchor = top + 1;
|
|
|
|
}
|
|
|
|
sort_and_merge_range_set(&ranges);
|
|
|
|
|
|
|
|
for (range_i = ranges.nr; range_i > 0; --range_i) {
|
|
|
|
const struct range *r = &ranges.ranges[range_i - 1];
|
|
|
|
ent = blame_entry_prepend(ent, r->start, r->end, o);
|
|
|
|
num_lines += (r->end - r->start);
|
|
|
|
}
|
|
|
|
if (!num_lines)
|
|
|
|
num_lines = sb.num_lines;
|
|
|
|
|
|
|
|
o->suspects = ent;
|
|
|
|
prio_queue_put(&sb.commits, o->commit);
|
|
|
|
|
|
|
|
blame_origin_decref(o);
|
|
|
|
|
|
|
|
range_set_release(&ranges);
|
|
|
|
string_list_clear(&range_list, 0);
|
|
|
|
|
|
|
|
sb.ent = NULL;
|
|
|
|
|
|
|
|
if (blame_move_score)
|
|
|
|
sb.move_score = blame_move_score;
|
|
|
|
if (blame_copy_score)
|
|
|
|
sb.copy_score = blame_copy_score;
|
|
|
|
|
|
|
|
sb.debug = DEBUG_BLAME;
|
|
|
|
sb.on_sanity_fail = &sanity_check_on_fail;
|
|
|
|
|
|
|
|
sb.show_root = show_root;
|
|
|
|
sb.xdl_opts = xdl_opts;
|
|
|
|
sb.no_whole_file_rename = no_whole_file_rename;
|
|
|
|
|
|
|
|
read_mailmap(&mailmap);
|
|
|
|
|
|
|
|
sb.found_guilty_entry = &found_guilty_entry;
|
|
|
|
sb.found_guilty_entry_data = π
|
|
|
|
if (show_progress)
|
|
|
|
pi.progress = start_delayed_progress(_("Blaming lines"), num_lines);
|
|
|
|
|
|
|
|
assign_blame(&sb, opt);
|
|
|
|
|
|
|
|
stop_progress(&pi.progress);
|
|
|
|
|
|
|
|
if (!incremental)
|
|
|
|
setup_pager();
|
|
|
|
else
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
blame_sort_final(&sb);
|
|
|
|
|
|
|
|
blame_coalesce(&sb);
|
|
|
|
|
|
|
|
if (!(output_option & (OUTPUT_COLOR_LINE | OUTPUT_SHOW_AGE_WITH_COLOR)))
|
|
|
|
output_option |= coloring_mode;
|
|
|
|
|
|
|
|
if (!(output_option & OUTPUT_PORCELAIN)) {
|
|
|
|
find_alignment(&sb, &output_option);
|
|
|
|
if (!*repeated_meta_color &&
|
|
|
|
(output_option & OUTPUT_COLOR_LINE))
|
|
|
|
xsnprintf(repeated_meta_color,
|
|
|
|
sizeof(repeated_meta_color),
|
|
|
|
"%s", GIT_COLOR_CYAN);
|
|
|
|
}
|
|
|
|
if (output_option & OUTPUT_ANNOTATE_COMPAT)
|
|
|
|
output_option &= ~(OUTPUT_COLOR_LINE | OUTPUT_SHOW_AGE_WITH_COLOR);
|
|
|
|
|
|
|
|
output(&sb, output_option);
|
|
|
|
free((void *)sb.final_buf);
|
|
|
|
for (ent = sb.ent; ent; ) {
|
|
|
|
struct blame_entry *e = ent->next;
|
|
|
|
free(ent);
|
|
|
|
ent = e;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (show_stats) {
|
|
|
|
printf("num read blob: %d\n", sb.num_read_blob);
|
|
|
|
printf("num get patch: %d\n", sb.num_get_patch);
|
|
|
|
printf("num commits: %d\n", sb.num_commits);
|
|
|
|
}
|
blame: use changed-path Bloom filters
The changed-path Bloom filters help reduce the amount of tree
parsing required during history queries. Before calculating a
diff, we can ask the filter if a path changed between a commit
and its first parent. If the filter says "no" then we can move
on without parsing trees. If the filter says "maybe" then we
parse trees to discover if the answer is actually "yes" or "no".
When computing a blame, there is a section in find_origin() that
computes a diff between a commit and one of its parents. When this
is the first parent, we can check the Bloom filters before calling
diff_tree_oid().
In order to make this work with the blame machinery, we need to
initialize a struct bloom_key with the initial path. But also, we
need to add more keys to a list if a rename is detected. We then
check to see if _any_ of these keys answer "maybe" in the diff.
During development, I purposefully left out this "add a new key
when a rename is detected" to see if the test suite would catch
my error. That is how I discovered the issues with
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS from the previous change.
With that change, we can feel some confidence in the coverage of
this change.
If a user requests copy detection using "git blame -C", then there
are more places where the set of "important" files can expand. I
do not know enough about how this happens in the blame machinery.
Thus, the Bloom filter integration is explicitly disabled in this
mode. A later change could expand the bloom_key data with an
appropriate call (or calls) to add_bloom_key().
If we did not disable this mode, then the following tests would
fail:
t8003-blame-corner-cases.sh
t8011-blame-split-file.sh
Generally, this is a performance enhancement and should not
change the behavior of 'git blame' in any way. If a repo has a
commit-graph file with computed changed-path Bloom filters, then
they should notice improved performance for their 'git blame'
commands.
Here are some example timings that I found by blaming some paths
in the Linux kernel repository:
git blame arch/x86/kernel/topology.c >/dev/null
Before: 0.83s
After: 0.24s
git blame kernel/time/time.c >/dev/null
Before: 0.72s
After: 0.24s
git blame tools/perf/ui/stdio/hist.c >/dev/null
Before: 0.27s
After: 0.11s
I specifically looked for "deep" paths that were also edited many
times. As a counterpoint, the MAINTAINERS file was edited many
times but is located in the root tree. This means that the cost of
computing a diff relative to the pathspec is very small. Here are
the timings for that command:
git blame MAINTAINERS >/dev/null
Before: 20.1s
After: 18.0s
These timings are the best of five. The worst-case runs were on the
order of 2.5 minutes for both cases. Note that the MAINTAINERS file
has 18,740 lines across 17,000+ commits. This happens to be one of
the cases where this change provides the least improvement.
The lack of improvement for the MAINTAINERS file and the relatively
modest improvement for the other examples can be easily explained.
The blame machinery needs to compute line-level diffs to determine
which lines were changed by each commit. That makes up a large
proportion of the computation time, and this change does not
attempt to improve on that section of the algorithm. The
MAINTAINERS file is large and changed often, so it takes time to
determine which lines were updated by which commit. In contrast,
the code files are much smaller, and it takes longer to comute
the line-by-line diff for a single patch on the Linux mailing
lists.
Outside of the "-C" integration, I believe there is little more to
gain from the changed-path Bloom filters for 'git blame' after this
patch.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
|
|
|
|
cleanup:
|
blame: use changed-path Bloom filters
The changed-path Bloom filters help reduce the amount of tree
parsing required during history queries. Before calculating a
diff, we can ask the filter if a path changed between a commit
and its first parent. If the filter says "no" then we can move
on without parsing trees. If the filter says "maybe" then we
parse trees to discover if the answer is actually "yes" or "no".
When computing a blame, there is a section in find_origin() that
computes a diff between a commit and one of its parents. When this
is the first parent, we can check the Bloom filters before calling
diff_tree_oid().
In order to make this work with the blame machinery, we need to
initialize a struct bloom_key with the initial path. But also, we
need to add more keys to a list if a rename is detected. We then
check to see if _any_ of these keys answer "maybe" in the diff.
During development, I purposefully left out this "add a new key
when a rename is detected" to see if the test suite would catch
my error. That is how I discovered the issues with
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS from the previous change.
With that change, we can feel some confidence in the coverage of
this change.
If a user requests copy detection using "git blame -C", then there
are more places where the set of "important" files can expand. I
do not know enough about how this happens in the blame machinery.
Thus, the Bloom filter integration is explicitly disabled in this
mode. A later change could expand the bloom_key data with an
appropriate call (or calls) to add_bloom_key().
If we did not disable this mode, then the following tests would
fail:
t8003-blame-corner-cases.sh
t8011-blame-split-file.sh
Generally, this is a performance enhancement and should not
change the behavior of 'git blame' in any way. If a repo has a
commit-graph file with computed changed-path Bloom filters, then
they should notice improved performance for their 'git blame'
commands.
Here are some example timings that I found by blaming some paths
in the Linux kernel repository:
git blame arch/x86/kernel/topology.c >/dev/null
Before: 0.83s
After: 0.24s
git blame kernel/time/time.c >/dev/null
Before: 0.72s
After: 0.24s
git blame tools/perf/ui/stdio/hist.c >/dev/null
Before: 0.27s
After: 0.11s
I specifically looked for "deep" paths that were also edited many
times. As a counterpoint, the MAINTAINERS file was edited many
times but is located in the root tree. This means that the cost of
computing a diff relative to the pathspec is very small. Here are
the timings for that command:
git blame MAINTAINERS >/dev/null
Before: 20.1s
After: 18.0s
These timings are the best of five. The worst-case runs were on the
order of 2.5 minutes for both cases. Note that the MAINTAINERS file
has 18,740 lines across 17,000+ commits. This happens to be one of
the cases where this change provides the least improvement.
The lack of improvement for the MAINTAINERS file and the relatively
modest improvement for the other examples can be easily explained.
The blame machinery needs to compute line-level diffs to determine
which lines were changed by each commit. That makes up a large
proportion of the computation time, and this change does not
attempt to improve on that section of the algorithm. The
MAINTAINERS file is large and changed often, so it takes time to
determine which lines were updated by which commit. In contrast,
the code files are much smaller, and it takes longer to comute
the line-by-line diff for a single patch on the Linux mailing
lists.
Outside of the "-C" integration, I believe there is little more to
gain from the changed-path Bloom filters for 'git blame' after this
patch.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
cleanup_scoreboard(&sb);
|
|
|
|
release_revisions(&revs);
|
|
|
|
return 0;
|
|
|
|
}
|