|
|
|
/*
|
|
|
|
* Copyright (C) 2005 Junio C Hamano
|
|
|
|
*/
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/wait.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include "cache.h"
|
|
|
|
#include "quote.h"
|
|
|
|
#include "diff.h"
|
|
|
|
#include "diffcore.h"
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
#include "xdiff/xdiff.h"
|
|
|
|
|
|
|
|
static int use_size_cache;
|
|
|
|
|
|
|
|
int diff_rename_limit_default = -1;
|
|
|
|
|
|
|
|
int git_diff_config(const char *var, const char *value)
|
|
|
|
{
|
|
|
|
if (!strcmp(var, "diff.renamelimit")) {
|
|
|
|
diff_rename_limit_default = git_config_int(var, value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return git_default_config(var, value);
|
|
|
|
}
|
|
|
|
|
|
|
|
static char *quote_one(const char *str)
|
|
|
|
{
|
|
|
|
int needlen;
|
|
|
|
char *xp;
|
|
|
|
|
|
|
|
if (!str)
|
|
|
|
return NULL;
|
|
|
|
needlen = quote_c_style(str, NULL, NULL, 0);
|
|
|
|
if (!needlen)
|
|
|
|
return strdup(str);
|
|
|
|
xp = xmalloc(needlen + 1);
|
|
|
|
quote_c_style(str, xp, NULL, 0);
|
|
|
|
return xp;
|
|
|
|
}
|
|
|
|
|
|
|
|
static char *quote_two(const char *one, const char *two)
|
|
|
|
{
|
|
|
|
int need_one = quote_c_style(one, NULL, NULL, 1);
|
|
|
|
int need_two = quote_c_style(two, NULL, NULL, 1);
|
|
|
|
char *xp;
|
|
|
|
|
|
|
|
if (need_one + need_two) {
|
|
|
|
if (!need_one) need_one = strlen(one);
|
|
|
|
if (!need_two) need_one = strlen(two);
|
|
|
|
|
|
|
|
xp = xmalloc(need_one + need_two + 3);
|
|
|
|
xp[0] = '"';
|
|
|
|
quote_c_style(one, xp + 1, NULL, 1);
|
|
|
|
quote_c_style(two, xp + need_one + 1, NULL, 1);
|
|
|
|
strcpy(xp + need_one + need_two + 1, "\"");
|
|
|
|
return xp;
|
|
|
|
}
|
|
|
|
need_one = strlen(one);
|
|
|
|
need_two = strlen(two);
|
|
|
|
xp = xmalloc(need_one + need_two + 1);
|
|
|
|
strcpy(xp, one);
|
|
|
|
strcpy(xp + need_one, two);
|
|
|
|
return xp;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *external_diff(void)
|
|
|
|
{
|
|
|
|
static const char *external_diff_cmd = NULL;
|
|
|
|
static int done_preparing = 0;
|
|
|
|
|
|
|
|
if (done_preparing)
|
|
|
|
return external_diff_cmd;
|
|
|
|
external_diff_cmd = getenv("GIT_EXTERNAL_DIFF");
|
|
|
|
done_preparing = 1;
|
|
|
|
return external_diff_cmd;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define TEMPFILE_PATH_LEN 50
|
|
|
|
|
|
|
|
static struct diff_tempfile {
|
|
|
|
const char *name; /* filename external diff should read from */
|
|
|
|
char hex[41];
|
|
|
|
char mode[10];
|
|
|
|
char tmp_path[TEMPFILE_PATH_LEN];
|
|
|
|
} diff_temp[2];
|
|
|
|
|
|
|
|
static int count_lines(const char *data, int size)
|
|
|
|
{
|
|
|
|
int count, ch, completely_empty = 1, nl_just_seen = 0;
|
|
|
|
count = 0;
|
|
|
|
while (0 < size--) {
|
|
|
|
ch = *data++;
|
|
|
|
if (ch == '\n') {
|
|
|
|
count++;
|
|
|
|
nl_just_seen = 1;
|
|
|
|
completely_empty = 0;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
nl_just_seen = 0;
|
|
|
|
completely_empty = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (completely_empty)
|
|
|
|
return 0;
|
|
|
|
if (!nl_just_seen)
|
|
|
|
count++; /* no trailing newline */
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void print_line_count(int count)
|
|
|
|
{
|
|
|
|
switch (count) {
|
|
|
|
case 0:
|
|
|
|
printf("0,0");
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
printf("1");
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
printf("1,%d", count);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void copy_file(int prefix, const char *data, int size)
|
|
|
|
{
|
|
|
|
int ch, nl_just_seen = 1;
|
|
|
|
while (0 < size--) {
|
|
|
|
ch = *data++;
|
|
|
|
if (nl_just_seen)
|
|
|
|
putchar(prefix);
|
|
|
|
putchar(ch);
|
|
|
|
if (ch == '\n')
|
|
|
|
nl_just_seen = 1;
|
|
|
|
else
|
|
|
|
nl_just_seen = 0;
|
|
|
|
}
|
|
|
|
if (!nl_just_seen)
|
|
|
|
printf("\n\\ No newline at end of file\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void emit_rewrite_diff(const char *name_a,
|
|
|
|
const char *name_b,
|
|
|
|
struct diff_filespec *one,
|
|
|
|
struct diff_filespec *two)
|
|
|
|
{
|
|
|
|
int lc_a, lc_b;
|
|
|
|
diff_populate_filespec(one, 0);
|
|
|
|
diff_populate_filespec(two, 0);
|
|
|
|
lc_a = count_lines(one->data, one->size);
|
|
|
|
lc_b = count_lines(two->data, two->size);
|
|
|
|
printf("--- %s\n+++ %s\n@@ -", name_a, name_b);
|
|
|
|
print_line_count(lc_a);
|
|
|
|
printf(" +");
|
|
|
|
print_line_count(lc_b);
|
|
|
|
printf(" @@\n");
|
|
|
|
if (lc_a)
|
|
|
|
copy_file('-', one->data, one->size);
|
|
|
|
if (lc_b)
|
|
|
|
copy_file('+', two->data, two->size);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
{
|
|
|
|
if (!DIFF_FILE_VALID(one)) {
|
|
|
|
mf->ptr = ""; /* does not matter */
|
|
|
|
mf->size = 0;
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
else if (diff_populate_filespec(one, 0))
|
|
|
|
return -1;
|
|
|
|
mf->ptr = one->data;
|
|
|
|
mf->size = one->size;
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct emit_callback {
|
|
|
|
const char **label_path;
|
|
|
|
};
|
|
|
|
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
static int fn_out(void *priv, mmbuffer_t *mb, int nbuf)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct emit_callback *ecbdata = priv;
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
|
|
|
|
if (ecbdata->label_path[0]) {
|
|
|
|
printf("--- %s\n", ecbdata->label_path[0]);
|
|
|
|
printf("+++ %s\n", ecbdata->label_path[1]);
|
|
|
|
ecbdata->label_path[0] = ecbdata->label_path[1] = NULL;
|
|
|
|
}
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
for (i = 0; i < nbuf; i++)
|
|
|
|
if (!fwrite(mb[i].ptr, mb[i].size, 1, stdout))
|
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define FIRST_FEW_BYTES 8000
|
|
|
|
static int mmfile_is_binary(mmfile_t *mf)
|
|
|
|
{
|
|
|
|
long sz = mf->size;
|
|
|
|
if (FIRST_FEW_BYTES < sz)
|
|
|
|
sz = FIRST_FEW_BYTES;
|
|
|
|
if (memchr(mf->ptr, 0, sz))
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void builtin_diff(const char *name_a,
|
|
|
|
const char *name_b,
|
|
|
|
struct diff_filespec *one,
|
|
|
|
struct diff_filespec *two,
|
|
|
|
const char *xfrm_msg,
|
|
|
|
int complete_rewrite)
|
|
|
|
{
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
mmfile_t mf1, mf2;
|
|
|
|
const char *lbl[2];
|
|
|
|
char *a_one, *b_two;
|
|
|
|
|
|
|
|
a_one = quote_two("a/", name_a);
|
|
|
|
b_two = quote_two("b/", name_b);
|
|
|
|
lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null";
|
|
|
|
lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null";
|
|
|
|
printf("diff --git %s %s\n", a_one, b_two);
|
|
|
|
if (lbl[0][0] == '/') {
|
|
|
|
/* /dev/null */
|
|
|
|
printf("new file mode %06o\n", two->mode);
|
|
|
|
if (xfrm_msg && xfrm_msg[0])
|
|
|
|
puts(xfrm_msg);
|
|
|
|
}
|
|
|
|
else if (lbl[1][0] == '/') {
|
|
|
|
printf("deleted file mode %06o\n", one->mode);
|
|
|
|
if (xfrm_msg && xfrm_msg[0])
|
|
|
|
puts(xfrm_msg);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (one->mode != two->mode) {
|
|
|
|
printf("old mode %06o\n", one->mode);
|
|
|
|
printf("new mode %06o\n", two->mode);
|
|
|
|
}
|
|
|
|
if (xfrm_msg && xfrm_msg[0])
|
|
|
|
puts(xfrm_msg);
|
|
|
|
/*
|
|
|
|
* we do not run diff between different kind
|
|
|
|
* of objects.
|
|
|
|
*/
|
|
|
|
if ((one->mode ^ two->mode) & S_IFMT)
|
|
|
|
goto free_ab_and_return;
|
|
|
|
if (complete_rewrite) {
|
|
|
|
emit_rewrite_diff(name_a, name_b, one, two);
|
|
|
|
goto free_ab_and_return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
die("unable to read files to diff");
|
|
|
|
|
|
|
|
if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2))
|
|
|
|
printf("Binary files %s and %s differ\n", lbl[0], lbl[1]);
|
|
|
|
else {
|
|
|
|
/* Crazy xdl interfaces.. */
|
|
|
|
const char *diffopts = getenv("GIT_DIFF_OPTS");
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
xpparam_t xpp;
|
|
|
|
xdemitconf_t xecfg;
|
|
|
|
xdemitcb_t ecb;
|
|
|
|
struct emit_callback ecbdata;
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
|
|
|
|
ecbdata.label_path = lbl;
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
xpp.flags = XDF_NEED_MINIMAL;
|
|
|
|
xecfg.ctxlen = 3;
|
|
|
|
xecfg.flags = XDL_EMIT_FUNCNAMES;
|
|
|
|
if (!diffopts)
|
|
|
|
;
|
|
|
|
else if (!strncmp(diffopts, "--unified=", 10))
|
|
|
|
xecfg.ctxlen = strtoul(diffopts + 10, NULL, 10);
|
|
|
|
else if (!strncmp(diffopts, "-u", 2))
|
|
|
|
xecfg.ctxlen = strtoul(diffopts + 2, NULL, 10);
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
ecb.outf = fn_out;
|
|
|
|
ecb.priv = &ecbdata;
|
Use a *real* built-in diff generator
This uses a simplified libxdiff setup to generate unified diffs _without_
doing fork/execve of GNU "diff".
This has several huge advantages, for example:
Before:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m24.818s
user 0m13.332s
sys 0m8.664s
After:
[torvalds@g5 linux]$ time git diff v2.6.16.. > /dev/null
real 0m4.563s
user 0m2.944s
sys 0m1.580s
and the fact that this should be a lot more portable (ie we can ignore all
the issues with doing fork/execve under Windows).
Perhaps even more importantly, this allows us to do diffs without actually
ever writing out the git file contents to a temporary file (and without
any of the shell quoting issues on filenames etc etc).
NOTE! THIS PATCH DOES NOT DO THAT OPTIMIZATION YET! I was lazy, and the
current "diff-core" code actually will always write the temp-files,
because it used to be something that you simply had to do. So this current
one actually writes a temp-file like before, and then reads it into memory
again just to do the diff. Stupid.
But if this basic infrastructure is accepted, we can start switching over
diff-core to not write temp-files, which should speed things up even
further, especially when doing big tree-to-tree diffs.
Now, in the interest of full disclosure, I should also point out a few
downsides:
- the libxdiff algorithm is different, and I bet GNU diff has gotten a
lot more testing. And the thing is, generating a diff is not an exact
science - you can get two different diffs (and you will), and they can
both be perfectly valid. So it's not possible to "validate" the
libxdiff output by just comparing it against GNU diff.
- GNU diff does some nice eye-candy, like trying to figure out what the
last function was, and adding that information to the "@@ .." line.
libxdiff doesn't do that.
- The libxdiff thing has some known deficiencies. In particular, it gets
the "\No newline at end of file" case wrong. So this is currently for
the experimental branch only. I hope Davide will help fix it.
That said, I think the huge performance advantage, and the fact that it
integrates better is definitely worth it. But it should go into a
development branch at least due to the missing newline issue.
Technical note: this is based on libxdiff-0.17, but I did some surgery to
get rid of the extraneous fat - stuff that git doesn't need, and seriously
cutting down on mmfile_t, which had much more capabilities than the diff
algorithm either needed or used. In this version, "mmfile_t" is just a
trivial <pointer,length> tuple.
That said, I tried to keep the differences to simple removals, so that you
can do a diff between this and the libxdiff origin, and you'll basically
see just things getting deleted. Even the mmfile_t simplifications are
left in a state where the diffs should be readable.
Apologies to Davide, whom I'd love to get feedback on this all from (I
wrote my own "fill_mmfile()" for the new simpler mmfile_t format: the old
complex format had a helper function for that, but I did my surgery with
the goal in mind that eventually we _should_ just do
mmfile_t mf;
buf = read_sha1_file(sha1, type, &size);
mf->ptr = buf;
mf->size = size;
.. use "mf" directly ..
which was really a nightmare with the old "helpful" mmfile_t, and really
is that easy with the new cut-down interfaces).
[ Btw, as any hawk-eye can see from the diff, this was actually generated
with itself, so it is "self-hosting". That's about all the testing it
has gotten, along with the above kernel diff, which eye-balls correctly,
but shows the newline issue when you double-check it with "git-apply" ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
xdl_diff(&mf1, &mf2, &xpp, &xecfg, &ecb);
|
|
|
|
}
|
|
|
|
|
|
|
|
free_ab_and_return:
|
|
|
|
free(a_one);
|
|
|
|
free(b_two);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct diff_filespec *alloc_filespec(const char *path)
|
|
|
|
{
|
|
|
|
int namelen = strlen(path);
|
|
|
|
struct diff_filespec *spec = xmalloc(sizeof(*spec) + namelen + 1);
|
|
|
|
|
|
|
|
memset(spec, 0, sizeof(*spec));
|
|
|
|
spec->path = (char *)(spec + 1);
|
|
|
|
memcpy(spec->path, path, namelen+1);
|
|
|
|
return spec;
|
|
|
|
}
|
|
|
|
|
|
|
|
void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
|
|
|
|
unsigned short mode)
|
|
|
|
{
|
|
|
|
if (mode) {
|
|
|
|
spec->mode = canon_mode(mode);
|
|
|
|
memcpy(spec->sha1, sha1, 20);
|
|
|
|
spec->sha1_valid = !!memcmp(sha1, null_sha1, 20);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given a name and sha1 pair, if the dircache tells us the file in
|
|
|
|
* the work tree has that object contents, return true, so that
|
|
|
|
* prepare_temp_file() does not have to inflate and extract.
|
|
|
|
*/
|
|
|
|
static int work_tree_matches(const char *name, const unsigned char *sha1)
|
|
|
|
{
|
|
|
|
struct cache_entry *ce;
|
|
|
|
struct stat st;
|
|
|
|
int pos, len;
|
|
|
|
|
|
|
|
/* We do not read the cache ourselves here, because the
|
|
|
|
* benchmark with my previous version that always reads cache
|
|
|
|
* shows that it makes things worse for diff-tree comparing
|
|
|
|
* two linux-2.6 kernel trees in an already checked out work
|
|
|
|
* tree. This is because most diff-tree comparisons deal with
|
|
|
|
* only a small number of files, while reading the cache is
|
|
|
|
* expensive for a large project, and its cost outweighs the
|
|
|
|
* savings we get by not inflating the object to a temporary
|
|
|
|
* file. Practically, this code only helps when we are used
|
|
|
|
* by diff-cache --cached, which does read the cache before
|
|
|
|
* calling us.
|
|
|
|
*/
|
|
|
|
if (!active_cache)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
len = strlen(name);
|
|
|
|
pos = cache_name_pos(name, len);
|
|
|
|
if (pos < 0)
|
|
|
|
return 0;
|
|
|
|
ce = active_cache[pos];
|
|
|
|
if ((lstat(name, &st) < 0) ||
|
|
|
|
!S_ISREG(st.st_mode) || /* careful! */
|
|
|
|
ce_match_stat(ce, &st, 0) ||
|
|
|
|
memcmp(sha1, ce->sha1, 20))
|
|
|
|
return 0;
|
|
|
|
/* we return 1 only when we can stat, it is a regular file,
|
|
|
|
* stat information matches, and sha1 recorded in the cache
|
|
|
|
* matches. I.e. we know the file in the work tree really is
|
|
|
|
* the same as the <name, sha1> pair.
|
|
|
|
*/
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct sha1_size_cache {
|
|
|
|
unsigned char sha1[20];
|
|
|
|
unsigned long size;
|
|
|
|
} **sha1_size_cache;
|
|
|
|
static int sha1_size_cache_nr, sha1_size_cache_alloc;
|
|
|
|
|
|
|
|
static struct sha1_size_cache *locate_size_cache(unsigned char *sha1,
|
|
|
|
int find_only,
|
|
|
|
unsigned long size)
|
|
|
|
{
|
|
|
|
int first, last;
|
|
|
|
struct sha1_size_cache *e;
|
|
|
|
|
|
|
|
first = 0;
|
|
|
|
last = sha1_size_cache_nr;
|
|
|
|
while (last > first) {
|
|
|
|
int cmp, next = (last + first) >> 1;
|
|
|
|
e = sha1_size_cache[next];
|
|
|
|
cmp = memcmp(e->sha1, sha1, 20);
|
|
|
|
if (!cmp)
|
|
|
|
return e;
|
|
|
|
if (cmp < 0) {
|
|
|
|
last = next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
first = next+1;
|
|
|
|
}
|
|
|
|
/* not found */
|
|
|
|
if (find_only)
|
|
|
|
return NULL;
|
|
|
|
/* insert to make it at "first" */
|
|
|
|
if (sha1_size_cache_alloc <= sha1_size_cache_nr) {
|
|
|
|
sha1_size_cache_alloc = alloc_nr(sha1_size_cache_alloc);
|
|
|
|
sha1_size_cache = xrealloc(sha1_size_cache,
|
|
|
|
sha1_size_cache_alloc *
|
|
|
|
sizeof(*sha1_size_cache));
|
|
|
|
}
|
|
|
|
sha1_size_cache_nr++;
|
|
|
|
if (first < sha1_size_cache_nr)
|
|
|
|
memmove(sha1_size_cache + first + 1, sha1_size_cache + first,
|
|
|
|
(sha1_size_cache_nr - first - 1) *
|
|
|
|
sizeof(*sha1_size_cache));
|
|
|
|
e = xmalloc(sizeof(struct sha1_size_cache));
|
|
|
|
sha1_size_cache[first] = e;
|
|
|
|
memcpy(e->sha1, sha1, 20);
|
|
|
|
e->size = size;
|
|
|
|
return e;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* While doing rename detection and pickaxe operation, we may need to
|
|
|
|
* grab the data for the blob (or file) for our own in-core comparison.
|
|
|
|
* diff_filespec has data and size fields for this purpose.
|
|
|
|
*/
|
|
|
|
int diff_populate_filespec(struct diff_filespec *s, int size_only)
|
|
|
|
{
|
|
|
|
int err = 0;
|
|
|
|
if (!DIFF_FILE_VALID(s))
|
|
|
|
die("internal error: asking to populate invalid file.");
|
|
|
|
if (S_ISDIR(s->mode))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (!use_size_cache)
|
|
|
|
size_only = 0;
|
|
|
|
|
|
|
|
if (s->data)
|
|
|
|
return err;
|
|
|
|
if (!s->sha1_valid ||
|
|
|
|
work_tree_matches(s->path, s->sha1)) {
|
|
|
|
struct stat st;
|
|
|
|
int fd;
|
|
|
|
if (lstat(s->path, &st) < 0) {
|
|
|
|
if (errno == ENOENT) {
|
|
|
|
err_empty:
|
|
|
|
err = -1;
|
|
|
|
empty:
|
|
|
|
s->data = "";
|
|
|
|
s->size = 0;
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s->size = st.st_size;
|
|
|
|
if (!s->size)
|
|
|
|
goto empty;
|
|
|
|
if (size_only)
|
|
|
|
return 0;
|
|
|
|
if (S_ISLNK(st.st_mode)) {
|
|
|
|
int ret;
|
|
|
|
s->data = xmalloc(s->size);
|
|
|
|
s->should_free = 1;
|
|
|
|
ret = readlink(s->path, s->data, s->size);
|
|
|
|
if (ret < 0) {
|
|
|
|
free(s->data);
|
|
|
|
goto err_empty;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
fd = open(s->path, O_RDONLY);
|
|
|
|
if (fd < 0)
|
|
|
|
goto err_empty;
|
|
|
|
s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
|
|
|
|
close(fd);
|
|
|
|
if (s->data == MAP_FAILED)
|
|
|
|
goto err_empty;
|
|
|
|
s->should_munmap = 1;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
char type[20];
|
|
|
|
struct sha1_size_cache *e;
|
|
|
|
|
|
|
|
if (size_only) {
|
|
|
|
e = locate_size_cache(s->sha1, 1, 0);
|
|
|
|
if (e) {
|
|
|
|
s->size = e->size;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!sha1_object_info(s->sha1, type, &s->size))
|
|
|
|
locate_size_cache(s->sha1, 0, s->size);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
s->data = read_sha1_file(s->sha1, type, &s->size);
|
|
|
|
s->should_free = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void diff_free_filespec_data(struct diff_filespec *s)
|
|
|
|
{
|
|
|
|
if (s->should_free)
|
|
|
|
free(s->data);
|
|
|
|
else if (s->should_munmap)
|
|
|
|
munmap(s->data, s->size);
|
|
|
|
s->should_free = s->should_munmap = 0;
|
|
|
|
s->data = NULL;
|
|
|
|
free(s->cnt_data);
|
|
|
|
s->cnt_data = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void prep_temp_blob(struct diff_tempfile *temp,
|
|
|
|
void *blob,
|
|
|
|
unsigned long size,
|
|
|
|
const unsigned char *sha1,
|
|
|
|
int mode)
|
|
|
|
{
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
fd = git_mkstemp(temp->tmp_path, TEMPFILE_PATH_LEN, ".diff_XXXXXX");
|
|
|
|
if (fd < 0)
|
|
|
|
die("unable to create temp-file");
|
|
|
|
if (write(fd, blob, size) != size)
|
|
|
|
die("unable to write temp-file");
|
|
|
|
close(fd);
|
|
|
|
temp->name = temp->tmp_path;
|
|
|
|
strcpy(temp->hex, sha1_to_hex(sha1));
|
|
|
|
temp->hex[40] = 0;
|
|
|
|
sprintf(temp->mode, "%06o", mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void prepare_temp_file(const char *name,
|
|
|
|
struct diff_tempfile *temp,
|
|
|
|
struct diff_filespec *one)
|
|
|
|
{
|
|
|
|
if (!DIFF_FILE_VALID(one)) {
|
|
|
|
not_a_valid_file:
|
|
|
|
/* A '-' entry produces this for file-2, and
|
|
|
|
* a '+' entry produces this for file-1.
|
|
|
|
*/
|
|
|
|
temp->name = "/dev/null";
|
|
|
|
strcpy(temp->hex, ".");
|
|
|
|
strcpy(temp->mode, ".");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!one->sha1_valid ||
|
|
|
|
work_tree_matches(name, one->sha1)) {
|
|
|
|
struct stat st;
|
|
|
|
if (lstat(name, &st) < 0) {
|
|
|
|
if (errno == ENOENT)
|
|
|
|
goto not_a_valid_file;
|
|
|
|
die("stat(%s): %s", name, strerror(errno));
|
|
|
|
}
|
|
|
|
if (S_ISLNK(st.st_mode)) {
|
|
|
|
int ret;
|
|
|
|
char buf[PATH_MAX + 1]; /* ought to be SYMLINK_MAX */
|
|
|
|
if (sizeof(buf) <= st.st_size)
|
|
|
|
die("symlink too long: %s", name);
|
|
|
|
ret = readlink(name, buf, st.st_size);
|
|
|
|
if (ret < 0)
|
|
|
|
die("readlink(%s)", name);
|
|
|
|
prep_temp_blob(temp, buf, st.st_size,
|
|
|
|
(one->sha1_valid ?
|
|
|
|
one->sha1 : null_sha1),
|
|
|
|
(one->sha1_valid ?
|
|
|
|
one->mode : S_IFLNK));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* we can borrow from the file in the work tree */
|
|
|
|
temp->name = name;
|
|
|
|
if (!one->sha1_valid)
|
|
|
|
strcpy(temp->hex, sha1_to_hex(null_sha1));
|
|
|
|
else
|
|
|
|
strcpy(temp->hex, sha1_to_hex(one->sha1));
|
|
|
|
/* Even though we may sometimes borrow the
|
|
|
|
* contents from the work tree, we always want
|
|
|
|
* one->mode. mode is trustworthy even when
|
|
|
|
* !(one->sha1_valid), as long as
|
|
|
|
* DIFF_FILE_VALID(one).
|
|
|
|
*/
|
|
|
|
sprintf(temp->mode, "%06o", one->mode);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (diff_populate_filespec(one, 0))
|
|
|
|
die("cannot read data blob for %s", one->path);
|
|
|
|
prep_temp_blob(temp, one->data, one->size,
|
|
|
|
one->sha1, one->mode);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void remove_tempfile(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < 2; i++)
|
|
|
|
if (diff_temp[i].name == diff_temp[i].tmp_path) {
|
|
|
|
unlink(diff_temp[i].name);
|
|
|
|
diff_temp[i].name = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void remove_tempfile_on_signal(int signo)
|
|
|
|
{
|
|
|
|
remove_tempfile();
|
|
|
|
signal(SIGINT, SIG_DFL);
|
|
|
|
raise(signo);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int spawn_prog(const char *pgm, const char **arg)
|
|
|
|
{
|
|
|
|
pid_t pid;
|
|
|
|
int status;
|
|
|
|
|
|
|
|
fflush(NULL);
|
|
|
|
pid = fork();
|
|
|
|
if (pid < 0)
|
|
|
|
die("unable to fork");
|
|
|
|
if (!pid) {
|
|
|
|
execvp(pgm, (char *const*) arg);
|
|
|
|
exit(255);
|
|
|
|
}
|
|
|
|
|
|
|
|
while (waitpid(pid, &status, 0) < 0) {
|
|
|
|
if (errno == EINTR)
|
|
|
|
continue;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Earlier we did not check the exit status because
|
|
|
|
* diff exits non-zero if files are different, and
|
|
|
|
* we are not interested in knowing that. It was a
|
|
|
|
* mistake which made it harder to quit a diff-*
|
|
|
|
* session that uses the git-apply-patch-script as
|
|
|
|
* the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF
|
|
|
|
* should also exit non-zero only when it wants to
|
|
|
|
* abort the entire diff-* session.
|
|
|
|
*/
|
|
|
|
if (WIFEXITED(status) && !WEXITSTATUS(status))
|
|
|
|
return 0;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* An external diff command takes:
|
|
|
|
*
|
|
|
|
* diff-cmd name infile1 infile1-sha1 infile1-mode \
|
|
|
|
* infile2 infile2-sha1 infile2-mode [ rename-to ]
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static void run_external_diff(const char *pgm,
|
|
|
|
const char *name,
|
|
|
|
const char *other,
|
|
|
|
struct diff_filespec *one,
|
|
|
|
struct diff_filespec *two,
|
|
|
|
const char *xfrm_msg,
|
|
|
|
int complete_rewrite)
|
|
|
|
{
|
|
|
|
const char *spawn_arg[10];
|
|
|
|
struct diff_tempfile *temp = diff_temp;
|
|
|
|
int retval;
|
|
|
|
static int atexit_asked = 0;
|
|
|
|
const char *othername;
|
|
|
|
const char **arg = &spawn_arg[0];
|
|
|
|
|
|
|
|
othername = (other? other : name);
|
|
|
|
if (one && two) {
|
|
|
|
prepare_temp_file(name, &temp[0], one);
|
|
|
|
prepare_temp_file(othername, &temp[1], two);
|
|
|
|
if (! atexit_asked &&
|
|
|
|
(temp[0].name == temp[0].tmp_path ||
|
|
|
|
temp[1].name == temp[1].tmp_path)) {
|
|
|
|
atexit_asked = 1;
|
|
|
|
atexit(remove_tempfile);
|
|
|
|
}
|
|
|
|
signal(SIGINT, remove_tempfile_on_signal);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (one && two) {
|
|
|
|
*arg++ = pgm;
|
|
|
|
*arg++ = name;
|
|
|
|
*arg++ = temp[0].name;
|
|
|
|
*arg++ = temp[0].hex;
|
|
|
|
*arg++ = temp[0].mode;
|
|
|
|
*arg++ = temp[1].name;
|
|
|
|
*arg++ = temp[1].hex;
|
|
|
|
*arg++ = temp[1].mode;
|
|
|
|
if (other) {
|
|
|
|
*arg++ = other;
|
|
|
|
*arg++ = xfrm_msg;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*arg++ = pgm;
|
|
|
|
*arg++ = name;
|
|
|
|
}
|
|
|
|
*arg = NULL;
|
|
|
|
retval = spawn_prog(pgm, spawn_arg);
|
|
|
|
remove_tempfile();
|
|
|
|
if (retval) {
|
|
|
|
fprintf(stderr, "external diff died, stopping at %s.\n", name);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void run_diff_cmd(const char *pgm,
|
|
|
|
const char *name,
|
|
|
|
const char *other,
|
|
|
|
struct diff_filespec *one,
|
|
|
|
struct diff_filespec *two,
|
|
|
|
const char *xfrm_msg,
|
|
|
|
int complete_rewrite)
|
|
|
|
{
|
|
|
|
if (pgm) {
|
|
|
|
run_external_diff(pgm, name, other, one, two, xfrm_msg,
|
|
|
|
complete_rewrite);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (one && two)
|
|
|
|
builtin_diff(name, other ? other : name,
|
|
|
|
one, two, xfrm_msg, complete_rewrite);
|
|
|
|
else
|
|
|
|
printf("* Unmerged path %s\n", name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void diff_fill_sha1_info(struct diff_filespec *one)
|
|
|
|
{
|
|
|
|
if (DIFF_FILE_VALID(one)) {
|
|
|
|
if (!one->sha1_valid) {
|
|
|
|
struct stat st;
|
|
|
|
if (lstat(one->path, &st) < 0)
|
|
|
|
die("stat %s", one->path);
|
|
|
|
if (index_path(one->sha1, one->path, &st, 0))
|
|
|
|
die("cannot hash %s\n", one->path);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
memset(one->sha1, 0, 20);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void run_diff(struct diff_filepair *p, struct diff_options *o)
|
|
|
|
{
|
|
|
|
const char *pgm = external_diff();
|
|
|
|
char msg[PATH_MAX*2+300], *xfrm_msg;
|
|
|
|
struct diff_filespec *one;
|
|
|
|
struct diff_filespec *two;
|
|
|
|
const char *name;
|
|
|
|
const char *other;
|
|
|
|
char *name_munged, *other_munged;
|
|
|
|
int complete_rewrite = 0;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
if (DIFF_PAIR_UNMERGED(p)) {
|
|
|
|
/* unmerged */
|
|
|
|
run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
name = p->one->path;
|
|
|
|
other = (strcmp(name, p->two->path) ? p->two->path : NULL);
|
|
|
|
name_munged = quote_one(name);
|
|
|
|
other_munged = quote_one(other);
|
|
|
|
one = p->one; two = p->two;
|
|
|
|
|
|
|
|
diff_fill_sha1_info(one);
|
|
|
|
diff_fill_sha1_info(two);
|
|
|
|
|
|
|
|
len = 0;
|
|
|
|
switch (p->status) {
|
|
|
|
case DIFF_STATUS_COPIED:
|
|
|
|
len += snprintf(msg + len, sizeof(msg) - len,
|
|
|
|
"similarity index %d%%\n"
|
|
|
|
"copy from %s\n"
|
|
|
|
"copy to %s\n",
|
|
|
|
(int)(0.5 + p->score * 100.0/MAX_SCORE),
|
|
|
|
name_munged, other_munged);
|
|
|
|
break;
|
|
|
|
case DIFF_STATUS_RENAMED:
|
|
|
|
len += snprintf(msg + len, sizeof(msg) - len,
|
|
|
|
"similarity index %d%%\n"
|
|
|
|
"rename from %s\n"
|
|
|
|
"rename to %s\n",
|
|
|
|
(int)(0.5 + p->score * 100.0/MAX_SCORE),
|
|
|
|
name_munged, other_munged);
|
|
|
|
break;
|
|
|
|
case DIFF_STATUS_MODIFIED:
|
|
|
|
if (p->score) {
|
|
|
|
len += snprintf(msg + len, sizeof(msg) - len,
|
|
|
|
"dissimilarity index %d%%\n",
|
|
|
|
(int)(0.5 + p->score *
|
|
|
|
100.0/MAX_SCORE));
|
|
|
|
complete_rewrite = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* fallthru */
|
|
|
|
default:
|
|
|
|
/* nothing */
|
|
|
|
;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (memcmp(one->sha1, two->sha1, 20)) {
|
|
|
|
char one_sha1[41];
|
|
|
|
int abbrev = o->full_index ? 40 : DEFAULT_ABBREV;
|
|
|
|
memcpy(one_sha1, sha1_to_hex(one->sha1), 41);
|
|
|
|
|
|
|
|
len += snprintf(msg + len, sizeof(msg) - len,
|
|
|
|
"index %.*s..%.*s",
|
|
|
|
abbrev, one_sha1, abbrev,
|
|
|
|
sha1_to_hex(two->sha1));
|
|
|
|
if (one->mode == two->mode)
|
|
|
|
len += snprintf(msg + len, sizeof(msg) - len,
|
|
|
|
" %06o", one->mode);
|
|
|
|
len += snprintf(msg + len, sizeof(msg) - len, "\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len)
|
|
|
|
msg[--len] = 0;
|
|
|
|
xfrm_msg = len ? msg : NULL;
|
|
|
|
|
|
|
|
if (!pgm &&
|
|
|
|
DIFF_FILE_VALID(one) && DIFF_FILE_VALID(two) &&
|
|
|
|
(S_IFMT & one->mode) != (S_IFMT & two->mode)) {
|
|
|
|
/* a filepair that changes between file and symlink
|
|
|
|
* needs to be split into deletion and creation.
|
|
|
|
*/
|
|
|
|
struct diff_filespec *null = alloc_filespec(two->path);
|
|
|
|
run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0);
|
|
|
|
free(null);
|
|
|
|
null = alloc_filespec(one->path);
|
|
|
|
run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0);
|
|
|
|
free(null);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
run_diff_cmd(pgm, name, other, one, two, xfrm_msg,
|
|
|
|
complete_rewrite);
|
|
|
|
|
|
|
|
free(name_munged);
|
|
|
|
free(other_munged);
|
|
|
|
}
|
|
|
|
|
|
|
|
void diff_setup(struct diff_options *options)
|
|
|
|
{
|
|
|
|
memset(options, 0, sizeof(*options));
|
|
|
|
options->output_format = DIFF_FORMAT_RAW;
|
|
|
|
options->line_termination = '\n';
|
|
|
|
options->break_opt = -1;
|
|
|
|
options->rename_limit = -1;
|
|
|
|
|
|
|
|
options->change = diff_change;
|
|
|
|
options->add_remove = diff_addremove;
|
|
|
|
}
|
|
|
|
|
|
|
|
int diff_setup_done(struct diff_options *options)
|
|
|
|
{
|
|
|
|
if ((options->find_copies_harder &&
|
|
|
|
options->detect_rename != DIFF_DETECT_COPY) ||
|
|
|
|
(0 <= options->rename_limit && !options->detect_rename))
|
|
|
|
return -1;
|
|
|
|
if (options->detect_rename && options->rename_limit < 0)
|
|
|
|
options->rename_limit = diff_rename_limit_default;
|
|
|
|
if (options->setup & DIFF_SETUP_USE_CACHE) {
|
|
|
|
if (!active_cache)
|
|
|
|
/* read-cache does not die even when it fails
|
|
|
|
* so it is safe for us to do this here. Also
|
|
|
|
* it does not smudge active_cache or active_nr
|
|
|
|
* when it fails, so we do not have to worry about
|
|
|
|
* cleaning it up ourselves either.
|
|
|
|
*/
|
|
|
|
read_cache();
|
|
|
|
}
|
|
|
|
if (options->setup & DIFF_SETUP_USE_SIZE_CACHE)
|
|
|
|
use_size_cache = 1;
|
|
|
|
if (options->abbrev <= 0 || 40 < options->abbrev)
|
|
|
|
options->abbrev = 40; /* full */
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int diff_opt_parse(struct diff_options *options, const char **av, int ac)
|
|
|
|
{
|
|
|
|
const char *arg = av[0];
|
|
|
|
if (!strcmp(arg, "-p") || !strcmp(arg, "-u"))
|
|
|
|
options->output_format = DIFF_FORMAT_PATCH;
|
|
|
|
else if (!strcmp(arg, "-z"))
|
|
|
|
options->line_termination = 0;
|
|
|
|
else if (!strncmp(arg, "-l", 2))
|
|
|
|
options->rename_limit = strtoul(arg+2, NULL, 10);
|
|
|
|
else if (!strcmp(arg, "--full-index"))
|
|
|
|
options->full_index = 1;
|
|
|
|
else if (!strcmp(arg, "--name-only"))
|
|
|
|
options->output_format = DIFF_FORMAT_NAME;
|
|
|
|
else if (!strcmp(arg, "--name-status"))
|
|
|
|
options->output_format = DIFF_FORMAT_NAME_STATUS;
|
|
|
|
else if (!strcmp(arg, "-R"))
|
|
|
|
options->reverse_diff = 1;
|
|
|
|
else if (!strncmp(arg, "-S", 2))
|
|
|
|
options->pickaxe = arg + 2;
|
|
|
|
else if (!strcmp(arg, "-s"))
|
|
|
|
options->output_format = DIFF_FORMAT_NO_OUTPUT;
|
|
|
|
else if (!strncmp(arg, "-O", 2))
|
|
|
|
options->orderfile = arg + 2;
|
|
|
|
else if (!strncmp(arg, "--diff-filter=", 14))
|
|
|
|
options->filter = arg + 14;
|
|
|
|
else if (!strcmp(arg, "--pickaxe-all"))
|
|
|
|
options->pickaxe_opts = DIFF_PICKAXE_ALL;
|
|
|
|
else if (!strcmp(arg, "--pickaxe-regex"))
|
|
|
|
options->pickaxe_opts = DIFF_PICKAXE_REGEX;
|
|
|
|
else if (!strncmp(arg, "-B", 2)) {
|
|
|
|
if ((options->break_opt =
|
|
|
|
diff_scoreopt_parse(arg)) == -1)
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
else if (!strncmp(arg, "-M", 2)) {
|
|
|
|
if ((options->rename_score =
|
|
|
|
diff_scoreopt_parse(arg)) == -1)
|
|
|
|
return -1;
|
|
|
|
options->detect_rename = DIFF_DETECT_RENAME;
|
|
|
|
}
|
|
|
|
else if (!strncmp(arg, "-C", 2)) {
|
|
|
|
if ((options->rename_score =
|
|
|
|
diff_scoreopt_parse(arg)) == -1)
|
|
|
|
return -1;
|
|
|
|
options->detect_rename = DIFF_DETECT_COPY;
|
|
|
|
}
|
|
|
|
else if (!strcmp(arg, "--find-copies-harder"))
|
|
|
|
options->find_copies_harder = 1;
|
|
|
|
else if (!strcmp(arg, "--abbrev"))
|
|
|
|
options->abbrev = DEFAULT_ABBREV;
|
|
|
|
else if (!strncmp(arg, "--abbrev=", 9)) {
|
|
|
|
options->abbrev = strtoul(arg + 9, NULL, 10);
|
|
|
|
if (options->abbrev < MINIMUM_ABBREV)
|
|
|
|
options->abbrev = MINIMUM_ABBREV;
|
|
|
|
else if (40 < options->abbrev)
|
|
|
|
options->abbrev = 40;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_num(const char **cp_p)
|
|
|
|
{
|
|
|
|
unsigned long num, scale;
|
|
|
|
int ch, dot;
|
|
|
|
const char *cp = *cp_p;
|
|
|
|
|
|
|
|
num = 0;
|
|
|
|
scale = 1;
|
|
|
|
dot = 0;
|
|
|
|
for(;;) {
|
|
|
|
ch = *cp;
|
|
|
|
if ( !dot && ch == '.' ) {
|
|
|
|
scale = 1;
|
|
|
|
dot = 1;
|
|
|
|
} else if ( ch == '%' ) {
|
|
|
|
scale = dot ? scale*100 : 100;
|
|
|
|
cp++; /* % is always at the end */
|
|
|
|
break;
|
|
|
|
} else if ( ch >= '0' && ch <= '9' ) {
|
|
|
|
if ( scale < 100000 ) {
|
|
|
|
scale *= 10;
|
|
|
|
num = (num*10) + (ch-'0');
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
cp++;
|
|
|
|
}
|
|
|
|
*cp_p = cp;
|
|
|
|
|
|
|
|
/* user says num divided by scale and we say internally that
|
|
|
|
* is MAX_SCORE * num / scale.
|
|
|
|
*/
|
|
|
|
return (num >= scale) ? MAX_SCORE : (MAX_SCORE * num / scale);
|
|
|
|
}
|
|
|
|
|
|
|
|
int diff_scoreopt_parse(const char *opt)
|
|
|
|
{
|
[PATCH] diff: Update -B heuristics.
As Linus pointed out on the mailing list discussion, -B should
break a files that has many inserts even if it still keeps
enough of the original contents, so that the broken pieces can
later be matched with other files by -M or -C. However, if such
a broken pair does not get picked up by -M or -C, we would want
to apply different criteria; namely, regardless of the amount of
new material in the result, the determination of "rewrite"
should be done by looking at the amount of original material
still left in the result. If you still have the original 97
lines from a 100-line document, it does not matter if you add
your own 13 lines to make a 110-line document, or if you add 903
lines to make a 1000-line document. It is not a rewrite but an
in-place edit. On the other hand, if you did lose 97 lines from
the original, it does not matter if you added 27 lines to make a
30-line document or if you added 997 lines to make a 1000-line
document. You did a complete rewrite in either case.
This patch introduces a post-processing phase that runs after
diffcore-rename matches up broken pairs diffcore-break creates.
The purpose of this post-processing is to pick up these broken
pieces and merge them back into in-place modifications. For
this, the score parameter -B option takes is changed into a pair
of numbers, and it takes "-B99/80" format when fully spelled
out. The first number is the minimum amount of "edit" (same
definition as what diffcore-rename uses, which is "sum of
deletion and insertion") that a modification needs to have to be
broken, and the second number is the minimum amount of "delete"
a surviving broken pair must have to avoid being merged back
together. It can be abbreviated to "-B" to use default for
both, "-B9" or "-B9/" to use 90% for "edit" but default (80%)
for merge avoidance, or "-B/75" to use default (99%) "edit" and
75% for merge avoidance.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
int opt1, opt2, cmd;
|
|
|
|
|
|
|
|
if (*opt++ != '-')
|
|
|
|
return -1;
|
|
|
|
cmd = *opt++;
|
|
|
|
if (cmd != 'M' && cmd != 'C' && cmd != 'B')
|
|
|
|
return -1; /* that is not a -M, -C nor -B option */
|
|
|
|
|
|
|
|
opt1 = parse_num(&opt);
|
[PATCH] diff: Update -B heuristics.
As Linus pointed out on the mailing list discussion, -B should
break a files that has many inserts even if it still keeps
enough of the original contents, so that the broken pieces can
later be matched with other files by -M or -C. However, if such
a broken pair does not get picked up by -M or -C, we would want
to apply different criteria; namely, regardless of the amount of
new material in the result, the determination of "rewrite"
should be done by looking at the amount of original material
still left in the result. If you still have the original 97
lines from a 100-line document, it does not matter if you add
your own 13 lines to make a 110-line document, or if you add 903
lines to make a 1000-line document. It is not a rewrite but an
in-place edit. On the other hand, if you did lose 97 lines from
the original, it does not matter if you added 27 lines to make a
30-line document or if you added 997 lines to make a 1000-line
document. You did a complete rewrite in either case.
This patch introduces a post-processing phase that runs after
diffcore-rename matches up broken pairs diffcore-break creates.
The purpose of this post-processing is to pick up these broken
pieces and merge them back into in-place modifications. For
this, the score parameter -B option takes is changed into a pair
of numbers, and it takes "-B99/80" format when fully spelled
out. The first number is the minimum amount of "edit" (same
definition as what diffcore-rename uses, which is "sum of
deletion and insertion") that a modification needs to have to be
broken, and the second number is the minimum amount of "delete"
a surviving broken pair must have to avoid being merged back
together. It can be abbreviated to "-B" to use default for
both, "-B9" or "-B9/" to use 90% for "edit" but default (80%)
for merge avoidance, or "-B/75" to use default (99%) "edit" and
75% for merge avoidance.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
if (cmd != 'B')
|
|
|
|
opt2 = 0;
|
|
|
|
else {
|
|
|
|
if (*opt == 0)
|
|
|
|
opt2 = 0;
|
|
|
|
else if (*opt != '/')
|
|
|
|
return -1; /* we expect -B80/99 or -B80 */
|
|
|
|
else {
|
|
|
|
opt++;
|
|
|
|
opt2 = parse_num(&opt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (*opt != 0)
|
|
|
|
return -1;
|
[PATCH] diff: Update -B heuristics.
As Linus pointed out on the mailing list discussion, -B should
break a files that has many inserts even if it still keeps
enough of the original contents, so that the broken pieces can
later be matched with other files by -M or -C. However, if such
a broken pair does not get picked up by -M or -C, we would want
to apply different criteria; namely, regardless of the amount of
new material in the result, the determination of "rewrite"
should be done by looking at the amount of original material
still left in the result. If you still have the original 97
lines from a 100-line document, it does not matter if you add
your own 13 lines to make a 110-line document, or if you add 903
lines to make a 1000-line document. It is not a rewrite but an
in-place edit. On the other hand, if you did lose 97 lines from
the original, it does not matter if you added 27 lines to make a
30-line document or if you added 997 lines to make a 1000-line
document. You did a complete rewrite in either case.
This patch introduces a post-processing phase that runs after
diffcore-rename matches up broken pairs diffcore-break creates.
The purpose of this post-processing is to pick up these broken
pieces and merge them back into in-place modifications. For
this, the score parameter -B option takes is changed into a pair
of numbers, and it takes "-B99/80" format when fully spelled
out. The first number is the minimum amount of "edit" (same
definition as what diffcore-rename uses, which is "sum of
deletion and insertion") that a modification needs to have to be
broken, and the second number is the minimum amount of "delete"
a surviving broken pair must have to avoid being merged back
together. It can be abbreviated to "-B" to use default for
both, "-B9" or "-B9/" to use 90% for "edit" but default (80%)
for merge avoidance, or "-B/75" to use default (99%) "edit" and
75% for merge avoidance.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
return opt1 | (opt2 << 16);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct diff_queue_struct diff_queued_diff;
|
|
|
|
|
|
|
|
void diff_q(struct diff_queue_struct *queue, struct diff_filepair *dp)
|
|
|
|
{
|
|
|
|
if (queue->alloc <= queue->nr) {
|
|
|
|
queue->alloc = alloc_nr(queue->alloc);
|
|
|
|
queue->queue = xrealloc(queue->queue,
|
|
|
|
sizeof(dp) * queue->alloc);
|
|
|
|
}
|
|
|
|
queue->queue[queue->nr++] = dp;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
|
|
|
|
struct diff_filespec *one,
|
|
|
|
struct diff_filespec *two)
|
|
|
|
{
|
|
|
|
struct diff_filepair *dp = xmalloc(sizeof(*dp));
|
|
|
|
dp->one = one;
|
|
|
|
dp->two = two;
|
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good
enough to show patch output and discussion on the mailing list
about the diff-raw format updates revealed many problems with
it. This patch fixes all the ones known to me, without making
things I want to do later impossible, mostly related to patch
reordering.
(1) Earlier rename/copy detector determined which one is rename
and which one is copy too early, which made it impossible
to later introduce diffcore transformers to reorder
patches. This patch fixes it by moving that logic to the
very end of the processing.
(2) Earlier output routine diff_flush() was pruning all the
"no-change" entries indiscriminatingly. This was done due
to my false assumption that one of the requirements in the
diff-raw output was not to show such an entry (which
resulted in my incorrect comment about "diff-helper never
being able to be equivalent to built-in diff driver"). My
special thanks go to Linus for correcting me about this.
When we produce diff-raw output, for the downstream to be
able to tell renames from copies, sometimes it _is_
necessary to output "no-change" entries, and this patch
adds diffcore_prune() function for doing it.
(3) Earlier diff_filepair structure was trying to be not too
specific about rename/copy operations, but the purpose of
the structure was to record one or two paths, which _was_
indeed about rename/copy. This patch discards xfrm_msg
field which was trying to be generic for this wrong reason,
and introduces a couple of fields (rename_score and
rename_rank) that are explicitly specific to rename/copy
logic. One thing to note is that the information in a
single diff_filepair structure _still_ does not distinguish
renames from copies, and it is deliberately so. This is to
allow patches to be reordered in later stages.
(4) This patch also adds some tests about diff-raw format
output and makes sure that necessary "no-change" entries
appear on the output.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
dp->score = 0;
|
|
|
|
dp->status = 0;
|
|
|
|
dp->source_stays = 0;
|
[PATCH] Add -B flag to diff-* brothers.
A new diffcore transformation, diffcore-break.c, is introduced.
When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation. This
makes it easier to review such a complete rewrite patch.
The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.
As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used. For example, if file0 gets
completely rewritten to make it as if it were rather based on
file1 which itself disappeared, the following happens:
The original change looks like this:
file0 --> file0' (quite different from file0)
file1 --> /dev/null
After diffcore-break runs, it would become this:
file0 --> /dev/null
/dev/null --> file0'
file1 --> /dev/null
Then diffcore-rename matches them up:
file1 --> file0'
The internal score values are finer grained now. Earlier
maximum of 10000 has been raised to 60000; there is no user
visible changes but there is no reason to waste available bits.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
dp->broken_pair = 0;
|
|
|
|
if (queue)
|
|
|
|
diff_q(queue, dp);
|
|
|
|
return dp;
|
|
|
|
}
|
|
|
|
|
|
|
|
void diff_free_filepair(struct diff_filepair *p)
|
|
|
|
{
|
|
|
|
diff_free_filespec_data(p->one);
|
|
|
|
diff_free_filespec_data(p->two);
|
|
|
|
free(p->one);
|
|
|
|
free(p->two);
|
|
|
|
free(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This is different from find_unique_abbrev() in that
|
|
|
|
* it stuffs the result with dots for alignment.
|
|
|
|
*/
|
|
|
|
const char *diff_unique_abbrev(const unsigned char *sha1, int len)
|
|
|
|
{
|
|
|
|
int abblen;
|
|
|
|
const char *abbrev;
|
|
|
|
if (len == 40)
|
|
|
|
return sha1_to_hex(sha1);
|
|
|
|
|
|
|
|
abbrev = find_unique_abbrev(sha1, len);
|
|
|
|
if (!abbrev)
|
|
|
|
return sha1_to_hex(sha1);
|
|
|
|
abblen = strlen(abbrev);
|
|
|
|
if (abblen < 37) {
|
|
|
|
static char hex[41];
|
|
|
|
if (len < abblen && abblen <= len + 2)
|
|
|
|
sprintf(hex, "%s%.*s", abbrev, len+3-abblen, "..");
|
|
|
|
else
|
|
|
|
sprintf(hex, "%s...", abbrev);
|
|
|
|
return hex;
|
|
|
|
}
|
|
|
|
return sha1_to_hex(sha1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void diff_flush_raw(struct diff_filepair *p,
|
|
|
|
int line_termination,
|
|
|
|
int inter_name_termination,
|
|
|
|
struct diff_options *options)
|
|
|
|
{
|
|
|
|
int two_paths;
|
|
|
|
char status[10];
|
|
|
|
int abbrev = options->abbrev;
|
|
|
|
const char *path_one, *path_two;
|
|
|
|
int output_format = options->output_format;
|
|
|
|
|
|
|
|
path_one = p->one->path;
|
|
|
|
path_two = p->two->path;
|
|
|
|
if (line_termination) {
|
|
|
|
path_one = quote_one(path_one);
|
|
|
|
path_two = quote_one(path_two);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (p->score)
|
|
|
|
sprintf(status, "%c%03d", p->status,
|
|
|
|
(int)(0.5 + p->score * 100.0/MAX_SCORE));
|
|
|
|
else {
|
|
|
|
status[0] = p->status;
|
|
|
|
status[1] = 0;
|
|
|
|
}
|
|
|
|
switch (p->status) {
|
|
|
|
case DIFF_STATUS_COPIED:
|
|
|
|
case DIFF_STATUS_RENAMED:
|
|
|
|
two_paths = 1;
|
|
|
|
break;
|
|
|
|
case DIFF_STATUS_ADDED:
|
|
|
|
case DIFF_STATUS_DELETED:
|
[PATCH] Add -B flag to diff-* brothers.
A new diffcore transformation, diffcore-break.c, is introduced.
When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation. This
makes it easier to review such a complete rewrite patch.
The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.
As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used. For example, if file0 gets
completely rewritten to make it as if it were rather based on
file1 which itself disappeared, the following happens:
The original change looks like this:
file0 --> file0' (quite different from file0)
file1 --> /dev/null
After diffcore-break runs, it would become this:
file0 --> /dev/null
/dev/null --> file0'
file1 --> /dev/null
Then diffcore-rename matches them up:
file1 --> file0'
The internal score values are finer grained now. Earlier
maximum of 10000 has been raised to 60000; there is no user
visible changes but there is no reason to waste available bits.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
two_paths = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
two_paths = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (output_format != DIFF_FORMAT_NAME_STATUS) {
|
|
|
|
printf(":%06o %06o %s ",
|
|
|
|
p->one->mode, p->two->mode,
|
|
|
|
diff_unique_abbrev(p->one->sha1, abbrev));
|
|
|
|
printf("%s ",
|
|
|
|
diff_unique_abbrev(p->two->sha1, abbrev));
|
|
|
|
}
|
|
|
|
printf("%s%c%s", status, inter_name_termination, path_one);
|
|
|
|
if (two_paths)
|
|
|
|
printf("%c%s", inter_name_termination, path_two);
|
|
|
|
putchar(line_termination);
|
|
|
|
if (path_one != p->one->path)
|
|
|
|
free((void*)path_one);
|
|
|
|
if (path_two != p->two->path)
|
|
|
|
free((void*)path_two);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void diff_flush_name(struct diff_filepair *p,
|
|
|
|
int inter_name_termination,
|
|
|
|
int line_termination)
|
|
|
|
{
|
|
|
|
char *path = p->two->path;
|
|
|
|
|
|
|
|
if (line_termination)
|
|
|
|
path = quote_one(p->two->path);
|
|
|
|
else
|
|
|
|
path = p->two->path;
|
|
|
|
printf("%s%c", path, line_termination);
|
|
|
|
if (p->two->path != path)
|
|
|
|
free(path);
|
|
|
|
}
|
|
|
|
|
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good
enough to show patch output and discussion on the mailing list
about the diff-raw format updates revealed many problems with
it. This patch fixes all the ones known to me, without making
things I want to do later impossible, mostly related to patch
reordering.
(1) Earlier rename/copy detector determined which one is rename
and which one is copy too early, which made it impossible
to later introduce diffcore transformers to reorder
patches. This patch fixes it by moving that logic to the
very end of the processing.
(2) Earlier output routine diff_flush() was pruning all the
"no-change" entries indiscriminatingly. This was done due
to my false assumption that one of the requirements in the
diff-raw output was not to show such an entry (which
resulted in my incorrect comment about "diff-helper never
being able to be equivalent to built-in diff driver"). My
special thanks go to Linus for correcting me about this.
When we produce diff-raw output, for the downstream to be
able to tell renames from copies, sometimes it _is_
necessary to output "no-change" entries, and this patch
adds diffcore_prune() function for doing it.
(3) Earlier diff_filepair structure was trying to be not too
specific about rename/copy operations, but the purpose of
the structure was to record one or two paths, which _was_
indeed about rename/copy. This patch discards xfrm_msg
field which was trying to be generic for this wrong reason,
and introduces a couple of fields (rename_score and
rename_rank) that are explicitly specific to rename/copy
logic. One thing to note is that the information in a
single diff_filepair structure _still_ does not distinguish
renames from copies, and it is deliberately so. This is to
allow patches to be reordered in later stages.
(4) This patch also adds some tests about diff-raw format
output and makes sure that necessary "no-change" entries
appear on the output.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
int diff_unmodified_pair(struct diff_filepair *p)
|
|
|
|
{
|
|
|
|
/* This function is written stricter than necessary to support
|
|
|
|
* the currently implemented transformers, but the idea is to
|
|
|
|
* let transformers to produce diff_filepairs any way they want,
|
|
|
|
* and filter and clean them up here before producing the output.
|
|
|
|
*/
|
|
|
|
struct diff_filespec *one, *two;
|
|
|
|
|
|
|
|
if (DIFF_PAIR_UNMERGED(p))
|
|
|
|
return 0; /* unmerged is interesting */
|
|
|
|
|
|
|
|
one = p->one;
|
|
|
|
two = p->two;
|
|
|
|
|
|
|
|
/* deletion, addition, mode or type change
|
|
|
|
* and rename are all interesting.
|
|
|
|
*/
|
|
|
|
if (DIFF_FILE_VALID(one) != DIFF_FILE_VALID(two) ||
|
|
|
|
DIFF_PAIR_MODE_CHANGED(p) ||
|
|
|
|
strcmp(one->path, two->path))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* both are valid and point at the same path. that is, we are
|
|
|
|
* dealing with a change.
|
|
|
|
*/
|
|
|
|
if (one->sha1_valid && two->sha1_valid &&
|
|
|
|
!memcmp(one->sha1, two->sha1, sizeof(one->sha1)))
|
|
|
|
return 1; /* no change */
|
|
|
|
if (!one->sha1_valid && !two->sha1_valid)
|
|
|
|
return 1; /* both look at the same file on the filesystem. */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void diff_flush_patch(struct diff_filepair *p, struct diff_options *o)
|
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good
enough to show patch output and discussion on the mailing list
about the diff-raw format updates revealed many problems with
it. This patch fixes all the ones known to me, without making
things I want to do later impossible, mostly related to patch
reordering.
(1) Earlier rename/copy detector determined which one is rename
and which one is copy too early, which made it impossible
to later introduce diffcore transformers to reorder
patches. This patch fixes it by moving that logic to the
very end of the processing.
(2) Earlier output routine diff_flush() was pruning all the
"no-change" entries indiscriminatingly. This was done due
to my false assumption that one of the requirements in the
diff-raw output was not to show such an entry (which
resulted in my incorrect comment about "diff-helper never
being able to be equivalent to built-in diff driver"). My
special thanks go to Linus for correcting me about this.
When we produce diff-raw output, for the downstream to be
able to tell renames from copies, sometimes it _is_
necessary to output "no-change" entries, and this patch
adds diffcore_prune() function for doing it.
(3) Earlier diff_filepair structure was trying to be not too
specific about rename/copy operations, but the purpose of
the structure was to record one or two paths, which _was_
indeed about rename/copy. This patch discards xfrm_msg
field which was trying to be generic for this wrong reason,
and introduces a couple of fields (rename_score and
rename_rank) that are explicitly specific to rename/copy
logic. One thing to note is that the information in a
single diff_filepair structure _still_ does not distinguish
renames from copies, and it is deliberately so. This is to
allow patches to be reordered in later stages.
(4) This patch also adds some tests about diff-raw format
output and makes sure that necessary "no-change" entries
appear on the output.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
{
|
|
|
|
if (diff_unmodified_pair(p))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) ||
|
|
|
|
(DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode)))
|
|
|
|
return; /* no tree diffs in patch format */
|
|
|
|
|
|
|
|
run_diff(p, o);
|
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good
enough to show patch output and discussion on the mailing list
about the diff-raw format updates revealed many problems with
it. This patch fixes all the ones known to me, without making
things I want to do later impossible, mostly related to patch
reordering.
(1) Earlier rename/copy detector determined which one is rename
and which one is copy too early, which made it impossible
to later introduce diffcore transformers to reorder
patches. This patch fixes it by moving that logic to the
very end of the processing.
(2) Earlier output routine diff_flush() was pruning all the
"no-change" entries indiscriminatingly. This was done due
to my false assumption that one of the requirements in the
diff-raw output was not to show such an entry (which
resulted in my incorrect comment about "diff-helper never
being able to be equivalent to built-in diff driver"). My
special thanks go to Linus for correcting me about this.
When we produce diff-raw output, for the downstream to be
able to tell renames from copies, sometimes it _is_
necessary to output "no-change" entries, and this patch
adds diffcore_prune() function for doing it.
(3) Earlier diff_filepair structure was trying to be not too
specific about rename/copy operations, but the purpose of
the structure was to record one or two paths, which _was_
indeed about rename/copy. This patch discards xfrm_msg
field which was trying to be generic for this wrong reason,
and introduces a couple of fields (rename_score and
rename_rank) that are explicitly specific to rename/copy
logic. One thing to note is that the information in a
single diff_filepair structure _still_ does not distinguish
renames from copies, and it is deliberately so. This is to
allow patches to be reordered in later stages.
(4) This patch also adds some tests about diff-raw format
output and makes sure that necessary "no-change" entries
appear on the output.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
}
|
|
|
|
|
|
|
|
int diff_queue_is_empty(void)
|
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good
enough to show patch output and discussion on the mailing list
about the diff-raw format updates revealed many problems with
it. This patch fixes all the ones known to me, without making
things I want to do later impossible, mostly related to patch
reordering.
(1) Earlier rename/copy detector determined which one is rename
and which one is copy too early, which made it impossible
to later introduce diffcore transformers to reorder
patches. This patch fixes it by moving that logic to the
very end of the processing.
(2) Earlier output routine diff_flush() was pruning all the
"no-change" entries indiscriminatingly. This was done due
to my false assumption that one of the requirements in the
diff-raw output was not to show such an entry (which
resulted in my incorrect comment about "diff-helper never
being able to be equivalent to built-in diff driver"). My
special thanks go to Linus for correcting me about this.
When we produce diff-raw output, for the downstream to be
able to tell renames from copies, sometimes it _is_
necessary to output "no-change" entries, and this patch
adds diffcore_prune() function for doing it.
(3) Earlier diff_filepair structure was trying to be not too
specific about rename/copy operations, but the purpose of
the structure was to record one or two paths, which _was_
indeed about rename/copy. This patch discards xfrm_msg
field which was trying to be generic for this wrong reason,
and introduces a couple of fields (rename_score and
rename_rank) that are explicitly specific to rename/copy
logic. One thing to note is that the information in a
single diff_filepair structure _still_ does not distinguish
renames from copies, and it is deliberately so. This is to
allow patches to be reordered in later stages.
(4) This patch also adds some tests about diff-raw format
output and makes sure that necessary "no-change" entries
appear on the output.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
{
|
|
|
|
struct diff_queue_struct *q = &diff_queued_diff;
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < q->nr; i++)
|
|
|
|
if (!diff_unmodified_pair(q->queue[i]))
|
|
|
|
return 0;
|
|
|
|
return 1;
|
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good
enough to show patch output and discussion on the mailing list
about the diff-raw format updates revealed many problems with
it. This patch fixes all the ones known to me, without making
things I want to do later impossible, mostly related to patch
reordering.
(1) Earlier rename/copy detector determined which one is rename
and which one is copy too early, which made it impossible
to later introduce diffcore transformers to reorder
patches. This patch fixes it by moving that logic to the
very end of the processing.
(2) Earlier output routine diff_flush() was pruning all the
"no-change" entries indiscriminatingly. This was done due
to my false assumption that one of the requirements in the
diff-raw output was not to show such an entry (which
resulted in my incorrect comment about "diff-helper never
being able to be equivalent to built-in diff driver"). My
special thanks go to Linus for correcting me about this.
When we produce diff-raw output, for the downstream to be
able to tell renames from copies, sometimes it _is_
necessary to output "no-change" entries, and this patch
adds diffcore_prune() function for doing it.
(3) Earlier diff_filepair structure was trying to be not too
specific about rename/copy operations, but the purpose of
the structure was to record one or two paths, which _was_
indeed about rename/copy. This patch discards xfrm_msg
field which was trying to be generic for this wrong reason,
and introduces a couple of fields (rename_score and
rename_rank) that are explicitly specific to rename/copy
logic. One thing to note is that the information in a
single diff_filepair structure _still_ does not distinguish
renames from copies, and it is deliberately so. This is to
allow patches to be reordered in later stages.
(4) This patch also adds some tests about diff-raw format
output and makes sure that necessary "no-change" entries
appear on the output.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
}
|
|
|
|
|
|
|
|
#if DIFF_DEBUG
|
|
|
|
void diff_debug_filespec(struct diff_filespec *s, int x, const char *one)
|
|
|
|
{
|
|
|
|
fprintf(stderr, "queue[%d] %s (%s) %s %06o %s\n",
|
|
|
|
x, one ? one : "",
|
|
|
|
s->path,
|
|
|
|
DIFF_FILE_VALID(s) ? "valid" : "invalid",
|
|
|
|
s->mode,
|
|
|
|
s->sha1_valid ? sha1_to_hex(s->sha1) : "");
|
|
|
|
fprintf(stderr, "queue[%d] %s size %lu flags %d\n",
|
|
|
|
x, one ? one : "",
|
|
|
|
s->size, s->xfrm_flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
void diff_debug_filepair(const struct diff_filepair *p, int i)
|
|
|
|
{
|
|
|
|
diff_debug_filespec(p->one, i, "one");
|
|
|
|
diff_debug_filespec(p->two, i, "two");
|
[PATCH] Add -B flag to diff-* brothers.
A new diffcore transformation, diffcore-break.c, is introduced.
When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation. This
makes it easier to review such a complete rewrite patch.
The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.
As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used. For example, if file0 gets
completely rewritten to make it as if it were rather based on
file1 which itself disappeared, the following happens:
The original change looks like this:
file0 --> file0' (quite different from file0)
file1 --> /dev/null
After diffcore-break runs, it would become this:
file0 --> /dev/null
/dev/null --> file0'
file1 --> /dev/null
Then diffcore-rename matches them up:
file1 --> file0'
The internal score values are finer grained now. Earlier
maximum of 10000 has been raised to 60000; there is no user
visible changes but there is no reason to waste available bits.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
fprintf(stderr, "score %d, status %c stays %d broken %d\n",
|
|
|
|
p->score, p->status ? p->status : '?',
|
[PATCH] Add -B flag to diff-* brothers.
A new diffcore transformation, diffcore-break.c, is introduced.
When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation. This
makes it easier to review such a complete rewrite patch.
The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.
As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used. For example, if file0 gets
completely rewritten to make it as if it were rather based on
file1 which itself disappeared, the following happens:
The original change looks like this:
file0 --> file0' (quite different from file0)
file1 --> /dev/null
After diffcore-break runs, it would become this:
file0 --> /dev/null
/dev/null --> file0'
file1 --> /dev/null
Then diffcore-rename matches them up:
file1 --> file0'
The internal score values are finer grained now. Earlier
maximum of 10000 has been raised to 60000; there is no user
visible changes but there is no reason to waste available bits.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
p->source_stays, p->broken_pair);
|
|
|
|
}
|
|
|
|
|
|
|
|
void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
if (msg)
|
|
|
|
fprintf(stderr, "%s\n", msg);
|
|
|
|
fprintf(stderr, "q->nr = %d\n", q->nr);
|
|
|
|
for (i = 0; i < q->nr; i++) {
|
|
|
|
struct diff_filepair *p = q->queue[i];
|
|
|
|
diff_debug_filepair(p, i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static void diff_resolve_rename_copy(void)
|
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
struct diff_filepair *p, *pp;
|
|
|
|
struct diff_queue_struct *q = &diff_queued_diff;
|
|
|
|
|
|
|
|
diff_debug_queue("resolve-rename-copy", q);
|
|
|
|
|
|
|
|
for (i = 0; i < q->nr; i++) {
|
|
|
|
p = q->queue[i];
|
|
|
|
p->status = 0; /* undecided */
|
|
|
|
if (DIFF_PAIR_UNMERGED(p))
|
|
|
|
p->status = DIFF_STATUS_UNMERGED;
|
|
|
|
else if (!DIFF_FILE_VALID(p->one))
|
|
|
|
p->status = DIFF_STATUS_ADDED;
|
|
|
|
else if (!DIFF_FILE_VALID(p->two))
|
|
|
|
p->status = DIFF_STATUS_DELETED;
|
|
|
|
else if (DIFF_PAIR_TYPE_CHANGED(p))
|
|
|
|
p->status = DIFF_STATUS_TYPE_CHANGED;
|
|
|
|
|
|
|
|
/* from this point on, we are dealing with a pair
|
|
|
|
* whose both sides are valid and of the same type, i.e.
|
|
|
|
* either in-place edit or rename/copy edit.
|
|
|
|
*/
|
|
|
|
else if (DIFF_PAIR_RENAME(p)) {
|
|
|
|
if (p->source_stays) {
|
|
|
|
p->status = DIFF_STATUS_COPIED;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/* See if there is some other filepair that
|
|
|
|
* copies from the same source as us. If so
|
|
|
|
* we are a copy. Otherwise we are either a
|
|
|
|
* copy if the path stays, or a rename if it
|
|
|
|
* does not, but we already handled "stays" case.
|
|
|
|
*/
|
|
|
|
for (j = i + 1; j < q->nr; j++) {
|
|
|
|
pp = q->queue[j];
|
|
|
|
if (strcmp(pp->one->path, p->one->path))
|
|
|
|
continue; /* not us */
|
|
|
|
if (!DIFF_PAIR_RENAME(pp))
|
|
|
|
continue; /* not a rename/copy */
|
|
|
|
/* pp is a rename/copy from the same source */
|
|
|
|
p->status = DIFF_STATUS_COPIED;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!p->status)
|
|
|
|
p->status = DIFF_STATUS_RENAMED;
|
|
|
|
}
|
|
|
|
else if (memcmp(p->one->sha1, p->two->sha1, 20) ||
|
|
|
|
p->one->mode != p->two->mode)
|
|
|
|
p->status = DIFF_STATUS_MODIFIED;
|
|
|
|
else {
|
|
|
|
/* This is a "no-change" entry and should not
|
|
|
|
* happen anymore, but prepare for broken callers.
|
|
|
|
*/
|
|
|
|
error("feeding unmodified %s to diffcore",
|
|
|
|
p->one->path);
|
|
|
|
p->status = DIFF_STATUS_UNKNOWN;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
diff_debug_queue("resolve-rename-copy done", q);
|
|
|
|
}
|
|
|
|
|
|
|
|
void diff_flush(struct diff_options *options)
|
|
|
|
{
|
|
|
|
struct diff_queue_struct *q = &diff_queued_diff;
|
|
|
|
int i;
|
|
|
|
int inter_name_termination = '\t';
|
|
|
|
int diff_output_format = options->output_format;
|
|
|
|
int line_termination = options->line_termination;
|
|
|
|
|
|
|
|
if (!line_termination)
|
|
|
|
inter_name_termination = 0;
|
|
|
|
|
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good
enough to show patch output and discussion on the mailing list
about the diff-raw format updates revealed many problems with
it. This patch fixes all the ones known to me, without making
things I want to do later impossible, mostly related to patch
reordering.
(1) Earlier rename/copy detector determined which one is rename
and which one is copy too early, which made it impossible
to later introduce diffcore transformers to reorder
patches. This patch fixes it by moving that logic to the
very end of the processing.
(2) Earlier output routine diff_flush() was pruning all the
"no-change" entries indiscriminatingly. This was done due
to my false assumption that one of the requirements in the
diff-raw output was not to show such an entry (which
resulted in my incorrect comment about "diff-helper never
being able to be equivalent to built-in diff driver"). My
special thanks go to Linus for correcting me about this.
When we produce diff-raw output, for the downstream to be
able to tell renames from copies, sometimes it _is_
necessary to output "no-change" entries, and this patch
adds diffcore_prune() function for doing it.
(3) Earlier diff_filepair structure was trying to be not too
specific about rename/copy operations, but the purpose of
the structure was to record one or two paths, which _was_
indeed about rename/copy. This patch discards xfrm_msg
field which was trying to be generic for this wrong reason,
and introduces a couple of fields (rename_score and
rename_rank) that are explicitly specific to rename/copy
logic. One thing to note is that the information in a
single diff_filepair structure _still_ does not distinguish
renames from copies, and it is deliberately so. This is to
allow patches to be reordered in later stages.
(4) This patch also adds some tests about diff-raw format
output and makes sure that necessary "no-change" entries
appear on the output.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
for (i = 0; i < q->nr; i++) {
|
|
|
|
struct diff_filepair *p = q->queue[i];
|
|
|
|
|
|
|
|
switch (p->status) {
|
|
|
|
case DIFF_STATUS_UNKNOWN:
|
|
|
|
break;
|
|
|
|
case 0:
|
|
|
|
die("internal error in diff-resolve-rename-copy");
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
switch (diff_output_format) {
|
|
|
|
case DIFF_FORMAT_PATCH:
|
|
|
|
diff_flush_patch(p, options);
|
|
|
|
break;
|
|
|
|
case DIFF_FORMAT_RAW:
|
|
|
|
case DIFF_FORMAT_NAME_STATUS:
|
|
|
|
diff_flush_raw(p, line_termination,
|
|
|
|
inter_name_termination,
|
|
|
|
options);
|
|
|
|
break;
|
|
|
|
case DIFF_FORMAT_NAME:
|
|
|
|
diff_flush_name(p,
|
|
|
|
inter_name_termination,
|
|
|
|
line_termination);
|
|
|
|
break;
|
|
|
|
case DIFF_FORMAT_NO_OUTPUT:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
diff_free_filepair(p);
|
|
|
|
}
|
|
|
|
free(q->queue);
|
|
|
|
q->queue = NULL;
|
|
|
|
q->nr = q->alloc = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void diffcore_apply_filter(const char *filter)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct diff_queue_struct *q = &diff_queued_diff;
|
|
|
|
struct diff_queue_struct outq;
|
|
|
|
outq.queue = NULL;
|
|
|
|
outq.nr = outq.alloc = 0;
|
|
|
|
|
|
|
|
if (!filter)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (strchr(filter, DIFF_STATUS_FILTER_AON)) {
|
|
|
|
int found;
|
|
|
|
for (i = found = 0; !found && i < q->nr; i++) {
|
|
|
|
struct diff_filepair *p = q->queue[i];
|
|
|
|
if (((p->status == DIFF_STATUS_MODIFIED) &&
|
|
|
|
((p->score &&
|
|
|
|
strchr(filter, DIFF_STATUS_FILTER_BROKEN)) ||
|
|
|
|
(!p->score &&
|
|
|
|
strchr(filter, DIFF_STATUS_MODIFIED)))) ||
|
|
|
|
((p->status != DIFF_STATUS_MODIFIED) &&
|
|
|
|
strchr(filter, p->status)))
|
|
|
|
found++;
|
|
|
|
}
|
|
|
|
if (found)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* otherwise we will clear the whole queue
|
|
|
|
* by copying the empty outq at the end of this
|
|
|
|
* function, but first clear the current entries
|
|
|
|
* in the queue.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < q->nr; i++)
|
|
|
|
diff_free_filepair(q->queue[i]);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* Only the matching ones */
|
|
|
|
for (i = 0; i < q->nr; i++) {
|
|
|
|
struct diff_filepair *p = q->queue[i];
|
|
|
|
|
|
|
|
if (((p->status == DIFF_STATUS_MODIFIED) &&
|
|
|
|
((p->score &&
|
|
|
|
strchr(filter, DIFF_STATUS_FILTER_BROKEN)) ||
|
|
|
|
(!p->score &&
|
|
|
|
strchr(filter, DIFF_STATUS_MODIFIED)))) ||
|
|
|
|
((p->status != DIFF_STATUS_MODIFIED) &&
|
|
|
|
strchr(filter, p->status)))
|
|
|
|
diff_q(&outq, p);
|
|
|
|
else
|
|
|
|
diff_free_filepair(p);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(q->queue);
|
|
|
|
*q = outq;
|
|
|
|
}
|
|
|
|
|
|
|
|
void diffcore_std(struct diff_options *options)
|
|
|
|
{
|
|
|
|
if (options->paths && options->paths[0])
|
|
|
|
diffcore_pathspec(options->paths);
|
|
|
|
if (options->break_opt != -1)
|
|
|
|
diffcore_break(options->break_opt);
|
|
|
|
if (options->detect_rename)
|
|
|
|
diffcore_rename(options);
|
|
|
|
if (options->break_opt != -1)
|
[PATCH] diff: Update -B heuristics.
As Linus pointed out on the mailing list discussion, -B should
break a files that has many inserts even if it still keeps
enough of the original contents, so that the broken pieces can
later be matched with other files by -M or -C. However, if such
a broken pair does not get picked up by -M or -C, we would want
to apply different criteria; namely, regardless of the amount of
new material in the result, the determination of "rewrite"
should be done by looking at the amount of original material
still left in the result. If you still have the original 97
lines from a 100-line document, it does not matter if you add
your own 13 lines to make a 110-line document, or if you add 903
lines to make a 1000-line document. It is not a rewrite but an
in-place edit. On the other hand, if you did lose 97 lines from
the original, it does not matter if you added 27 lines to make a
30-line document or if you added 997 lines to make a 1000-line
document. You did a complete rewrite in either case.
This patch introduces a post-processing phase that runs after
diffcore-rename matches up broken pairs diffcore-break creates.
The purpose of this post-processing is to pick up these broken
pieces and merge them back into in-place modifications. For
this, the score parameter -B option takes is changed into a pair
of numbers, and it takes "-B99/80" format when fully spelled
out. The first number is the minimum amount of "edit" (same
definition as what diffcore-rename uses, which is "sum of
deletion and insertion") that a modification needs to have to be
broken, and the second number is the minimum amount of "delete"
a surviving broken pair must have to avoid being merged back
together. It can be abbreviated to "-B" to use default for
both, "-B9" or "-B9/" to use 90% for "edit" but default (80%)
for merge avoidance, or "-B/75" to use default (99%) "edit" and
75% for merge avoidance.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
diffcore_merge_broken();
|
|
|
|
if (options->pickaxe)
|
|
|
|
diffcore_pickaxe(options->pickaxe, options->pickaxe_opts);
|
|
|
|
if (options->orderfile)
|
|
|
|
diffcore_order(options->orderfile);
|
|
|
|
diff_resolve_rename_copy();
|
|
|
|
diffcore_apply_filter(options->filter);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void diffcore_std_no_resolve(struct diff_options *options)
|
|
|
|
{
|
|
|
|
if (options->pickaxe)
|
|
|
|
diffcore_pickaxe(options->pickaxe, options->pickaxe_opts);
|
|
|
|
if (options->orderfile)
|
|
|
|
diffcore_order(options->orderfile);
|
|
|
|
diffcore_apply_filter(options->filter);
|
|
|
|
}
|
|
|
|
|
|
|
|
void diff_addremove(struct diff_options *options,
|
|
|
|
int addremove, unsigned mode,
|
|
|
|
const unsigned char *sha1,
|
|
|
|
const char *base, const char *path)
|
|
|
|
{
|
|
|
|
char concatpath[PATH_MAX];
|
|
|
|
struct diff_filespec *one, *two;
|
|
|
|
|
|
|
|
/* This may look odd, but it is a preparation for
|
|
|
|
* feeding "there are unchanged files which should
|
|
|
|
* not produce diffs, but when you are doing copy
|
|
|
|
* detection you would need them, so here they are"
|
|
|
|
* entries to the diff-core. They will be prefixed
|
|
|
|
* with something like '=' or '*' (I haven't decided
|
|
|
|
* which but should not make any difference).
|
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good
enough to show patch output and discussion on the mailing list
about the diff-raw format updates revealed many problems with
it. This patch fixes all the ones known to me, without making
things I want to do later impossible, mostly related to patch
reordering.
(1) Earlier rename/copy detector determined which one is rename
and which one is copy too early, which made it impossible
to later introduce diffcore transformers to reorder
patches. This patch fixes it by moving that logic to the
very end of the processing.
(2) Earlier output routine diff_flush() was pruning all the
"no-change" entries indiscriminatingly. This was done due
to my false assumption that one of the requirements in the
diff-raw output was not to show such an entry (which
resulted in my incorrect comment about "diff-helper never
being able to be equivalent to built-in diff driver"). My
special thanks go to Linus for correcting me about this.
When we produce diff-raw output, for the downstream to be
able to tell renames from copies, sometimes it _is_
necessary to output "no-change" entries, and this patch
adds diffcore_prune() function for doing it.
(3) Earlier diff_filepair structure was trying to be not too
specific about rename/copy operations, but the purpose of
the structure was to record one or two paths, which _was_
indeed about rename/copy. This patch discards xfrm_msg
field which was trying to be generic for this wrong reason,
and introduces a couple of fields (rename_score and
rename_rank) that are explicitly specific to rename/copy
logic. One thing to note is that the information in a
single diff_filepair structure _still_ does not distinguish
renames from copies, and it is deliberately so. This is to
allow patches to be reordered in later stages.
(4) This patch also adds some tests about diff-raw format
output and makes sure that necessary "no-change" entries
appear on the output.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
20 years ago
|
|
|
* Feeding the same new and old to diff_change()
|
|
|
|
* also has the same effect.
|
|
|
|
* Before the final output happens, they are pruned after
|
|
|
|
* merged into rename/copy pairs as appropriate.
|
|
|
|
*/
|
|
|
|
if (options->reverse_diff)
|
|
|
|
addremove = (addremove == '+' ? '-' :
|
|
|
|
addremove == '-' ? '+' : addremove);
|
|
|
|
|
|
|
|
if (!path) path = "";
|
|
|
|
sprintf(concatpath, "%s%s", base, path);
|
|
|
|
one = alloc_filespec(concatpath);
|
|
|
|
two = alloc_filespec(concatpath);
|
|
|
|
|
|
|
|
if (addremove != '+')
|
|
|
|
fill_filespec(one, sha1, mode);
|
|
|
|
if (addremove != '-')
|
|
|
|
fill_filespec(two, sha1, mode);
|
|
|
|
|
|
|
|
diff_queue(&diff_queued_diff, one, two);
|
|
|
|
}
|
|
|
|
|
|
|
|
void diff_change(struct diff_options *options,
|
|
|
|
unsigned old_mode, unsigned new_mode,
|
|
|
|
const unsigned char *old_sha1,
|
|
|
|
const unsigned char *new_sha1,
|
|
|
|
const char *base, const char *path)
|
|
|
|
{
|
|
|
|
char concatpath[PATH_MAX];
|
|
|
|
struct diff_filespec *one, *two;
|
|
|
|
|
|
|
|
if (options->reverse_diff) {
|
|
|
|
unsigned tmp;
|
|
|
|
const unsigned char *tmp_c;
|
|
|
|
tmp = old_mode; old_mode = new_mode; new_mode = tmp;
|
|
|
|
tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
|
|
|
|
}
|
|
|
|
if (!path) path = "";
|
|
|
|
sprintf(concatpath, "%s%s", base, path);
|
|
|
|
one = alloc_filespec(concatpath);
|
|
|
|
two = alloc_filespec(concatpath);
|
|
|
|
fill_filespec(one, old_sha1, old_mode);
|
|
|
|
fill_filespec(two, new_sha1, new_mode);
|
|
|
|
|
|
|
|
diff_queue(&diff_queued_diff, one, two);
|
|
|
|
}
|
|
|
|
|
|
|
|
void diff_unmerge(struct diff_options *options,
|
|
|
|
const char *path)
|
|
|
|
{
|
|
|
|
struct diff_filespec *one, *two;
|
|
|
|
one = alloc_filespec(path);
|
|
|
|
two = alloc_filespec(path);
|
|
|
|
diff_queue(&diff_queued_diff, one, two);
|
|
|
|
}
|