Browse Source
A new diffcore transformation, diffcore-break.c, is introduced. When the -B flag is given, a patch that represents a complete rewrite is broken into a deletion followed by a creation. This makes it easier to review such a complete rewrite patch. The -B flag takes the same syntax as the -M and -C flags to specify the minimum amount of non-source material the resulting file needs to have to be considered a complete rewrite, and defaults to 99% if not specified. As the new test t4008-diff-break-rewrite.sh demonstrates, if a file is a complete rewrite, it is broken into a delete/create pair, which can further be subjected to the usual rename detection if -M or -C is used. For example, if file0 gets completely rewritten to make it as if it were rather based on file1 which itself disappeared, the following happens: The original change looks like this: file0 --> file0' (quite different from file0) file1 --> /dev/null After diffcore-break runs, it would become this: file0 --> /dev/null /dev/null --> file0' file1 --> /dev/null Then diffcore-rename matches them up: file1 --> file0' The internal score values are finer grained now. The earlier maximum of 10000 has been raised to 60000; there are no user-visible changes, but there is no reason to waste available bits. Signed-off-by: Junio C Hamano <junkio@cox.net> Signed-off-by: Linus Torvalds <torvalds@osdl.org> maint
Junio C Hamano
20 years ago
committed by
Linus Torvalds
13 changed files with 433 additions and 29 deletions
@ -0,0 +1,127 @@
@@ -0,0 +1,127 @@
|
||||
/* |
||||
* Copyright (C) 2005 Junio C Hamano |
||||
*/ |
||||
#include "cache.h" |
||||
#include "diff.h" |
||||
#include "diffcore.h" |
||||
#include "delta.h" |
||||
#include "count-delta.h" |
||||
|
||||
static int very_different(struct diff_filespec *src, |
||||
struct diff_filespec *dst, |
||||
int min_score) |
||||
{ |
||||
/* dst is recorded as a modification of src. Are they so |
||||
* different that we are better off recording this as a pair |
||||
* of delete and create? min_score is the minimum amount of |
||||
* new material that must exist in the dst and not in src for |
||||
* the pair to be considered a complete rewrite, and recommended |
||||
* to be set to a very high value, 99% or so. |
||||
* |
||||
* The value we return represents the amount of new material |
||||
* that is in dst and not in src. We return 0 when we do not |
||||
* want to get the filepair broken. |
||||
*/ |
||||
void *delta; |
||||
unsigned long delta_size, base_size; |
||||
|
||||
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode)) |
||||
return 0; /* leave symlink rename alone */ |
||||
|
||||
if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1)) |
||||
return 0; /* error but caught downstream */ |
||||
|
||||
delta_size = ((src->size < dst->size) ? |
||||
(dst->size - src->size) : (src->size - dst->size)); |
||||
|
||||
/* Notice that we use max of src and dst as the base size, |
||||
* unlike rename similarity detection. This is so that we do |
||||
* not mistake a large addition as a complete rewrite. |
||||
*/ |
||||
base_size = ((src->size < dst->size) ? dst->size : src->size); |
||||
|
||||
/* |
||||
* If file size difference is too big compared to the |
||||
* base_size, we declare this a complete rewrite. |
||||
*/ |
||||
if (base_size * min_score < delta_size * MAX_SCORE) |
||||
return MAX_SCORE; |
||||
|
||||
if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0)) |
||||
return 0; /* error but caught downstream */ |
||||
|
||||
delta = diff_delta(src->data, src->size, |
||||
dst->data, dst->size, |
||||
&delta_size); |
||||
|
||||
/* A delta that has a lot of literal additions would have |
||||
* big delta_size no matter what else it does. |
||||
*/ |
||||
if (base_size * min_score < delta_size * MAX_SCORE) |
||||
return MAX_SCORE; |
||||
|
||||
/* Estimate the edit size by interpreting delta. */ |
||||
delta_size = count_delta(delta, delta_size); |
||||
free(delta); |
||||
if (delta_size == UINT_MAX) |
||||
return 0; /* error in delta computation */ |
||||
|
||||
if (base_size < delta_size) |
||||
return MAX_SCORE; |
||||
|
||||
return delta_size * MAX_SCORE / base_size; |
||||
} |
||||
|
||||
void diffcore_break(int min_score) |
||||
{ |
||||
struct diff_queue_struct *q = &diff_queued_diff; |
||||
struct diff_queue_struct outq; |
||||
int i; |
||||
|
||||
if (!min_score) |
||||
min_score = DEFAULT_BREAK_SCORE; |
||||
|
||||
outq.nr = outq.alloc = 0; |
||||
outq.queue = NULL; |
||||
|
||||
for (i = 0; i < q->nr; i++) { |
||||
struct diff_filepair *p = q->queue[i]; |
||||
int score; |
||||
|
||||
/* We deal only with in-place edit of non directory. |
||||
* We do not break anything else. |
||||
*/ |
||||
if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two) && |
||||
!S_ISDIR(p->one->mode) && !S_ISDIR(p->two->mode) && |
||||
!strcmp(p->one->path, p->two->path)) { |
||||
score = very_different(p->one, p->two, min_score); |
||||
if (min_score <= score) { |
||||
/* Split this into delete and create */ |
||||
struct diff_filespec *null_one, *null_two; |
||||
struct diff_filepair *dp; |
||||
|
||||
/* deletion of one */ |
||||
null_one = alloc_filespec(p->one->path); |
||||
dp = diff_queue(&outq, p->one, null_one); |
||||
dp->score = score; |
||||
dp->broken_pair = 1; |
||||
|
||||
/* creation of two */ |
||||
null_two = alloc_filespec(p->two->path); |
||||
dp = diff_queue(&outq, null_two, p->two); |
||||
dp->score = score; |
||||
dp->broken_pair = 1; |
||||
|
||||
free(p); /* not diff_free_filepair(), we are |
||||
* reusing one and two here. |
||||
*/ |
||||
continue; |
||||
} |
||||
} |
||||
diff_q(&outq, p); |
||||
} |
||||
free(q->queue); |
||||
*q = outq; |
||||
|
||||
return; |
||||
} |
@ -0,0 +1,207 @@
@@ -0,0 +1,207 @@
|
||||
#!/bin/sh |
||||
# |
||||
# Copyright (c) 2005 Junio C Hamano |
||||
# |
||||
|
||||
test_description='Break and then rename |
||||
|
||||
We have two very different files, file0 and file1, registered in a tree. |
||||
|
||||
We update file1 so drastically that it is more similar to file0, and |
||||
then remove file0. With -B, changes to file1 should be broken into |
||||
separate delete and create, resulting in removal of file0, removal of |
||||
original file1 and creation of completely rewritten file1. |
||||
|
||||
Further, with -B and -M together, these three modifications should |
||||
turn into rename-edit of file0 into file1. |
||||
|
||||
Starting from the same two files in the tree, we swap file0 and file1. |
||||
With -B, this should be detected as two complete rewrites, resulting in |
||||
four changes in total. |
||||
|
||||
Further, with -B and -M together, these should turn into two renames. |
||||
' |
||||
. ./test-lib.sh |
||||
|
||||
_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]' |
||||
_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40" |
||||
sanitize_diff_raw='s/ '"$_x40"' '"$_x40"' \([CDNR]\)[0-9]* / X X \1# /' |
||||
compare_diff_raw () {
	# Compare two diff-raw outputs ($1 and $2) after running both
	# through $sanitize_diff_raw, which masks the two object names
	# and the numeric score on C/D/N/R lines.  Scores change
	# whenever the heuristics are improved, and SHA1 generation is
	# the job of t0000-basic.sh, so neither is checked here.
	#
	# On a mismatch the exit status of diff is returned and the
	# sanitized copies are left behind; on a match they are removed.

	sed -e "$sanitize_diff_raw" <"$1" >.tmp-1
	sed -e "$sanitize_diff_raw" <"$2" >.tmp-2
	diff -u .tmp-1 .tmp-2 || return
	rm -f .tmp-1 .tmp-2
}
||||
|
||||
test_expect_success \ |
||||
setup \ |
||||
'cat ../../README >file0 && |
||||
cat ../../COPYING >file1 && |
||||
git-update-cache --add file0 file1 && |
||||
tree=$(git-write-tree) && |
||||
echo "$tree"' |
||||
|
||||
test_expect_success \ |
||||
'change file1 with copy-edit of file0 and remove file0' \ |
||||
'sed -e "s/git/GIT/" file0 >file1 && |
||||
rm -f file0 && |
||||
git-update-cache --remove file0 file1' |
||||
|
||||
test_expect_success \ |
||||
'run diff with -B' \ |
||||
'git-diff-cache -B --cached "$tree" >current' |
||||
|
||||
cat >expected <<\EOF |
||||
:100644 000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 0000000000000000000000000000000000000000 D file0 |
||||
:100644 000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0000000000000000000000000000000000000000 D100 file1 |
||||
:000000 100644 0000000000000000000000000000000000000000 11e331465a89c394dc25c780de230043750c1ec8 N100 file1 |
||||
EOF |
||||
|
||||
test_expect_success \ |
||||
'validate result of -B (#1)' \ |
||||
'compare_diff_raw current expected' |
||||
|
||||
test_expect_success \ |
||||
'run diff with -B and -M' \ |
||||
'git-diff-cache -B -M "$tree" >current' |
||||
|
||||
cat >expected <<\EOF |
||||
:100644 100644 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 08bb2fb671deff4c03a4d4a0a1315dff98d5732c R100 file0 file1 |
||||
EOF |
||||
|
||||
test_expect_success \ |
||||
'validate result of -B -M (#2)' \ |
||||
'compare_diff_raw current expected' |
||||
|
||||
test_expect_success \ |
||||
'swap file0 and file1' \ |
||||
'rm -f file0 file1 && |
||||
git-read-tree -m $tree && |
||||
git-checkout-cache -f -u -a && |
||||
mv file0 tmp && |
||||
mv file1 file0 && |
||||
mv tmp file1 && |
||||
git-update-cache file0 file1' |
||||
|
||||
test_expect_success \ |
||||
'run diff with -B' \ |
||||
'git-diff-cache -B "$tree" >current' |
||||
|
||||
cat >expected <<\EOF |
||||
:100644 000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 0000000000000000000000000000000000000000 D100 file0 |
||||
:000000 100644 0000000000000000000000000000000000000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 N100 file0 |
||||
:100644 000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0000000000000000000000000000000000000000 D100 file1 |
||||
:000000 100644 0000000000000000000000000000000000000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 N100 file1 |
||||
EOF |
||||
|
||||
test_expect_success \ |
||||
'validate result of -B (#3)' \ |
||||
'compare_diff_raw current expected' |
||||
|
||||
test_expect_success \ |
||||
'run diff with -B and -M' \ |
||||
'git-diff-cache -B -M "$tree" >current' |
||||
|
||||
cat >expected <<\EOF |
||||
:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 6ff87c4664981e4397625791c8ea3bbb5f2279a3 R100 file1 file0 |
||||
:100644 100644 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 R100 file0 file1 |
||||
EOF |
||||
|
||||
test_expect_success \ |
||||
'validate result of -B -M (#4)' \ |
||||
'compare_diff_raw current expected' |
||||
|
||||
test_expect_success \ |
||||
'make file0 into something completely different' \ |
||||
'rm -f file0 && |
||||
ln -s frotz file0 && |
||||
git-update-cache file0 file1' |
||||
|
||||
test_expect_success \ |
||||
'run diff with -B' \ |
||||
'git-diff-cache -B "$tree" >current' |
||||
|
||||
cat >expected <<\EOF |
||||
:100644 120000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 67be421f88824578857624f7b3dc75e99a8a1481 T file0 |
||||
:100644 000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0000000000000000000000000000000000000000 D100 file1 |
||||
:000000 100644 0000000000000000000000000000000000000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 N100 file1 |
||||
EOF |
||||
|
||||
test_expect_success \ |
||||
'validate result of -B (#5)' \ |
||||
'compare_diff_raw current expected' |
||||
|
||||
# The command runs with both -B and -M, so the title says so; the
# result is checked by 'validate result of -B -M (#6)'.
test_expect_success \
    'run diff with -B and -M' \
    'git-diff-cache -B -M "$tree" >current'
||||
|
||||
# This should not mistake file0 as the copy source of new file1 |
||||
# due to type differences. |
||||
cat >expected <<\EOF |
||||
:100644 120000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 67be421f88824578857624f7b3dc75e99a8a1481 T file0 |
||||
:100644 000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0000000000000000000000000000000000000000 D100 file1 |
||||
:000000 100644 0000000000000000000000000000000000000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 N100 file1 |
||||
EOF |
||||
|
||||
test_expect_success \ |
||||
'validate result of -B -M (#6)' \ |
||||
'compare_diff_raw current expected' |
||||
|
||||
test_expect_success \ |
||||
'run diff with -M' \ |
||||
'git-diff-cache -M "$tree" >current' |
||||
|
||||
# This should not mistake file0 as the copy source of new file1 |
||||
# due to type differences. |
||||
cat >expected <<\EOF |
||||
:100644 120000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 67be421f88824578857624f7b3dc75e99a8a1481 T file0 |
||||
:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 M file1 |
||||
EOF |
||||
|
||||
test_expect_success \ |
||||
'validate result of -M (#7)' \ |
||||
'compare_diff_raw current expected' |
||||
|
||||
# Every step is chained with && so that a failure anywhere in the
# setup fails the test instead of being masked by the last command.
test_expect_success \
    'file1 edited to look like file0 and file0 rename-edited to file2' \
    'rm -f file0 file1 &&
     git-read-tree -m $tree &&
     git-checkout-cache -f -u -a &&
     sed -e "s/git/GIT/" file0 >file1 &&
     sed -e "s/git/GET/" file0 >file2 &&
     rm -f file0 &&
     git-update-cache --add --remove file0 file1 file2'
||||
|
||||
test_expect_success \ |
||||
'run diff with -B' \ |
||||
'git-diff-cache -B "$tree" >current' |
||||
|
||||
cat >expected <<\EOF |
||||
:100644 000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 0000000000000000000000000000000000000000 D file0 |
||||
:100644 000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0000000000000000000000000000000000000000 D100 file1 |
||||
:000000 100644 0000000000000000000000000000000000000000 08bb2fb671deff4c03a4d4a0a1315dff98d5732c N100 file1 |
||||
:000000 100644 0000000000000000000000000000000000000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 N file2 |
||||
EOF |
||||
|
||||
test_expect_success \ |
||||
'validate result of -B (#8)' \ |
||||
'compare_diff_raw current expected' |
||||
|
||||
test_expect_success \ |
||||
'run diff with -B -M' \ |
||||
'git-diff-cache -B -M "$tree" >current' |
||||
|
||||
cat >expected <<\EOF |
||||
:100644 100644 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 08bb2fb671deff4c03a4d4a0a1315dff98d5732c C095 file0 file1 |
||||
:100644 100644 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 59f832e5c8b3f7e486be15ad0cd3e95ba9af8998 R095 file0 file2 |
||||
EOF |
||||
|
||||
test_expect_success \ |
||||
'validate result of -B -M (#9)' \ |
||||
'compare_diff_raw current expected' |
||||
|
||||
test_done |
Loading…
Reference in new issue