Browse Source

diffcore-break: similarity estimator fix.

This is a companion patch to the previous fix to diffcore-rename.
The merging-back process should use logic similar to what is used
there.

Signed-off-by: Junio C Hamano <junkio@cox.net>
maint
Junio C Hamano 19 years ago
parent
commit
4d0f39cecf
  1. 41
      diffcore-break.c
  2. 4
      diffcore.h

41
diffcore-break.c

@@ -45,8 +45,8 @@ static int should_break(struct diff_filespec *src,
* The value we return is 1 if we want the pair to be broken,
* or 0 if we do not.
*/
unsigned long delta_size, base_size, src_copied, literal_added;
int to_break = 0;
unsigned long delta_size, base_size, src_copied, literal_added,
src_removed;

*merge_score_p = 0; /* assume no deletion --- "do not break"
* is the default.
@@ -72,33 +72,40 @@ static int should_break(struct diff_filespec *src,
&src_copied, &literal_added))
return 0;

/* sanity */
if (src->size < src_copied)
src_copied = src->size;
if (dst->size < literal_added + src_copied) {
if (src_copied < dst->size)
literal_added = dst->size - src_copied;
else
literal_added = 0;
}
src_removed = src->size - src_copied;

/* Compute merge-score, which is "how much is removed
* from the source material". The clean-up stage will
* merge the surviving pair together if the score is
* less than the minimum, after rename/copy runs.
*/
if (src->size <= src_copied)
; /* all copied, nothing removed */
else {
delta_size = src->size - src_copied;
*merge_score_p = delta_size * MAX_SCORE / src->size;
}
*merge_score_p = src_removed * MAX_SCORE / src->size;

/* Extent of damage, which counts both inserts and
* deletes.
*/
if (src->size + literal_added <= src_copied)
delta_size = 0; /* avoid wrapping around */
else
delta_size = (src->size - src_copied) + literal_added;
delta_size = src_removed + literal_added;
if (delta_size * MAX_SCORE / base_size < break_score)
return 0;

/* We break if the edit exceeds the minimum.
* i.e. (break_score / MAX_SCORE < delta_size / base_size)
/* If you removed a lot without adding new material, that is
* not really a rewrite.
*/
if (break_score * base_size < delta_size * MAX_SCORE)
to_break = 1;
if ((src->size * break_score < src_removed * MAX_SCORE) &&
(literal_added * 20 < src_removed) &&
(literal_added * 20 < src_copied))
return 0;

return to_break;
return 1;
}

void diffcore_break(int break_score)

4
diffcore.h

@@ -17,8 +17,8 @@
*/
#define MAX_SCORE 60000.0
#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%)*/
#define DEFAULT_MERGE_SCORE 48000 /* maximum for break-merge to happen (80%)*/
#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%) */
#define DEFAULT_MERGE_SCORE 36000 /* maximum for break-merge to happen (60%) */

#define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */


Loading…
Cancel
Save