Browse Source
This switches the change estimation logic used by break, rename and copy detection from delta packing code to a more line oriented one. This way, thee performance-density tradeoff by delta packing code can be made without worrying about breaking the rename detection. Signed-off-by: Junio C Hamano <junkio@cox.net>maint
Junio C Hamano
19 years ago
1 changed files with 113 additions and 28 deletions
@ -1,43 +1,128 @@
@@ -1,43 +1,128 @@
|
||||
#include "cache.h" |
||||
#include "diff.h" |
||||
#include "diffcore.h" |
||||
#include "delta.h" |
||||
#include "count-delta.h" |
||||
|
||||
static int diffcore_count_changes_1(void *src, unsigned long src_size, |
||||
void *dst, unsigned long dst_size, |
||||
unsigned long delta_limit, |
||||
unsigned long *src_copied, |
||||
unsigned long *literal_added) |
||||
{ |
||||
void *delta; |
||||
unsigned long delta_size; |
||||
|
||||
delta = diff_delta(src, src_size, |
||||
dst, dst_size, |
||||
&delta_size, delta_limit); |
||||
if (!delta) |
||||
/* If delta_limit is exceeded, we have too much differences */ |
||||
return -1; |
||||
|
||||
/* Estimate the edit size by interpreting delta. */ |
||||
if (count_delta(delta, delta_size, src_copied, literal_added)) { |
||||
free(delta); |
||||
return -1; |
||||
struct linehash { |
||||
unsigned long bytes; |
||||
unsigned long hash; |
||||
}; |
||||
|
||||
static unsigned long hash_extended_line(const unsigned char **buf_p, |
||||
unsigned long left) |
||||
{ |
||||
/* An extended line is zero or more whitespace letters (including LF) |
||||
* followed by one non whitespace letter followed by zero or more |
||||
* non LF, and terminated with by a LF (or EOF). |
||||
*/ |
||||
const unsigned char *bol = *buf_p; |
||||
const unsigned char *buf = bol; |
||||
unsigned long hashval = 0; |
||||
while (left) { |
||||
unsigned c = *buf++; |
||||
if (!c) |
||||
goto binary; |
||||
left--; |
||||
if (' ' < c) { |
||||
hashval = c; |
||||
break; |
||||
} |
||||
} |
||||
while (left) { |
||||
unsigned c = *buf++; |
||||
if (!c) |
||||
goto binary; |
||||
left--; |
||||
if (c == '\n') |
||||
break; |
||||
if (' ' < c) |
||||
hashval = hashval * 11 + c; |
||||
} |
||||
free(delta); |
||||
*buf_p = buf; |
||||
return hashval; |
||||
|
||||
binary: |
||||
*buf_p = NULL; |
||||
return 0; |
||||
} |
||||
|
||||
static int linehash_compare(const void *a_, const void *b_) |
||||
{ |
||||
struct linehash *a = (struct linehash *) a_; |
||||
struct linehash *b = (struct linehash *) b_; |
||||
if (a->hash < b->hash) return -1; |
||||
if (a->hash > b->hash) return 1; |
||||
return 0; |
||||
} |
||||
|
||||
static struct linehash *hash_lines(const unsigned char *buf, |
||||
unsigned long size) |
||||
{ |
||||
const unsigned char *eobuf = buf + size; |
||||
struct linehash *line = NULL; |
||||
int alloc = 0, used = 0; |
||||
|
||||
while (buf < eobuf) { |
||||
const unsigned char *ptr = buf; |
||||
unsigned long hash = hash_extended_line(&buf, eobuf-ptr); |
||||
if (!buf) { |
||||
free(line); |
||||
return NULL; |
||||
} |
||||
if (alloc <= used) { |
||||
alloc = alloc_nr(alloc); |
||||
line = xrealloc(line, sizeof(*line) * alloc); |
||||
} |
||||
line[used].bytes = buf - ptr; |
||||
line[used].hash = hash; |
||||
used++; |
||||
} |
||||
qsort(line, used, sizeof(*line), linehash_compare); |
||||
|
||||
/* Terminate the list */ |
||||
if (alloc <= used) |
||||
line = xrealloc(line, sizeof(*line) * (used+1)); |
||||
line[used].bytes = line[used].hash = 0; |
||||
return line; |
||||
} |
||||
|
||||
int diffcore_count_changes(void *src, unsigned long src_size, |
||||
void *dst, unsigned long dst_size, |
||||
unsigned long delta_limit, |
||||
unsigned long *src_copied, |
||||
unsigned long *literal_added) |
||||
{ |
||||
return diffcore_count_changes_1(src, src_size, |
||||
dst, dst_size, |
||||
delta_limit, |
||||
src_copied, |
||||
literal_added); |
||||
struct linehash *src_lines, *dst_lines; |
||||
unsigned long sc, la; |
||||
|
||||
src_lines = hash_lines(src, src_size); |
||||
if (!src_lines) |
||||
return -1; |
||||
dst_lines = hash_lines(dst, dst_size); |
||||
if (!dst_lines) { |
||||
free(src_lines); |
||||
return -1; |
||||
} |
||||
sc = la = 0; |
||||
while (src_lines->bytes && dst_lines->bytes) { |
||||
int cmp = linehash_compare(src_lines, dst_lines); |
||||
if (!cmp) { |
||||
sc += src_lines->bytes; |
||||
src_lines++; |
||||
dst_lines++; |
||||
continue; |
||||
} |
||||
if (cmp < 0) { |
||||
src_lines++; |
||||
continue; |
||||
} |
||||
la += dst_lines->bytes; |
||||
dst_lines++; |
||||
} |
||||
while (dst_lines->bytes) { |
||||
la += dst_lines->bytes; |
||||
dst_lines++; |
||||
} |
||||
*src_copied = sc; |
||||
*literal_added = la; |
||||
return 0; |
||||
} |
||||
|
Loading…
Reference in new issue