Browse Source

Merge branch 'np/delta' into next

* np/delta:
  count-delta: tweak counting of copied source material.
  diff-delta: produce optimal pack data
maint
Junio C Hamano 19 years ago
parent
commit
d2540f0203
  1. 77
      count-delta.c
  2. 77
      diff-delta.c

77
count-delta.c

@ -3,11 +3,74 @@
* The delta-parsing part is almost straight copy of patch-delta.c * The delta-parsing part is almost straight copy of patch-delta.c
* which is (C) 2005 Nicolas Pitre <nico@cam.org>. * which is (C) 2005 Nicolas Pitre <nico@cam.org>.
*/ */
#include "cache.h"
#include "delta.h"
#include "count-delta.h"
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <limits.h> #include <limits.h>
#include "delta.h"
/*
 * One node of a singly linked list of source byte ranges.
 * touch_range() inserts nodes so that the list stays ordered by 'ofs';
 * count_range() adds up (end - ofs) for every node and frees the list.
 */
#include "count-delta.h" struct span {
struct span *next; /* following node in the list, or NULL at the tail */
unsigned long ofs; /* start offset of the range */
unsigned long end; /* end offset; the node contributes end - ofs bytes */
};

/*
 * Record that source bytes [ofs, end) have been referenced.
 *
 * '*span' is the head of a list of non-overlapping ranges kept in
 * ascending 'ofs' order; it may be NULL for an empty list and is updated
 * in place when a new head node is inserted.  Ranges that overlap the new
 * one (or that begin exactly at 'end') are merged with it, so that every
 * source byte is represented at most once and count_range() never counts
 * a byte twice.
 *
 * The previous implementation extended a span up to the start of its
 * successor without checking that the successor was within the touched
 * range.  With spans [0,10) and [100,110), touching [5,20) grew the first
 * span to [0,100) and inflated the total.  Here a successor is absorbed
 * only when it starts at or before 'end'.
 */
static void touch_range(struct span **span,
			unsigned long ofs, unsigned long end)
{
	struct span **link = span;
	struct span *e;

	/* Skip every span that lies entirely before the new range. */
	while ((e = *link) != NULL && e->end <= ofs)
		link = &e->next;

	if (!e || end < e->ofs) {
		/* Nothing overlaps or abuts: insert a fresh node here. */
		struct span *n = xmalloc(sizeof(*n));
		n->ofs = ofs;
		n->end = end;
		n->next = e;
		*link = n;
		return;
	}

	/* 'e' overlaps the new range or begins right at its end. */
	if (ofs < e->ofs)
		e->ofs = ofs;

	/* Grow 'e' to 'end', swallowing successors that fall inside. */
	while (e->end < end) {
		struct span *n = e->next;

		if (n && n->ofs <= end) {
			e->end = n->end;
			e->next = n->next;
			free(n);
		}
		else
			e->end = end;
	}
}

/*
 * Add up the length (end - ofs) of every span in the list headed by 's'
 * and release each node along the way.  The list must not be used again
 * after this call.  Returns 0 for an empty (NULL) list.
 */
static unsigned long count_range(struct span *s)
{
	unsigned long total = 0;
	struct span *cur = s;

	while (cur != NULL) {
		/* Grab the successor first; 'cur' is gone after free(). */
		struct span *following = cur->next;

		total += cur->end - cur->ofs;
		free(cur);
		cur = following;
	}
	return total;
}


/* /*
* NOTE. We do not _interpret_ delta fully. As an approximation, we * NOTE. We do not _interpret_ delta fully. As an approximation, we
@ -21,10 +84,11 @@
int count_delta(void *delta_buf, unsigned long delta_size, int count_delta(void *delta_buf, unsigned long delta_size,
unsigned long *src_copied, unsigned long *literal_added) unsigned long *src_copied, unsigned long *literal_added)
{ {
unsigned long copied_from_source, added_literal; unsigned long added_literal;
const unsigned char *data, *top; const unsigned char *data, *top;
unsigned char cmd; unsigned char cmd;
unsigned long src_size, dst_size, out; unsigned long src_size, dst_size, out;
struct span *span = NULL;


if (delta_size < DELTA_SIZE_MIN) if (delta_size < DELTA_SIZE_MIN)
return -1; return -1;
@ -35,7 +99,7 @@ int count_delta(void *delta_buf, unsigned long delta_size,
src_size = get_delta_hdr_size(&data); src_size = get_delta_hdr_size(&data);
dst_size = get_delta_hdr_size(&data); dst_size = get_delta_hdr_size(&data);


added_literal = copied_from_source = out = 0; added_literal = out = 0;
while (data < top) { while (data < top) {
cmd = *data++; cmd = *data++;
if (cmd & 0x80) { if (cmd & 0x80) {
@ -49,7 +113,7 @@ int count_delta(void *delta_buf, unsigned long delta_size,
if (cmd & 0x40) cp_size |= (*data++ << 16); if (cmd & 0x40) cp_size |= (*data++ << 16);
if (cp_size == 0) cp_size = 0x10000; if (cp_size == 0) cp_size = 0x10000;


copied_from_source += cp_size; touch_range(&span, cp_off, cp_off+cp_size);
out += cp_size; out += cp_size;
} else { } else {
/* write literal into dst */ /* write literal into dst */
@ -59,6 +123,8 @@ int count_delta(void *delta_buf, unsigned long delta_size,
} }
} }


*src_copied = count_range(span);

/* sanity check */ /* sanity check */
if (data != top || out != dst_size) if (data != top || out != dst_size)
return -1; return -1;
@ -66,7 +132,6 @@ int count_delta(void *delta_buf, unsigned long delta_size,
/* delete size is what was _not_ copied from source. /* delete size is what was _not_ copied from source.
* edit size is that and literal additions. * edit size is that and literal additions.
*/ */
*src_copied = copied_from_source;
*literal_added = added_literal; *literal_added = added_literal;
return 0; return 0;
} }

77
diff-delta.c

@ -20,21 +20,11 @@


#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <zlib.h>
#include "delta.h" #include "delta.h"




/* block size: min = 16, max = 64k, power of 2 */
#define BLK_SIZE 16

#define MIN(a, b) ((a) < (b) ? (a) : (b))

#define GR_PRIME 0x9e370001
#define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift))

struct index { struct index {
const unsigned char *ptr; const unsigned char *ptr;
unsigned int val;
struct index *next; struct index *next;
}; };


@ -42,21 +32,21 @@ static struct index ** delta_index(const unsigned char *buf,
unsigned long bufsize, unsigned long bufsize,
unsigned int *hash_shift) unsigned int *hash_shift)
{ {
unsigned int hsize, hshift, entries, blksize, i; unsigned long hsize;
unsigned int hshift, i;
const unsigned char *data; const unsigned char *data;
struct index *entry, **hash; struct index *entry, **hash;
void *mem; void *mem;


/* determine index hash size */ /* determine index hash size */
entries = (bufsize + BLK_SIZE - 1) / BLK_SIZE; hsize = bufsize / 4;
hsize = entries / 4; for (i = 8; (1 << i) < hsize && i < 16; i++);
for (i = 4; (1 << i) < hsize && i < 16; i++);
hsize = 1 << i; hsize = 1 << i;
hshift = 32 - i; hshift = i - 8;
*hash_shift = hshift; *hash_shift = hshift;


/* allocate lookup index */ /* allocate lookup index */
mem = malloc(hsize * sizeof(*hash) + entries * sizeof(*entry)); mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
if (!mem) if (!mem)
return NULL; return NULL;
hash = mem; hash = mem;
@ -64,17 +54,12 @@ static struct index ** delta_index(const unsigned char *buf,
memset(hash, 0, hsize * sizeof(*hash)); memset(hash, 0, hsize * sizeof(*hash));


/* then populate it */ /* then populate it */
data = buf + entries * BLK_SIZE - BLK_SIZE; data = buf + bufsize - 2;
blksize = bufsize - (data - buf); while (data > buf) {
while (data >= buf) { entry->ptr = --data;
unsigned int val = adler32(0, data, blksize); i = data[0] ^ data[1] ^ (data[2] << hshift);
i = HASH(val, hshift);
entry->ptr = data;
entry->val = val;
entry->next = hash[i]; entry->next = hash[i];
hash[i] = entry++; hash[i] = entry++;
blksize = BLK_SIZE;
data -= BLK_SIZE;
} }


return hash; return hash;
@ -141,29 +126,27 @@ void *diff_delta(void *from_buf, unsigned long from_size,


while (data < top) { while (data < top) {
unsigned int moff = 0, msize = 0; unsigned int moff = 0, msize = 0;
unsigned int blksize = MIN(top - data, BLK_SIZE); if (data + 2 < top) {
unsigned int val = adler32(0, data, blksize); i = data[0] ^ data[1] ^ (data[2] << hash_shift);
i = HASH(val, hash_shift); for (entry = hash[i]; entry; entry = entry->next) {
for (entry = hash[i]; entry; entry = entry->next) { const unsigned char *ref = entry->ptr;
const unsigned char *ref = entry->ptr; const unsigned char *src = data;
const unsigned char *src = data; unsigned int ref_size = ref_top - ref;
unsigned int ref_size = ref_top - ref; if (ref_size > top - src)
if (entry->val != val) ref_size = top - src;
continue; if (ref_size > 0x10000)
if (ref_size > top - src) ref_size = 0x10000;
ref_size = top - src; if (ref_size <= msize)
while (ref_size && *src++ == *ref) {
ref++;
ref_size--;
}
ref_size = ref - entry->ptr;
if (ref_size > msize) {
/* this is our best match so far */
moff = entry->ptr - ref_data;
msize = ref_size;
if (msize >= 0x10000) {
msize = 0x10000;
break; break;
while (ref_size && *src++ == *ref) {
ref++;
ref_size--;
}
ref_size = ref - entry->ptr;
if (msize < ref - entry->ptr) {
/* this is our best match so far */
msize = ref - entry->ptr;
moff = entry->ptr - ref_data;
} }
} }
} }

Loading…
Cancel
Save