Browse Source

Merge branch 'np/delta' into next

* np/delta:
  count-delta: tweak counting of copied source material.
  diff-delta: produce optimal pack data
maint
Junio C Hamano 19 years ago
parent
commit
d2540f0203
  1. 77
      count-delta.c
  2. 77
      diff-delta.c

77
count-delta.c

@ -3,11 +3,74 @@ @@ -3,11 +3,74 @@
* The delta-parsing part is almost straight copy of patch-delta.c
* which is (C) 2005 Nicolas Pitre <nico@cam.org>.
*/
#include "cache.h"
#include "delta.h"
#include "count-delta.h"
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "delta.h"
#include "count-delta.h"

/*
 * One half-open range [ofs, end) of source offsets, stored as a node of
 * a singly linked list.  touch_range() builds the list and count_range()
 * sums the range lengths and frees the nodes.
 */
struct span {
/* following node in the list, or NULL at the tail */
struct span *next;
/* first source offset covered by this range */
unsigned long ofs;
/* one past the last source offset covered by this range */
unsigned long end;
};

/*
 * Stretch span `e`, whose range already contains or starts at the
 * beginning of the touched region, so that the list covers everything
 * up to `end`.
 *
 * Successor spans that begin at or before `end` are reached by closing
 * the gap in front of them (the current span's end is moved up to the
 * successor's start) and then continuing from the successor.  A
 * successor that begins after `end` is left alone, so the hole between
 * `end` and that successor is not counted as touched.
 */
static void grow_span(struct span *e, unsigned long end)
{
	while (e->end < end) {
		struct span *next = e->next;

		if (!next || end < next->ofs) {
			/* nothing further to bridge to: stop exactly at end */
			e->end = end;
			return;
		}
		/* close the gap up to the successor and carry on from there */
		e->end = next->ofs;
		e = next;
	}
}

/*
 * Record that source bytes [ofs, end) were referenced.
 *
 * *span is the head of a singly linked list of half-open ranges kept in
 * ascending order of ofs and never overlapping, so the sum of
 * (end - ofs) over the list is the number of distinct bytes touched.
 * Ranges that merely abut each other are not necessarily coalesced into
 * one node; that does not change the sum.
 *
 * New nodes are obtained with xmalloc(); count_range() releases them.
 */
static void touch_range(struct span **span,
			unsigned long ofs, unsigned long end)
{
	struct span *e = *span;
	struct span *p = NULL;

	/*
	 * An empty range touches nothing, and end < ofs can only come
	 * from offset + size wrapping around; recording either would
	 * break the "ofs < end" property every node is assumed to have.
	 */
	if (end <= ofs)
		return;

	/* walk past every range that lies entirely before ofs */
	while (e && e->ofs <= ofs) {
		if (ofs < e->end) {
			/* ofs falls inside e: only the tail may need extending */
			grow_span(e, end);
			return;
		}
		p = e;
		e = e->next;
	}

	if (e && e->ofs <= end) {
		/*
		 * The new range starts in the hole before e but reaches
		 * (or abuts) it: pull e's start back, then extend its tail.
		 */
		e->ofs = ofs;
		grow_span(e, end);
		return;
	}

	/* the new range sits in a hole by itself: link a fresh node after p */
	e = xmalloc(sizeof(*e));
	e->ofs = ofs;
	e->end = end;
	if (p) {
		e->next = p->next;
		p->next = e;
	}
	else {
		e->next = *span;
		*span = e;
	}
}

/*
 * Return the total length of all ranges in the list headed by `s`,
 * i.e. the sum of (end - ofs) over every node.
 *
 * The list is consumed: each node is freed as it is visited, so the
 * caller must not use the list afterwards.  A NULL list yields 0.
 */
static unsigned long count_range(struct span *s)
{
	unsigned long total = 0;
	struct span *following;

	for (; s; s = following) {
		/* grab the link before the node is released */
		following = s->next;
		total += s->end - s->ofs;
		free(s);
	}
	return total;
}

/*
* NOTE. We do not _interpret_ delta fully. As an approximation, we
@ -21,10 +84,11 @@ @@ -21,10 +84,11 @@
int count_delta(void *delta_buf, unsigned long delta_size,
unsigned long *src_copied, unsigned long *literal_added)
{
unsigned long copied_from_source, added_literal;
unsigned long added_literal;
const unsigned char *data, *top;
unsigned char cmd;
unsigned long src_size, dst_size, out;
struct span *span = NULL;

if (delta_size < DELTA_SIZE_MIN)
return -1;
@ -35,7 +99,7 @@ int count_delta(void *delta_buf, unsigned long delta_size, @@ -35,7 +99,7 @@ int count_delta(void *delta_buf, unsigned long delta_size,
src_size = get_delta_hdr_size(&data);
dst_size = get_delta_hdr_size(&data);

added_literal = copied_from_source = out = 0;
added_literal = out = 0;
while (data < top) {
cmd = *data++;
if (cmd & 0x80) {
@ -49,7 +113,7 @@ int count_delta(void *delta_buf, unsigned long delta_size, @@ -49,7 +113,7 @@ int count_delta(void *delta_buf, unsigned long delta_size,
if (cmd & 0x40) cp_size |= (*data++ << 16);
if (cp_size == 0) cp_size = 0x10000;

copied_from_source += cp_size;
touch_range(&span, cp_off, cp_off+cp_size);
out += cp_size;
} else {
/* write literal into dst */
@ -59,6 +123,8 @@ int count_delta(void *delta_buf, unsigned long delta_size, @@ -59,6 +123,8 @@ int count_delta(void *delta_buf, unsigned long delta_size,
}
}

*src_copied = count_range(span);

/* sanity check */
if (data != top || out != dst_size)
return -1;
@ -66,7 +132,6 @@ int count_delta(void *delta_buf, unsigned long delta_size, @@ -66,7 +132,6 @@ int count_delta(void *delta_buf, unsigned long delta_size,
/* delete size is what was _not_ copied from source.
* edit size is that and literal additions.
*/
*src_copied = copied_from_source;
*literal_added = added_literal;
return 0;
}

77
diff-delta.c

@ -20,21 +20,11 @@ @@ -20,21 +20,11 @@

#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include "delta.h"


/* block size: min = 16, max = 64k, power of 2 */
#define BLK_SIZE 16

#define MIN(a, b) ((a) < (b) ? (a) : (b))

#define GR_PRIME 0x9e370001
#define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift))

struct index {
const unsigned char *ptr;
unsigned int val;
struct index *next;
};

@ -42,21 +32,21 @@ static struct index ** delta_index(const unsigned char *buf, @@ -42,21 +32,21 @@ static struct index ** delta_index(const unsigned char *buf,
unsigned long bufsize,
unsigned int *hash_shift)
{
unsigned int hsize, hshift, entries, blksize, i;
unsigned long hsize;
unsigned int hshift, i;
const unsigned char *data;
struct index *entry, **hash;
void *mem;

/* determine index hash size */
entries = (bufsize + BLK_SIZE - 1) / BLK_SIZE;
hsize = entries / 4;
for (i = 4; (1 << i) < hsize && i < 16; i++);
hsize = bufsize / 4;
for (i = 8; (1 << i) < hsize && i < 16; i++);
hsize = 1 << i;
hshift = 32 - i;
hshift = i - 8;
*hash_shift = hshift;

/* allocate lookup index */
mem = malloc(hsize * sizeof(*hash) + entries * sizeof(*entry));
mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
if (!mem)
return NULL;
hash = mem;
@ -64,17 +54,12 @@ static struct index ** delta_index(const unsigned char *buf, @@ -64,17 +54,12 @@ static struct index ** delta_index(const unsigned char *buf,
memset(hash, 0, hsize * sizeof(*hash));

/* then populate it */
data = buf + entries * BLK_SIZE - BLK_SIZE;
blksize = bufsize - (data - buf);
while (data >= buf) {
unsigned int val = adler32(0, data, blksize);
i = HASH(val, hshift);
entry->ptr = data;
entry->val = val;
data = buf + bufsize - 2;
while (data > buf) {
entry->ptr = --data;
i = data[0] ^ data[1] ^ (data[2] << hshift);
entry->next = hash[i];
hash[i] = entry++;
blksize = BLK_SIZE;
data -= BLK_SIZE;
}

return hash;
@ -141,29 +126,27 @@ void *diff_delta(void *from_buf, unsigned long from_size, @@ -141,29 +126,27 @@ void *diff_delta(void *from_buf, unsigned long from_size,

while (data < top) {
unsigned int moff = 0, msize = 0;
unsigned int blksize = MIN(top - data, BLK_SIZE);
unsigned int val = adler32(0, data, blksize);
i = HASH(val, hash_shift);
for (entry = hash[i]; entry; entry = entry->next) {
const unsigned char *ref = entry->ptr;
const unsigned char *src = data;
unsigned int ref_size = ref_top - ref;
if (entry->val != val)
continue;
if (ref_size > top - src)
ref_size = top - src;
while (ref_size && *src++ == *ref) {
ref++;
ref_size--;
}
ref_size = ref - entry->ptr;
if (ref_size > msize) {
/* this is our best match so far */
moff = entry->ptr - ref_data;
msize = ref_size;
if (msize >= 0x10000) {
msize = 0x10000;
if (data + 2 < top) {
i = data[0] ^ data[1] ^ (data[2] << hash_shift);
for (entry = hash[i]; entry; entry = entry->next) {
const unsigned char *ref = entry->ptr;
const unsigned char *src = data;
unsigned int ref_size = ref_top - ref;
if (ref_size > top - src)
ref_size = top - src;
if (ref_size > 0x10000)
ref_size = 0x10000;
if (ref_size <= msize)
break;
while (ref_size && *src++ == *ref) {
ref++;
ref_size--;
}
ref_size = ref - entry->ptr;
if (msize < ref - entry->ptr) {
/* this is our best match so far */
msize = ref - entry->ptr;
moff = entry->ptr - ref_data;
}
}
}

Loading…
Cancel
Save