Merge branch 'np/index-pack'

* np/index-pack:
  index-pack: don't leak leaf delta result
  improve index-pack tests
  fix multiple issues in index-pack
  index-pack: smarter memory usage during delta resolution
  index-pack: rationalize delta resolution code
maint
Junio C Hamano 2008-11-02 16:36:37 -08:00
commit 275ee50c81
2 changed files with 146 additions and 109 deletions

View File

@ -221,17 +221,23 @@ static void bad_object(unsigned long offset, const char *format, ...)
die("pack has bad object at offset %lu: %s", offset, buf); die("pack has bad object at offset %lu: %s", offset, buf);
} }


/*
 * Drop the data buffer attached to c, if there is one.
 *
 * When c->data is set, c->size is subtracted from base_cache_used, the
 * buffer is freed and c->data is reset to NULL so that a later call on
 * the same entry does nothing.  An entry with no data is left untouched
 * and the accounting is not changed.
 */
static void free_base_data(struct base_data *c)
{
	if (!c->data)
		return;

	base_cache_used -= c->size;
	free(c->data);
	c->data = NULL;
}

static void prune_base_data(struct base_data *retain) static void prune_base_data(struct base_data *retain)
{ {
struct base_data *b = base_cache; struct base_data *b = base_cache;
for (b = base_cache; for (b = base_cache;
base_cache_used > delta_base_cache_limit && b; base_cache_used > delta_base_cache_limit && b;
b = b->child) { b = b->child) {
if (b->data && b != retain) { if (b->data && b != retain)
free(b->data); free_base_data(b);
b->data = NULL;
base_cache_used -= b->size;
}
} }
} }


@ -244,7 +250,8 @@ static void link_base_data(struct base_data *base, struct base_data *c)


c->base = base; c->base = base;
c->child = NULL; c->child = NULL;
base_cache_used += c->size; if (c->data)
base_cache_used += c->size;
prune_base_data(c); prune_base_data(c);
} }


@ -255,10 +262,7 @@ static void unlink_base_data(struct base_data *c)
base->child = NULL; base->child = NULL;
else else
base_cache = NULL; base_cache = NULL;
if (c->data) { free_base_data(c);
free(c->data);
base_cache_used -= c->size;
}
} }


static void *unpack_entry_data(unsigned long offset, unsigned long size) static void *unpack_entry_data(unsigned long offset, unsigned long size)
@ -408,22 +412,24 @@ static int find_delta(const union delta_base *base)
return -first-1; return -first-1;
} }


static int find_delta_children(const union delta_base *base, static void find_delta_children(const union delta_base *base,
int *first_index, int *last_index) int *first_index, int *last_index)
{ {
int first = find_delta(base); int first = find_delta(base);
int last = first; int last = first;
int end = nr_deltas - 1; int end = nr_deltas - 1;


if (first < 0) if (first < 0) {
return -1; *first_index = 0;
*last_index = -1;
return;
}
while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ)) while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
--first; --first;
while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ)) while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
++last; ++last;
*first_index = first; *first_index = first;
*last_index = last; *last_index = last;
return 0;
} }


static void sha1_object(const void *data, unsigned long size, static void sha1_object(const void *data, unsigned long size,
@ -494,8 +500,10 @@ static void *get_base_data(struct base_data *c)
free(raw); free(raw);
if (!c->data) if (!c->data)
bad_object(obj->idx.offset, "failed to apply delta"); bad_object(obj->idx.offset, "failed to apply delta");
} else } else {
c->data = get_data_from_pack(obj); c->data = get_data_from_pack(obj);
c->size = obj->size;
}


base_cache_used += c->size; base_cache_used += c->size;
prune_base_data(c); prune_base_data(c);
@ -504,49 +512,74 @@ static void *get_base_data(struct base_data *c)
} }


static void resolve_delta(struct object_entry *delta_obj, static void resolve_delta(struct object_entry *delta_obj,
struct base_data *base_obj, enum object_type type) struct base_data *base, struct base_data *result)
{ {
void *delta_data; void *base_data, *delta_data;
unsigned long delta_size;
union delta_base delta_base;
int j, first, last;
struct base_data result;


delta_obj->real_type = type; delta_obj->real_type = base->obj->real_type;
delta_data = get_data_from_pack(delta_obj); delta_data = get_data_from_pack(delta_obj);
delta_size = delta_obj->size; base_data = get_base_data(base);
result.data = patch_delta(get_base_data(base_obj), base_obj->size, result->obj = delta_obj;
delta_data, delta_size, result->data = patch_delta(base_data, base->size,
&result.size); delta_data, delta_obj->size, &result->size);
free(delta_data); free(delta_data);
if (!result.data) if (!result->data)
bad_object(delta_obj->idx.offset, "failed to apply delta"); bad_object(delta_obj->idx.offset, "failed to apply delta");
sha1_object(result.data, result.size, type, delta_obj->idx.sha1); sha1_object(result->data, result->size, delta_obj->real_type,
delta_obj->idx.sha1);
nr_resolved_deltas++; nr_resolved_deltas++;
}


result.obj = delta_obj; static void find_unresolved_deltas(struct base_data *base,
link_base_data(base_obj, &result); struct base_data *prev_base)
{
int i, ref_first, ref_last, ofs_first, ofs_last;


hashcpy(delta_base.sha1, delta_obj->idx.sha1); /*
if (!find_delta_children(&delta_base, &first, &last)) { * This is a recursive function. These braces should help reduce
for (j = first; j <= last; j++) { * stack usage by limiting the scope of the delta_base union.
struct object_entry *child = objects + deltas[j].obj_no; */
if (child->real_type == OBJ_REF_DELTA) {
resolve_delta(child, &result, type); union delta_base base_spec;

hashcpy(base_spec.sha1, base->obj->idx.sha1);
find_delta_children(&base_spec, &ref_first, &ref_last);

memset(&base_spec, 0, sizeof(base_spec));
base_spec.offset = base->obj->idx.offset;
find_delta_children(&base_spec, &ofs_first, &ofs_last);
}

if (ref_last == -1 && ofs_last == -1) {
free(base->data);
return;
}

link_base_data(prev_base, base);

for (i = ref_first; i <= ref_last; i++) {
struct object_entry *child = objects + deltas[i].obj_no;
if (child->real_type == OBJ_REF_DELTA) {
struct base_data result;
resolve_delta(child, base, &result);
if (i == ref_last && ofs_last == -1)
free_base_data(base);
find_unresolved_deltas(&result, base);
} }
} }


memset(&delta_base, 0, sizeof(delta_base)); for (i = ofs_first; i <= ofs_last; i++) {
delta_base.offset = delta_obj->idx.offset; struct object_entry *child = objects + deltas[i].obj_no;
if (!find_delta_children(&delta_base, &first, &last)) { if (child->real_type == OBJ_OFS_DELTA) {
for (j = first; j <= last; j++) { struct base_data result;
struct object_entry *child = objects + deltas[j].obj_no; resolve_delta(child, base, &result);
if (child->real_type == OBJ_OFS_DELTA) if (i == ofs_last)
resolve_delta(child, &result, type); free_base_data(base);
find_unresolved_deltas(&result, base);
} }
} }


unlink_base_data(&result); unlink_base_data(base);
} }


static int compare_delta_entry(const void *a, const void *b) static int compare_delta_entry(const void *a, const void *b)
@ -622,37 +655,13 @@ static void parse_pack_objects(unsigned char *sha1)
progress = start_progress("Resolving deltas", nr_deltas); progress = start_progress("Resolving deltas", nr_deltas);
for (i = 0; i < nr_objects; i++) { for (i = 0; i < nr_objects; i++) {
struct object_entry *obj = &objects[i]; struct object_entry *obj = &objects[i];
union delta_base base;
int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
struct base_data base_obj; struct base_data base_obj;


if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
continue; continue;
hashcpy(base.sha1, obj->idx.sha1);
ref = !find_delta_children(&base, &ref_first, &ref_last);
memset(&base, 0, sizeof(base));
base.offset = obj->idx.offset;
ofs = !find_delta_children(&base, &ofs_first, &ofs_last);
if (!ref && !ofs)
continue;
base_obj.data = get_data_from_pack(obj);
base_obj.size = obj->size;
base_obj.obj = obj; base_obj.obj = obj;
link_base_data(NULL, &base_obj); base_obj.data = NULL;

find_unresolved_deltas(&base_obj, NULL);
if (ref)
for (j = ref_first; j <= ref_last; j++) {
struct object_entry *child = objects + deltas[j].obj_no;
if (child->real_type == OBJ_REF_DELTA)
resolve_delta(child, &base_obj, obj->type);
}
if (ofs)
for (j = ofs_first; j <= ofs_last; j++) {
struct object_entry *child = objects + deltas[j].obj_no;
if (child->real_type == OBJ_OFS_DELTA)
resolve_delta(child, &base_obj, obj->type);
}
unlink_base_data(&base_obj);
display_progress(progress, nr_resolved_deltas); display_progress(progress, nr_resolved_deltas);
} }
} }
@ -745,7 +754,6 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved)
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
struct delta_entry *d = sorted_by_pos[i]; struct delta_entry *d = sorted_by_pos[i];
enum object_type type; enum object_type type;
int j, first, last;
struct base_data base_obj; struct base_data base_obj;


if (objects[d->obj_no].real_type != OBJ_REF_DELTA) if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
@ -759,16 +767,7 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved)
die("local object %s is corrupt", sha1_to_hex(d->base.sha1)); die("local object %s is corrupt", sha1_to_hex(d->base.sha1));
base_obj.obj = append_obj_to_pack(f, d->base.sha1, base_obj.obj = append_obj_to_pack(f, d->base.sha1,
base_obj.data, base_obj.size, type); base_obj.data, base_obj.size, type);
link_base_data(NULL, &base_obj); find_unresolved_deltas(&base_obj, NULL);

find_delta_children(&d->base, &first, &last);
for (j = first; j <= last; j++) {
struct object_entry *child = objects + deltas[j].obj_no;
if (child->real_type == OBJ_REF_DELTA)
resolve_delta(child, &base_obj, type);
}

unlink_base_data(&base_obj);
display_progress(progress, nr_resolved_deltas); display_progress(progress, nr_resolved_deltas);
} }
free(sorted_by_pos); free(sorted_by_pos);

View File

@ -11,13 +11,18 @@ test_expect_success \
'rm -rf .git 'rm -rf .git
git init && git init &&
i=1 && i=1 &&
while test $i -le 100 while test $i -le 100
do do
i=`printf '%03i' $i` iii=`printf '%03i' $i`
echo $i >file_$i && test-genrandom "bar" 200 > wide_delta_$iii &&
test-genrandom "$i" 8192 >>file_$i && test-genrandom "baz $iii" 50 >> wide_delta_$iii &&
git update-index --add file_$i && test-genrandom "foo"$i 100 > deep_delta_$iii &&
i=`expr $i + 1` || return 1 test-genrandom "foo"`expr $i + 1` 100 >> deep_delta_$iii &&
test-genrandom "foo"`expr $i + 2` 100 >> deep_delta_$iii &&
echo $iii >file_$iii &&
test-genrandom "$iii" 8192 >>file_$iii &&
git update-index --add file_$iii deep_delta_$iii wide_delta_$iii &&
i=`expr $i + 1` || return 1
done && done &&
{ echo 101 && test-genrandom 100 8192; } >file_101 && { echo 101 && test-genrandom 100 8192; } >file_101 &&
git update-index --add file_101 && git update-index --add file_101 &&
@ -92,6 +97,31 @@ test_expect_success \
'64-bit offsets: index-pack result should match pack-objects one' \ '64-bit offsets: index-pack result should match pack-objects one' \
'cmp "test-3-${pack3}.idx" "3.idx"' 'cmp "test-3-${pack3}.idx" "3.idx"'


# Print the object number of an object within a pack index.
#   $1 - pack index file, fed to `git show-index` on stdin
#   $2 - object name to look for
# The number is the zero-based position of the first `git show-index`
# output line whose second field equals $2.  Nothing is printed when
# no line matches.
index_obj_nr()
{
	idx_file=$1
	object_sha1=$2
	nr=0
	git show-index < $idx_file |
	while read offs sha1 extra
	do
		if test "$sha1" = "$object_sha1"
		then
			echo "$nr"
			break
		fi
		nr=$(($nr + 1))
	done
}

# Print the pack offset recorded for an object in a pack index.
#   $1 - pack index file, fed to `git show-index` on stdin
#   $2 - object name, used as the grep pattern
# The offset is the first field of the first `git show-index` output
# line that grep selects.  Nothing is printed when grep selects no line.
index_obj_offset()
{
	idx_file=$1
	object_sha1=$2
	git show-index < $idx_file |
	grep $object_sha1 |
	(
		read offset rest &&
		echo "$offset"
	)
}

test_expect_success \ test_expect_success \
'[index v1] 1) stream pack to repository' \ '[index v1] 1) stream pack to repository' \
'git index-pack --index-version=1 --stdin < "test-1-${pack1}.pack" && 'git index-pack --index-version=1 --stdin < "test-1-${pack1}.pack" &&
@ -102,19 +132,22 @@ test_expect_success \


test_expect_success \ test_expect_success \
'[index v1] 2) create a stealth corruption in a delta base reference' \ '[index v1] 2) create a stealth corruption in a delta base reference' \
'# this test assumes a delta smaller than 16 bytes at the end of the pack '# This test assumes file_101 is a delta smaller than 16 bytes.
git show-index <1.idx | sort -n | sed -ne \$p | ( # It should be against file_100 but we replace its base with file_099
read delta_offs delta_sha1 && sha1_101=`git hash-object file_101` &&
git cat-file blob "$delta_sha1" > blob_1 && sha1_099=`git hash-object file_099` &&
chmod +w ".git/objects/pack/pack-${pack1}.pack" && offs_101=`index_obj_offset 1.idx $sha1_101` &&
dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($delta_offs + 1)) \ nr_099=`index_obj_nr 1.idx $sha1_099` &&
if=".git/objects/pack/pack-${pack1}.idx" skip=$((256 * 4 + 4)) \ chmod +w ".git/objects/pack/pack-${pack1}.pack" &&
bs=1 count=20 conv=notrunc && dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \
git cat-file blob "$delta_sha1" > blob_2 )' if=".git/objects/pack/pack-${pack1}.idx" \
skip=$((4 + 256 * 4 + $nr_099 * 24)) \
bs=1 count=20 conv=notrunc &&
git cat-file blob $sha1_101 > file_101_foo1'


test_expect_success \ test_expect_success \
'[index v1] 3) corrupted delta happily returned wrong data' \ '[index v1] 3) corrupted delta happily returned wrong data' \
'! cmp blob_1 blob_2' 'test -f file_101_foo1 && ! cmp file_101 file_101_foo1'


test_expect_success \ test_expect_success \
'[index v1] 4) confirm that the pack is actually corrupted' \ '[index v1] 4) confirm that the pack is actually corrupted' \
@ -140,19 +173,22 @@ test_expect_success \


test_expect_success \ test_expect_success \
'[index v2] 2) create a stealth corruption in a delta base reference' \ '[index v2] 2) create a stealth corruption in a delta base reference' \
'# this test assumes a delta smaller than 16 bytes at the end of the pack '# This test assumes file_101 is a delta smaller than 16 bytes.
git show-index <1.idx | sort -n | sed -ne \$p | ( # It should be against file_100 but we replace its base with file_099
read delta_offs delta_sha1 delta_crc && sha1_101=`git hash-object file_101` &&
git cat-file blob "$delta_sha1" > blob_3 && sha1_099=`git hash-object file_099` &&
chmod +w ".git/objects/pack/pack-${pack1}.pack" && offs_101=`index_obj_offset 1.idx $sha1_101` &&
dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($delta_offs + 1)) \ nr_099=`index_obj_nr 1.idx $sha1_099` &&
if=".git/objects/pack/pack-${pack1}.idx" skip=$((8 + 256 * 4)) \ chmod +w ".git/objects/pack/pack-${pack1}.pack" &&
bs=1 count=20 conv=notrunc && dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \
git cat-file blob "$delta_sha1" > blob_4 )' if=".git/objects/pack/pack-${pack1}.idx" \
skip=$((8 + 256 * 4 + $nr_099 * 20)) \
bs=1 count=20 conv=notrunc &&
git cat-file blob $sha1_101 > file_101_foo2'


test_expect_success \ test_expect_success \
'[index v2] 3) corrupted delta happily returned wrong data' \ '[index v2] 3) corrupted delta happily returned wrong data' \
'! cmp blob_3 blob_4' 'test -f file_101_foo2 && ! cmp file_101 file_101_foo2'


test_expect_success \ test_expect_success \
'[index v2] 4) confirm that the pack is actually corrupted' \ '[index v2] 4) confirm that the pack is actually corrupted' \
@ -167,9 +203,11 @@ test_expect_success \
'rm -f .git/objects/pack/* && 'rm -f .git/objects/pack/* &&
git index-pack --index-version=2 --stdin < "test-1-${pack1}.pack" && git index-pack --index-version=2 --stdin < "test-1-${pack1}.pack" &&
git verify-pack ".git/objects/pack/pack-${pack1}.pack" && git verify-pack ".git/objects/pack/pack-${pack1}.pack" &&
obj=`git hash-object file_001` &&
nr=`index_obj_nr ".git/objects/pack/pack-${pack1}.idx" $obj` &&
chmod +w ".git/objects/pack/pack-${pack1}.idx" && chmod +w ".git/objects/pack/pack-${pack1}.idx" &&
dd if=/dev/zero of=".git/objects/pack/pack-${pack1}.idx" conv=notrunc \ dd if=/dev/zero of=".git/objects/pack/pack-${pack1}.idx" conv=notrunc \
bs=1 count=4 seek=$((8 + 256 * 4 + `wc -l <obj-list` * 20 + 0)) && bs=1 count=4 seek=$((8 + 256 * 4 + `wc -l <obj-list` * 20 + $nr * 4)) &&
( while read obj ( while read obj
do git cat-file -p $obj >/dev/null || exit 1 do git cat-file -p $obj >/dev/null || exit 1
done <obj-list ) && done <obj-list ) &&