Merge branch 'jk/duplicate-objects-in-packs'
A packfile that stores the same object more than once is broken and will be rejected by "git index-pack" that is run when receiving data over the wire.

* jk/duplicate-objects-in-packs:
  t5308: check that index-pack --strict detects duplicate objects
  test index-pack on packs with recoverable delta cycles
  add tests for indexing packs with delta cycles
  sha1-lookup: handle duplicate keys with GIT_USE_LOOKUP
  test-sha1: add a binary output mode

maint
commit
6c34560053
|
@ -204,7 +204,54 @@ int sha1_entry_pos(const void *table,
|
||||||
* byte 0 thru (ofs-1) are the same between
|
* byte 0 thru (ofs-1) are the same between
|
||||||
* lo and hi; ofs is the first byte that is
|
* lo and hi; ofs is the first byte that is
|
||||||
* different.
|
* different.
|
||||||
|
*
|
||||||
|
* If ofs==20, then no bytes are different,
|
||||||
|
* meaning we have entries with duplicate
|
||||||
|
* keys. We know that we are in a solid run
|
||||||
|
* of this entry (because the entries are
|
||||||
|
* sorted, and our lo and hi are the same,
|
||||||
|
* there can be nothing but this single key
|
||||||
|
* in between). So we can stop the search.
|
||||||
|
* Either one of these entries is it (and
|
||||||
|
* we do not care which), or we do not have
|
||||||
|
* it.
|
||||||
|
*
|
||||||
|
* Furthermore, we know that one of our
|
||||||
|
* endpoints must be the edge of the run of
|
||||||
|
* duplicates. For example, given this
|
||||||
|
* sequence:
|
||||||
|
*
|
||||||
|
* idx 0 1 2 3 4 5
|
||||||
|
* key A C C C C D
|
||||||
|
*
|
||||||
|
* If we are searching for "B", we might
|
||||||
|
* hit the duplicate run at lo=1, hi=3
|
||||||
|
* (e.g., by first mi=3, then mi=0). But we
|
||||||
|
* can never have lo > 1, because B < C.
|
||||||
|
* That is, if our key is less than the
|
||||||
|
* run, we know that "lo" is the edge, but
|
||||||
|
* we can say nothing of "hi". Similarly,
|
||||||
|
* if our key is greater than the run, we
|
||||||
|
* know that "hi" is the edge, but we can
|
||||||
|
* say nothing of "lo".
|
||||||
|
*
|
||||||
|
* Therefore if we do not find it, we also
|
||||||
|
* know where it would go if it did exist:
|
||||||
|
* just on the far side of the edge that we
|
||||||
|
* know about.
|
||||||
*/
|
*/
|
||||||
|
if (ofs == 20) {
|
||||||
|
mi = lo;
|
||||||
|
mi_key = base + elem_size * mi + key_offset;
|
||||||
|
cmp = memcmp(mi_key, key, 20);
|
||||||
|
if (!cmp)
|
||||||
|
return mi;
|
||||||
|
if (cmp < 0)
|
||||||
|
return -1 - hi;
|
||||||
|
else
|
||||||
|
return -1 - lo;
|
||||||
|
}
|
||||||
|
|
||||||
hiv = hi_key[ofs_0];
|
hiv = hi_key[ofs_0];
|
||||||
if (ofs_0 < 19)
|
if (ofs_0 < 19)
|
||||||
hiv = (hiv << 8) | hi_key[ofs_0+1];
|
hiv = (hiv << 8) | hi_key[ofs_0+1];
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# Support routines for hand-crafting weird or malicious packs.
|
||||||
|
#
|
||||||
|
# You can make a complete pack like:
|
||||||
|
#
|
||||||
|
# pack_header 2 >foo.pack &&
|
||||||
|
# pack_obj e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 >>foo.pack &&
|
||||||
|
# pack_obj e68fe8129b546b101aee9510c5328e7f21ca1d18 >>foo.pack &&
|
||||||
|
# pack_trailer foo.pack
|
||||||
|
|
||||||
|
# Print the big-endian 4-byte octal representation of $1
|
||||||
|
uint32_octal () {
|
||||||
|
n=$1
|
||||||
|
printf '\%o' $(($n / 16777216)); n=$((n % 16777216))
|
||||||
|
printf '\%o' $(($n / 65536)); n=$((n % 65536))
|
||||||
|
printf '\%o' $(($n / 256)); n=$((n % 256))
|
||||||
|
printf '\%o' $(($n ));
|
||||||
|
}
|
||||||
|
|
||||||
|
# Print the big-endian 4-byte binary representation of $1
|
||||||
|
uint32_binary () {
|
||||||
|
printf "$(uint32_octal "$1")"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Print a pack header, version 2, for a pack with $1 objects
|
||||||
|
pack_header () {
|
||||||
|
printf 'PACK' &&
|
||||||
|
printf '\0\0\0\2' &&
|
||||||
|
uint32_binary "$1"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Print the pack data for object $1, as a delta against object $2 (or as a full
|
||||||
|
# object if $2 is missing or empty). The output is suitable for including
|
||||||
|
# directly in the packfile, and represents the entirety of the object entry.
|
||||||
|
# Doing this on the fly (especially picking your deltas) is quite tricky, so we
|
||||||
|
# have hardcoded some well-known objects. See the case statements below for the
|
||||||
|
# complete list.
|
||||||
|
pack_obj () {
|
||||||
|
case "$1" in
|
||||||
|
# empty blob
|
||||||
|
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391)
|
||||||
|
case "$2" in
|
||||||
|
'')
|
||||||
|
printf '\060\170\234\003\0\0\0\0\1'
|
||||||
|
return
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
;;
|
||||||
|
|
||||||
|
# blob containing "\7\76"
|
||||||
|
e68fe8129b546b101aee9510c5328e7f21ca1d18)
|
||||||
|
case "$2" in
|
||||||
|
'')
|
||||||
|
printf '\062\170\234\143\267\3\0\0\116\0\106'
|
||||||
|
return
|
||||||
|
;;
|
||||||
|
01d7713666f4de822776c7622c10f1b07de280dc)
|
||||||
|
printf '\165\1\327\161\66\146\364\336\202\47\166' &&
|
||||||
|
printf '\307\142\54\20\361\260\175\342\200\334\170' &&
|
||||||
|
printf '\234\143\142\142\142\267\003\0\0\151\0\114'
|
||||||
|
return
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
;;
|
||||||
|
|
||||||
|
# blob containing "\7\0"
|
||||||
|
01d7713666f4de822776c7622c10f1b07de280dc)
|
||||||
|
case "$2" in
|
||||||
|
'')
|
||||||
|
printf '\062\170\234\143\147\0\0\0\20\0\10'
|
||||||
|
return
|
||||||
|
;;
|
||||||
|
e68fe8129b546b101aee9510c5328e7f21ca1d18)
|
||||||
|
printf '\165\346\217\350\22\233\124\153\20\32\356' &&
|
||||||
|
printf '\225\20\305\62\216\177\41\312\35\30\170\234' &&
|
||||||
|
printf '\143\142\142\142\147\0\0\0\53\0\16'
|
||||||
|
return
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute and append pack trailer to "$1"
|
||||||
|
pack_trailer () {
|
||||||
|
test-sha1 -b <"$1" >trailer.tmp &&
|
||||||
|
cat trailer.tmp >>"$1" &&
|
||||||
|
rm -f trailer.tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
# Remove any existing packs to make sure that
|
||||||
|
# whatever we index next will be the pack that we
|
||||||
|
# actually use.
|
||||||
|
clear_packs () {
|
||||||
|
rm -f .git/objects/pack/*
|
||||||
|
}
|
|
@ -0,0 +1,80 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
test_description='handling of duplicate objects in incoming packfiles'
|
||||||
|
. ./test-lib.sh
|
||||||
|
. "$TEST_DIRECTORY"/lib-pack.sh
|
||||||
|
|
||||||
|
# The sha1s we have in our pack. It's important that these have the same
|
||||||
|
# starting byte, so that they end up in the same fanout section of the index.
|
||||||
|
# That lets us make sure we are exercising the binary search with both sets.
|
||||||
|
LO_SHA1=e68fe8129b546b101aee9510c5328e7f21ca1d18
|
||||||
|
HI_SHA1=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
|
||||||
|
|
||||||
|
# And here's a "missing sha1" which will produce failed lookups. It must also
|
||||||
|
# be in the same fanout section, and should be between the two (so that during
|
||||||
|
# our binary search, we are sure to end up looking at one or the other of the
|
||||||
|
# duplicate runs).
|
||||||
|
MISSING_SHA1='e69d000000000000000000000000000000000000'
|
||||||
|
|
||||||
|
# git will never intentionally create packfiles with
|
||||||
|
# duplicate objects, so we have to construct them by hand.
|
||||||
|
#
|
||||||
|
# $1 is the name of the packfile to create
|
||||||
|
#
|
||||||
|
# $2 is the number of times to duplicate each object
|
||||||
|
create_pack () {
|
||||||
|
pack_header "$((2 * $2))" >"$1" &&
|
||||||
|
for i in $(test_seq 1 "$2"); do
|
||||||
|
pack_obj $LO_SHA1 &&
|
||||||
|
pack_obj $HI_SHA1
|
||||||
|
done >>"$1" &&
|
||||||
|
pack_trailer "$1"
|
||||||
|
}
|
||||||
|
|
||||||
|
# double-check that create_pack actually works
|
||||||
|
test_expect_success 'pack with no duplicates' '
|
||||||
|
create_pack no-dups.pack 1 &&
|
||||||
|
git index-pack --stdin <no-dups.pack
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'index-pack will allow duplicate objects by default' '
|
||||||
|
clear_packs &&
|
||||||
|
create_pack dups.pack 100 &&
|
||||||
|
git index-pack --stdin <dups.pack
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'create batch-check test vectors' '
|
||||||
|
cat >input <<-EOF &&
|
||||||
|
$LO_SHA1
|
||||||
|
$HI_SHA1
|
||||||
|
$MISSING_SHA1
|
||||||
|
EOF
|
||||||
|
cat >expect <<-EOF
|
||||||
|
$LO_SHA1 blob 2
|
||||||
|
$HI_SHA1 blob 0
|
||||||
|
$MISSING_SHA1 missing
|
||||||
|
EOF
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'lookup in duplicated pack (binary search)' '
|
||||||
|
git cat-file --batch-check <input >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'lookup in duplicated pack (GIT_USE_LOOKUP)' '
|
||||||
|
(
|
||||||
|
GIT_USE_LOOKUP=1 &&
|
||||||
|
export GIT_USE_LOOKUP &&
|
||||||
|
git cat-file --batch-check <input >actual
|
||||||
|
) &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'index-pack can reject packs with duplicates' '
|
||||||
|
clear_packs &&
|
||||||
|
create_pack dups.pack 2 &&
|
||||||
|
test_must_fail git index-pack --strict --stdin <dups.pack &&
|
||||||
|
test_expect_code 1 git cat-file -e $LO_SHA1
|
||||||
|
'
|
||||||
|
|
||||||
|
test_done
|
|
@ -0,0 +1,77 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
test_description='test index-pack handling of delta cycles in packfiles'
|
||||||
|
. ./test-lib.sh
|
||||||
|
. "$TEST_DIRECTORY"/lib-pack.sh
|
||||||
|
|
||||||
|
# Two similar-ish objects that we have computed deltas between.
|
||||||
|
A=01d7713666f4de822776c7622c10f1b07de280dc
|
||||||
|
B=e68fe8129b546b101aee9510c5328e7f21ca1d18
|
||||||
|
|
||||||
|
# double-check our hand-constructed packs
|
||||||
|
test_expect_success 'index-pack works with a single delta (A->B)' '
|
||||||
|
clear_packs &&
|
||||||
|
{
|
||||||
|
pack_header 2 &&
|
||||||
|
pack_obj $A $B &&
|
||||||
|
pack_obj $B
|
||||||
|
} >ab.pack &&
|
||||||
|
pack_trailer ab.pack &&
|
||||||
|
git index-pack --stdin <ab.pack &&
|
||||||
|
git cat-file -t $A &&
|
||||||
|
git cat-file -t $B
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'index-pack works with a single delta (B->A)' '
|
||||||
|
clear_packs &&
|
||||||
|
{
|
||||||
|
pack_header 2 &&
|
||||||
|
pack_obj $A &&
|
||||||
|
pack_obj $B $A
|
||||||
|
} >ba.pack &&
|
||||||
|
pack_trailer ba.pack &&
|
||||||
|
git index-pack --stdin <ba.pack &&
|
||||||
|
git cat-file -t $A &&
|
||||||
|
git cat-file -t $B
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'index-pack detects missing base objects' '
|
||||||
|
clear_packs &&
|
||||||
|
{
|
||||||
|
pack_header 1 &&
|
||||||
|
pack_obj $A $B
|
||||||
|
} >missing.pack &&
|
||||||
|
pack_trailer missing.pack &&
|
||||||
|
test_must_fail git index-pack --fix-thin --stdin <missing.pack
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'index-pack detects REF_DELTA cycles' '
|
||||||
|
clear_packs &&
|
||||||
|
{
|
||||||
|
pack_header 2 &&
|
||||||
|
pack_obj $A $B &&
|
||||||
|
pack_obj $B $A
|
||||||
|
} >cycle.pack &&
|
||||||
|
pack_trailer cycle.pack &&
|
||||||
|
test_must_fail git index-pack --fix-thin --stdin <cycle.pack
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_failure 'failover to an object in another pack' '
|
||||||
|
clear_packs &&
|
||||||
|
git index-pack --stdin <ab.pack &&
|
||||||
|
git index-pack --stdin --fix-thin <cycle.pack
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_failure 'failover to a duplicate object in the same pack' '
|
||||||
|
clear_packs &&
|
||||||
|
{
|
||||||
|
pack_header 3 &&
|
||||||
|
pack_obj $A $B &&
|
||||||
|
pack_obj $B $A &&
|
||||||
|
pack_obj $A
|
||||||
|
} >recoverable.pack &&
|
||||||
|
pack_trailer recoverable.pack &&
|
||||||
|
git index-pack --fix-thin --stdin <recoverable.pack
|
||||||
|
'
|
||||||
|
|
||||||
|
test_done
|
15
test-sha1.c
15
test-sha1.c
|
@ -5,10 +5,15 @@ int main(int ac, char **av)
|
||||||
git_SHA_CTX ctx;
|
git_SHA_CTX ctx;
|
||||||
unsigned char sha1[20];
|
unsigned char sha1[20];
|
||||||
unsigned bufsz = 8192;
|
unsigned bufsz = 8192;
|
||||||
|
int binary = 0;
|
||||||
char *buffer;
|
char *buffer;
|
||||||
|
|
||||||
if (ac == 2)
|
if (ac == 2) {
|
||||||
bufsz = strtoul(av[1], NULL, 10) * 1024 * 1024;
|
if (!strcmp(av[1], "-b"))
|
||||||
|
binary = 1;
|
||||||
|
else
|
||||||
|
bufsz = strtoul(av[1], NULL, 10) * 1024 * 1024;
|
||||||
|
}
|
||||||
|
|
||||||
if (!bufsz)
|
if (!bufsz)
|
||||||
bufsz = 8192;
|
bufsz = 8192;
|
||||||
|
@ -42,6 +47,10 @@ int main(int ac, char **av)
|
||||||
git_SHA1_Update(&ctx, buffer, this_sz);
|
git_SHA1_Update(&ctx, buffer, this_sz);
|
||||||
}
|
}
|
||||||
git_SHA1_Final(sha1, &ctx);
|
git_SHA1_Final(sha1, &ctx);
|
||||||
puts(sha1_to_hex(sha1));
|
|
||||||
|
if (binary)
|
||||||
|
fwrite(sha1, 1, 20, stdout);
|
||||||
|
else
|
||||||
|
puts(sha1_to_hex(sha1));
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue