From df6d61017a17efe67e4709028fea8e820b5efc5e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Sep 2006 15:05:12 -0700 Subject: [PATCH 1/3] pack-objects: re-validate data we copy from elsewhere. When reusing data from an existing pack and from new-style loose objects, we used to just copy it straight into the resulting pack. Instead, make sure it is not corrupt, but do so only when we are not streaming to stdout; when streaming, the receiving end will do the validation either by unpacking the stream or by constructing the .idx file. Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 67 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 46f524dfc3..11cc3c89f5 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -65,6 +65,7 @@ static unsigned char pack_file_sha1[20]; static int progress = 1; static volatile sig_atomic_t progress_update; static int window = 10; +static int pack_to_stdout; /* * The object names in objects array are hashed with this hashtable, @@ -242,6 +243,58 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha return n; } +static int revalidate_one(struct object_entry *entry, + void *data, char *type, unsigned long size) +{ + int err; + if (!data) + return -1; + if (size != entry->size) + return -1; + err = check_sha1_signature(entry->sha1, data, size, + type_names[entry->type]); + free(data); + return err; +} + +/* + * we are going to reuse the existing pack entry data. make + * sure it is not corrupt. 
+ */ +static int revalidate_pack_entry(struct object_entry *entry) +{ + void *data; + char type[20]; + unsigned long size; + struct pack_entry e; + + if (pack_to_stdout) + return 0; + + e.p = entry->in_pack; + e.offset = entry->in_pack_offset; + + /* the caller has already called use_packed_git() for us */ + data = unpack_entry_gently(&e, type, &size); + return revalidate_one(entry, data, type, size); +} + +static int revalidate_loose_object(struct object_entry *entry, + unsigned char *map, + unsigned long mapsize) +{ + /* we already know this is a loose object with new type header. */ + void *data; + char type[20]; + unsigned long size; + + if (pack_to_stdout) + return 0; + + data = unpack_sha1_file(map, mapsize, type, &size); + return revalidate_one(entry, data, type, size); +} + static unsigned long write_object(struct sha1file *f, struct object_entry *entry) { @@ -276,6 +329,9 @@ static unsigned long write_object(struct sha1file *f, map = map_sha1_file(entry->sha1, &mapsize); if (map && !legacy_loose_object(map)) { /* We can copy straight into the pack file */ + if (revalidate_loose_object(entry, map, mapsize)) + die("corrupt loose object %s", + sha1_to_hex(entry->sha1)); sha1write(f, map, mapsize); munmap(map, mapsize); written++; @@ -286,7 +342,7 @@ static unsigned long write_object(struct sha1file *f, munmap(map, mapsize); } - if (! 
to_reuse) { + if (!to_reuse) { buf = read_sha1_file(entry->sha1, type, &size); if (!buf) die("unable to read %s", sha1_to_hex(entry->sha1)); @@ -319,6 +375,9 @@ static unsigned long write_object(struct sha1file *f, datalen = find_packed_object_size(p, entry->in_pack_offset); buf = (char *) p->pack_base + entry->in_pack_offset; + + if (revalidate_pack_entry(entry)) + die("corrupt delta in pack %s", sha1_to_hex(entry->sha1)); sha1write(f, buf, datalen); unuse_packed_git(p); hdrlen = 0; /* not really */ @@ -1163,7 +1222,7 @@ static void prepare_pack(int window, int depth) find_deltas(sorted_by_type, window+1, depth); } -static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout) +static int reuse_cached_pack(unsigned char *sha1) { static const char cache[] = "pack-cache/pack-%s.%s"; char *cached_pack, *cached_idx; @@ -1247,7 +1306,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) { SHA_CTX ctx; char line[40 + 1 + PATH_MAX + 2]; - int depth = 10, pack_to_stdout = 0; + int depth = 10; struct object_entry **list; int num_preferred_base = 0; int i; @@ -1367,7 +1426,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (progress && (nr_objects != nr_result)) fprintf(stderr, "Result has %d objects.\n", nr_result); - if (reuse_cached_pack(object_list_sha1, pack_to_stdout)) + if (reuse_cached_pack(object_list_sha1)) ; else { if (nr_result) From 7042dbf7a1e9137eb856b3b086a062561c50b8a3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 3 Sep 2006 14:44:46 -0700 Subject: [PATCH 2/3] pack-objects: fix thinko in revalidate code When revalidating an entry from an existing pack, entry->size and entry->type are not necessarily the size and type of the final object when the entry is deltified, but for base objects they must match. 
Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 11cc3c89f5..5e42387a45 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -247,12 +247,13 @@ static int revalidate_one(struct object_entry *entry, void *data, char *type, unsigned long size) { int err; - if (!data) - return -1; - if (size != entry->size) - return -1; - err = check_sha1_signature(entry->sha1, data, size, - type_names[entry->type]); + if ((!data) || + ((entry->type != OBJ_DELTA) && + ( (size != entry->size) || + strcmp(type_names[entry->type], type)))) + err = -1; + else + err = check_sha1_signature(entry->sha1, data, size, type); free(data); return err; } From 72518e9c2623af0b5de864a7b66208ea94aacadb Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 3 Sep 2006 21:09:18 -0700 Subject: [PATCH 3/3] more lightweight revalidation while reusing deflated stream in packing When copying from an existing pack and when copying from a loose object with a new-style header, the code makes sure that the piece we are going to copy out inflates well and inflate() consumes the data in full while doing so. The check to see if the xdelta really applies is quite expensive, as you described, because you would need to have the image of the base object, which can itself be represented as a delta against something else. 
Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 81 +++++++++++++++++++++++++++--------------- cache.h | 12 +++++++ object.h | 11 ------ sha1_file.c | 2 +- 4 files changed, 65 insertions(+), 41 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 5e42387a45..149fa28397 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -243,41 +243,61 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha return n; } -static int revalidate_one(struct object_entry *entry, - void *data, char *type, unsigned long size) +static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect) { - int err; - if ((!data) || - ((entry->type != OBJ_DELTA) && - ( (size != entry->size) || - strcmp(type_names[entry->type], type)))) - err = -1; - else - err = check_sha1_signature(entry->sha1, data, size, type); - free(data); - return err; + z_stream stream; + unsigned char fakebuf[4096]; + int st; + + memset(&stream, 0, sizeof(stream)); + stream.next_in = data; + stream.avail_in = len; + stream.next_out = fakebuf; + stream.avail_out = sizeof(fakebuf); + inflateInit(&stream); + + while (1) { + st = inflate(&stream, Z_FINISH); + if (st == Z_STREAM_END || st == Z_OK) { + st = (stream.total_out == expect && + stream.total_in == len) ? 0 : -1; + break; + } + if (st != Z_BUF_ERROR) { + st = -1; + break; + } + stream.next_out = fakebuf; + stream.avail_out = sizeof(fakebuf); + } + inflateEnd(&stream); + return st; } /* * we are going to reuse the existing pack entry data. make * sure it is not corrupt. 
*/ -static int revalidate_pack_entry(struct object_entry *entry) +static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data, unsigned long len) { - void *data; - char type[20]; - unsigned long size; - struct pack_entry e; + enum object_type type; + unsigned long size, used; if (pack_to_stdout) return 0; - e.p = entry->in_pack; - e.offset = entry->in_pack_offset; - - /* the caller has already called use_packed_git() for us */ - data = unpack_entry_gently(&e, type, &size); - return revalidate_one(entry, data, type, size); + /* the caller has already called use_packed_git() for us, + * so it is safe to access the pack data from mmapped location. + * make sure the entry inflates correctly. + */ + used = unpack_object_header_gently(data, len, &type, &size); + if (!used) + return -1; + if (type == OBJ_DELTA) + used += 20; /* skip base object name */ + data += used; + len -= used; + return check_inflate(data, len, entry->size); } static int revalidate_loose_object(struct object_entry *entry, @@ -285,15 +305,18 @@ static int revalidate_loose_object(struct object_entry *entry, unsigned long mapsize) { /* we already know this is a loose object with new type header. 
*/ - void *data; - char type[20]; - unsigned long size; + enum object_type type; + unsigned long size, used; if (pack_to_stdout) return 0; - data = unpack_sha1_file(map, mapsize, type, &size); - return revalidate_one(entry, data, type, size); + used = unpack_object_header_gently(map, mapsize, &type, &size); + if (!used) + return -1; + map += used; + mapsize -= used; + return check_inflate(map, mapsize, size); } static unsigned long write_object(struct sha1file *f, @@ -377,7 +400,7 @@ static unsigned long write_object(struct sha1file *f, datalen = find_packed_object_size(p, entry->in_pack_offset); buf = (char *) p->pack_base + entry->in_pack_offset; - if (revalidate_pack_entry(entry)) + if (revalidate_pack_entry(entry, buf, datalen)) die("corrupt delta in pack %s", sha1_to_hex(entry->sha1)); sha1write(f, buf, datalen); unuse_packed_git(p); diff --git a/cache.h b/cache.h index 195908fc34..a53204f6d6 100644 --- a/cache.h +++ b/cache.h @@ -267,6 +267,17 @@ extern int legacy_loose_object(unsigned char *); extern int has_pack_file(const unsigned char *sha1); extern int has_pack_index(const unsigned char *sha1); +enum object_type { + OBJ_NONE = 0, + OBJ_COMMIT = 1, + OBJ_TREE = 2, + OBJ_BLOB = 3, + OBJ_TAG = 4, + /* 5/6 for future expansion */ + OBJ_DELTA = 7, + OBJ_BAD, +}; + /* Convert to/from hex/sha1 representation */ #define MINIMUM_ABBREV 4 #define DEFAULT_ABBREV 7 @@ -374,6 +385,7 @@ extern int num_packed_objects(const struct packed_git *p); extern int nth_packed_object_sha1(const struct packed_git *, int, unsigned char*); extern int find_pack_entry_one(const unsigned char *, struct pack_entry *, struct packed_git *); extern void *unpack_entry_gently(struct pack_entry *, char *, unsigned long *); +extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern void packed_object_info_detail(struct pack_entry *, char *, unsigned long *, unsigned long *, unsigned int *, unsigned char 
*); /* Dumb servers support */ diff --git a/object.h b/object.h index 733faac4cc..3d4ff4611f 100644 --- a/object.h +++ b/object.h @@ -27,17 +27,6 @@ struct object_array { /* * The object type is stored in 3 bits. */ -enum object_type { - OBJ_NONE = 0, - OBJ_COMMIT = 1, - OBJ_TREE = 2, - OBJ_BLOB = 3, - OBJ_TAG = 4, - /* 5/6 for future expansion */ - OBJ_DELTA = 7, - OBJ_BAD, -}; - struct object { unsigned parsed : 1; unsigned used : 1; diff --git a/sha1_file.c b/sha1_file.c index 4ef98053f8..428d791ba8 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -711,7 +711,7 @@ int legacy_loose_object(unsigned char *map) return 0; } -static unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) +unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) { unsigned shift; unsigned char c;