From 3613f9b4c02c3e3d35f4c6c06e40d5c176e7eb6d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 2 May 2008 15:11:45 -0400 Subject: [PATCH 1/7] pack-objects: small cleanup Better encapsulate delta creation for writing. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 777f272668..8691c99518 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -102,21 +102,24 @@ static uint32_t written, written_delta; static uint32_t reused, reused_delta; -static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) +static void *get_delta(struct object_entry *entry) { - unsigned long othersize, delta_size; + unsigned long size, base_size, delta_size; + void *buf, *base_buf, *delta_buf; enum object_type type; - void *otherbuf = read_sha1_file(entry->delta->idx.sha1, &type, &othersize); - void *delta_buf; - if (!otherbuf) + buf = read_sha1_file(entry->idx.sha1, &type, &size); + if (!buf) + die("unable to read %s", sha1_to_hex(entry->idx.sha1)); + base_buf = read_sha1_file(entry->delta->idx.sha1, &type, &base_size); + if (!base_buf) die("unable to read %s", sha1_to_hex(entry->delta->idx.sha1)); - delta_buf = diff_delta(otherbuf, othersize, + delta_buf = diff_delta(base_buf, base_size, buf, size, &delta_size, 0); - if (!delta_buf || delta_size != entry->delta_size) + if (!delta_buf || delta_size != entry->delta_size) die("delta size changed"); - free(buf); - free(otherbuf); + free(buf); + free(base_buf); return delta_buf; } @@ -223,7 +226,6 @@ static unsigned long write_object(struct sha1file *f, off_t write_offset) { unsigned long size; - enum object_type type; void *buf; unsigned char header[10]; unsigned char dheader[10]; @@ -281,10 +283,7 @@ static unsigned long write_object(struct sha1file *f, obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } else { - buf = read_sha1_file(entry->idx.sha1, &type, &size); - if (!buf) - die("unable to read %s", sha1_to_hex(entry->idx.sha1)); - buf = delta_against(buf, size, entry); + buf = get_delta(entry); size = entry->delta_size; obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; From a7de713089082005d14679b694fd3999978fb735 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 2 May 2008 15:11:46 -0400 Subject: [PATCH 2/7] pack-objects: remove some double negative logic Parsing !no_reuse_delta everywhere makes my brain spend extra cycles wondering each time. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 8691c99518..afbf3ddc68 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -65,7 +65,8 @@ static struct pack_idx_entry **written_list; static uint32_t nr_objects, nr_alloc, nr_result, nr_written; static int non_empty; -static int no_reuse_delta, no_reuse_object, keep_unreachable, include_tag; +static int reuse_delta = 1, reuse_object = 1; +static int keep_unreachable, include_tag; static int local; static int incremental; static int allow_ofs_delta; @@ -251,7 +252,7 @@ static unsigned long write_object(struct sha1file *f, crc32_begin(f); obj_type = entry->type; - if (no_reuse_object) + if (!reuse_object) to_reuse = 0; /* explicit */ else if (!entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ @@ -1021,7 +1022,7 @@ static void check_object(struct object_entry *entry) unuse_pack(&w_curs); return; case OBJ_REF_DELTA: - if (!no_reuse_delta && !entry->preferred_base) + if (reuse_delta && !entry->preferred_base) base_ref = use_pack(p, &w_curs, entry->in_pack_offset + used, NULL); entry->in_pack_header_size = used + 20; @@ -1044,7 +1045,7 @@ static void check_object(struct object_entry *entry) die("delta base offset out of bound for %s", sha1_to_hex(entry->idx.sha1)); ofs = entry->in_pack_offset - ofs; - if (!no_reuse_delta && !entry->preferred_base) { + if (reuse_delta && !entry->preferred_base) { struct revindex_entry *revidx; revidx = find_pack_revindex(p, ofs); base_ref = nth_packed_object_sha1(p, revidx->nr); @@ -1232,7 +1233,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, * We do not bother to try a delta that we discarded * on an earlier try, but only when reusing delta data. */ - if (!no_reuse_delta && trg_entry->in_pack && + if (reuse_delta && trg_entry->in_pack && trg_entry->in_pack == src_entry->in_pack && trg_entry->in_pack_type != OBJ_REF_DELTA && trg_entry->in_pack_type != OBJ_OFS_DELTA) @@ -1687,7 +1688,7 @@ static void prepare_pack(int window, int depth) if (entry->delta) /* This happens if we decided to reuse existing - * delta from a pack. "!no_reuse_delta &&" is implied. + * delta from a pack. "reuse_delta &&" is implied. */ continue; @@ -2049,11 +2050,11 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) continue; } if (!strcmp("--no-reuse-delta", arg)) { - no_reuse_delta = 1; + reuse_delta = 0; continue; } if (!strcmp("--no-reuse-object", arg)) { - no_reuse_object = no_reuse_delta = 1; + reuse_object = reuse_delta = 0; continue; } if (!strcmp("--delta-base-offset", arg)) { From bcd7954e217994fafabe5106e56829c8a2cb7e99 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 2 May 2008 15:11:47 -0400 Subject: [PATCH 3/7] pack-objects: simplify the condition associated with --all-progress Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index afbf3ddc68..b4a63d2468 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -452,11 +452,10 @@ static void write_pack_file(void) struct sha1file *f; off_t offset, offset_one, last_obj_offset = 0; struct pack_header hdr; - int do_progress = progress >> pack_to_stdout; uint32_t nr_remaining = nr_result; time_t last_mtime = 0; - if (do_progress) + if (progress > pack_to_stdout) progress_state = start_progress("Writing objects", nr_result); written_list = xmalloc(nr_objects * sizeof(*written_list)); From 2c5ef824637cffa574bdce7149791a9cb520595e Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 2 May 2008 15:11:48 -0400 Subject: [PATCH 4/7] pack-objects: clean up write_object() a bit ... for improved readability. No functional changes. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 64 ++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index b4a63d2468..69ac27788b 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -226,41 +226,43 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry, off_t write_offset) { - unsigned long size; + unsigned long size, limit; void *buf; - unsigned char header[10]; - unsigned char dheader[10]; + unsigned char header[10], dheader[10]; unsigned hdrlen; off_t datalen; - enum object_type obj_type; - int to_reuse = 0; - /* write limit if limited packsize and not first object */ - unsigned long limit = pack_size_limit && nr_written ? - pack_size_limit - write_offset : 0; - /* no if no delta */ - int usable_delta = !entry->delta ? 0 : - /* yes if unlimited packfile */ - !pack_size_limit ? 1 : - /* no if base written to previous pack */ - entry->delta->idx.offset == (off_t)-1 ? 0 : - /* otherwise double-check written to this - * pack, like we do below - */ - entry->delta->idx.offset ? 1 : 0; + enum object_type type; + int usable_delta, to_reuse; if (!pack_to_stdout) crc32_begin(f); - obj_type = entry->type; + type = entry->type; + + /* write limit if limited packsize and not first object */ + limit = pack_size_limit && nr_written ? + pack_size_limit - write_offset : 0; + + if (!entry->delta) + usable_delta = 0; /* no delta */ + else if (!pack_size_limit) + usable_delta = 1; /* unlimited packfile */ + else if (entry->delta->idx.offset == (off_t)-1) + usable_delta = 0; /* base was written to another pack */ + else if (entry->delta->idx.offset) + usable_delta = 1; /* base already exists in this pack */ + else + usable_delta = 0; /* base could end up in another pack */ + if (!reuse_object) to_reuse = 0; /* explicit */ else if (!entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ - else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA) + else if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) /* check_object() decided it for us ... */ to_reuse = usable_delta; /* ... but pack split may override that */ - else if (obj_type != entry->in_pack_type) + else if (type != entry->in_pack_type) to_reuse = 0; /* pack has delta which is unusable */ else if (entry->delta) to_reuse = 0; /* we want to pack afresh */ @@ -274,19 +276,19 @@ static unsigned long write_object(struct sha1file *f, unsigned long maxsize; void *out; if (!usable_delta) { - buf = read_sha1_file(entry->idx.sha1, &obj_type, &size); + buf = read_sha1_file(entry->idx.sha1, &type, &size); if (!buf) die("unable to read %s", sha1_to_hex(entry->idx.sha1)); } else if (entry->delta_data) { size = entry->delta_size; buf = entry->delta_data; entry->delta_data = NULL; - obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? + type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } else { buf = get_delta(entry); size = entry->delta_size; - obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? + type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } /* compress the data to store and put compressed length in datalen */ @@ -308,9 +310,9 @@ static unsigned long write_object(struct sha1file *f, * The object header is a byte of 'type' followed by zero or * more bytes of length. */ - hdrlen = encode_header(obj_type, size, header); + hdrlen = encode_header(type, size, header); - if (obj_type == OBJ_OFS_DELTA) { + if (type == OBJ_OFS_DELTA) { /* * Deltas with relative base contain an additional * encoding of the relative offset for the delta @@ -329,7 +331,7 @@ static unsigned long write_object(struct sha1file *f, sha1write(f, header, hdrlen); sha1write(f, dheader + pos, sizeof(dheader) - pos); hdrlen += sizeof(dheader) - pos; - } else if (obj_type == OBJ_REF_DELTA) { + } else if (type == OBJ_REF_DELTA) { /* * Deltas with a base reference contain * an additional 20 bytes for the base sha1. @@ -361,11 +363,11 @@ static unsigned long write_object(struct sha1file *f, off_t offset; if (entry->delta) { - obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? + type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; reused_delta++; } - hdrlen = encode_header(obj_type, entry->size, header); + hdrlen = encode_header(type, entry->size, header); offset = entry->in_pack_offset; revidx = find_pack_revindex(p, offset); datalen = revidx[1].offset - offset; @@ -374,7 +376,7 @@ static unsigned long write_object(struct sha1file *f, die("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1)); offset += entry->in_pack_header_size; datalen -= entry->in_pack_header_size; - if (obj_type == OBJ_OFS_DELTA) { + if (type == OBJ_OFS_DELTA) { off_t ofs = entry->idx.offset - entry->delta->idx.offset; unsigned pos = sizeof(dheader) - 1; dheader[pos] = ofs & 127; @@ -385,7 +387,7 @@ static unsigned long write_object(struct sha1file *f, sha1write(f, header, hdrlen); sha1write(f, dheader + pos, sizeof(dheader) - pos); hdrlen += sizeof(dheader) - pos; - } else if (obj_type == OBJ_REF_DELTA) { + } else if (type == OBJ_REF_DELTA) { if (limit && hdrlen + 20 + datalen + 20 >= limit) return 0; sha1write(f, header, hdrlen); From 30ebb40aa1acdccbebe3a04cfb198b2b6f033afc Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 2 May 2008 15:11:49 -0400 Subject: [PATCH 5/7] pack-objects: move compression code in a separate function A later patch will make use of that code too. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 53 +++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 69ac27788b..3395acada4 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -124,6 +124,32 @@ static void *get_delta(struct object_entry *entry) return delta_buf; } +static unsigned long do_compress(void **pptr, unsigned long size) +{ + z_stream stream; + void *in, *out; + unsigned long maxsize; + + memset(&stream, 0, sizeof(stream)); + deflateInit(&stream, pack_compression_level); + maxsize = deflateBound(&stream, size); + + in = *pptr; + out = xmalloc(maxsize); + *pptr = out; + + stream.next_in = in; + stream.avail_in = size; + stream.next_out = out; + stream.avail_out = maxsize; + while (deflate(&stream, Z_FINISH) == Z_OK) + ; /* nothing */ + deflateEnd(&stream); + + free(in); + return stream.total_out; +} + /* * The per-object header is a pretty dense thing, which is * - first byte: low four bits are "size", then three bits of "type", @@ -226,11 +252,10 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry, off_t write_offset) { - unsigned long size, limit; + unsigned long size, limit, datalen; void *buf; unsigned char header[10], dheader[10]; unsigned hdrlen; - off_t datalen; enum object_type type; int usable_delta, to_reuse; @@ -272,9 +297,6 @@ static unsigned long write_object(struct sha1file *f, */ if (!to_reuse) { - z_stream stream; - unsigned long maxsize; - void *out; if (!usable_delta) { buf = read_sha1_file(entry->idx.sha1, &type, &size); if (!buf) @@ -291,20 +313,7 @@ static unsigned long write_object(struct sha1file *f, type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } - /* compress the data to store and put compressed length in datalen */ - memset(&stream, 0, sizeof(stream)); - deflateInit(&stream, pack_compression_level); - maxsize = deflateBound(&stream, size); - out = xmalloc(maxsize); - /* Compress it */ - stream.next_in = buf; - stream.avail_in = size; - stream.next_out = out; - stream.avail_out = maxsize; - while (deflate(&stream, Z_FINISH) == Z_OK) - /* nothing */; - deflateEnd(&stream); - datalen = stream.total_out; + datalen = do_compress(&buf, size); /* * The object header is a byte of 'type' followed by zero or @@ -324,7 +333,6 @@ static unsigned long write_object(struct sha1file *f, while (ofs >>= 7) dheader[--pos] = 128 | (--ofs & 127); if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { - free(out); free(buf); return 0; } @@ -337,7 +345,6 @@ static unsigned long write_object(struct sha1file *f, * an additional 20 bytes for the base sha1. */ if (limit && hdrlen + 20 + datalen + 20 >= limit) { - free(out); free(buf); return 0; } @@ -346,14 +353,12 @@ static unsigned long write_object(struct sha1file *f, hdrlen += 20; } else { if (limit && hdrlen + datalen + 20 >= limit) { - free(out); free(buf); return 0; } sha1write(f, header, hdrlen); } - sha1write(f, out, datalen); - free(out); + sha1write(f, buf, datalen); free(buf); } else { From ed4a9031ea75858d6f8ec387993e8ff731e29d1a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 2 May 2008 15:11:50 -0400 Subject: [PATCH 6/7] pack-objects: allow for early delta deflating When the delta data is cached in memory until it is written to a pack file on disk, it is best to compress it right away in find_deltas() for the following reasons: - we have to compress that data anyway; - this allows for caching more deltas with the same cache size limit; - compression is potentially threaded. This last point is especially relevant for SMP run time. For example, repacking the Linux repo on a quad core processor using 4 threads with all default settings produce the following results before this change: real 2m27.929s user 4m36.492s sys 0m3.091s And with this change applied: real 2m13.787s user 4m37.486s sys 0m3.159s So the actual execution time stayed more or less the same but the wall clock time is shorter. This is however not a good thing to do when generating a pack for network transmission. In that case, the network is most likely to throttle the data throughput, so it is best to make find_deltas() faster in order to start writing data ASAP since we can afford spending more time between writes to compress the data at that point. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 3395acada4..4a0c9c907b 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -43,6 +43,7 @@ struct object_entry { */ void *delta_data; /* cached delta (uncompressed) */ unsigned long delta_size; /* delta data size (uncompressed) */ + unsigned long z_delta_size; /* delta data size (compressed) */ unsigned int hash; /* name hint hash */ enum object_type type; enum object_type in_pack_type; /* could be delta */ @@ -301,6 +302,13 @@ static unsigned long write_object(struct sha1file *f, buf = read_sha1_file(entry->idx.sha1, &type, &size); if (!buf) die("unable to read %s", sha1_to_hex(entry->idx.sha1)); + /* + * make sure no cached delta data remains from a + * previous attempt before a pack split occured. + */ + free(entry->delta_data); + entry->delta_data = NULL; + entry->z_delta_size = 0; } else if (entry->delta_data) { size = entry->delta_size; buf = entry->delta_data; @@ -313,7 +321,11 @@ static unsigned long write_object(struct sha1file *f, type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } - datalen = do_compress(&buf, size); + + if (entry->z_delta_size) + datalen = entry->z_delta_size; + else + datalen = do_compress(&buf, size); /* * The object header is a byte of 'type' followed by zero or @@ -1447,6 +1459,29 @@ static void find_deltas(struct object_entry **list, unsigned *list_size, best_base = other_idx; } + /* + * If we decided to cache the delta data, then it is best + * to compress it right away. First because we have to do + * it anyway, and doing it here while we're threaded will + * save a lot of time in the non threaded write phase, + * as well as allow for caching more deltas within + * the same cache size limit. + * ... + * But only if not writing to stdout, since in that case + * the network is most likely throttling writes anyway, + * and therefore it is best to go to the write phase ASAP + * instead, as we can afford spending more time compressing + * between writes at that moment. + */ + if (entry->delta_data && !pack_to_stdout) { + entry->z_delta_size = do_compress(&entry->delta_data, + entry->delta_size); + cache_lock(); + delta_cache_size -= entry->delta_size; + delta_cache_size += entry->z_delta_size; + cache_unlock(); + } + /* if we made n a delta, and if n is already at max * depth, leaving it in the window is pointless. we * should evict it first. From 70baf5d41a933c7972375ae2583aad8c8b92633f Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 2 May 2008 15:11:51 -0400 Subject: [PATCH 7/7] pack-objects: fix early eviction for max depth delta objects The 'depth' variable doesn't reflect the actual maximum depth used when other objects already depend on the current one. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 4a0c9c907b..e20851e1c9 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -1486,7 +1486,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size, * depth, leaving it in the window is pointless. we * should evict it first. */ - if (entry->delta && depth <= n->depth) + if (entry->delta && max_depth <= n->depth) continue; /*