diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 777f272668..e20851e1c9 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -43,6 +43,7 @@ struct object_entry { */ void *delta_data; /* cached delta (uncompressed) */ unsigned long delta_size; /* delta data size (uncompressed) */ + unsigned long z_delta_size; /* delta data size (compressed) */ unsigned int hash; /* name hint hash */ enum object_type type; enum object_type in_pack_type; /* could be delta */ @@ -65,7 +66,8 @@ static struct pack_idx_entry **written_list; static uint32_t nr_objects, nr_alloc, nr_result, nr_written; static int non_empty; -static int no_reuse_delta, no_reuse_object, keep_unreachable, include_tag; +static int reuse_delta = 1, reuse_object = 1; +static int keep_unreachable, include_tag; static int local; static int incremental; static int allow_ofs_delta; @@ -102,24 +104,53 @@ static uint32_t written, written_delta; static uint32_t reused, reused_delta; -static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) +static void *get_delta(struct object_entry *entry) { - unsigned long othersize, delta_size; + unsigned long size, base_size, delta_size; + void *buf, *base_buf, *delta_buf; enum object_type type; - void *otherbuf = read_sha1_file(entry->delta->idx.sha1, &type, &othersize); - void *delta_buf; - if (!otherbuf) + buf = read_sha1_file(entry->idx.sha1, &type, &size); + if (!buf) + die("unable to read %s", sha1_to_hex(entry->idx.sha1)); + base_buf = read_sha1_file(entry->delta->idx.sha1, &type, &base_size); + if (!base_buf) die("unable to read %s", sha1_to_hex(entry->delta->idx.sha1)); - delta_buf = diff_delta(otherbuf, othersize, + delta_buf = diff_delta(base_buf, base_size, buf, size, &delta_size, 0); - if (!delta_buf || delta_size != entry->delta_size) + if (!delta_buf || delta_size != entry->delta_size) die("delta size changed"); - free(buf); - free(otherbuf); + free(buf); + free(base_buf); return delta_buf; } +static unsigned long do_compress(void **pptr, unsigned long size) +{ + z_stream stream; + void *in, *out; + unsigned long maxsize; + + memset(&stream, 0, sizeof(stream)); + deflateInit(&stream, pack_compression_level); + maxsize = deflateBound(&stream, size); + + in = *pptr; + out = xmalloc(maxsize); + *pptr = out; + + stream.next_in = in; + stream.avail_in = size; + stream.next_out = out; + stream.avail_out = maxsize; + while (deflate(&stream, Z_FINISH) == Z_OK) + ; /* nothing */ + deflateEnd(&stream); + + free(in); + return stream.total_out; +} + /* * The per-object header is a pretty dense thing, which is * - first byte: low four bits are "size", then three bits of "type", @@ -222,42 +253,42 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry, off_t write_offset) { - unsigned long size; - enum object_type type; + unsigned long size, limit, datalen; void *buf; - unsigned char header[10]; - unsigned char dheader[10]; + unsigned char header[10], dheader[10]; unsigned hdrlen; - off_t datalen; - enum object_type obj_type; - int to_reuse = 0; - /* write limit if limited packsize and not first object */ - unsigned long limit = pack_size_limit && nr_written ? - pack_size_limit - write_offset : 0; - /* no if no delta */ - int usable_delta = !entry->delta ? 0 : - /* yes if unlimited packfile */ - !pack_size_limit ? 1 : - /* no if base written to previous pack */ - entry->delta->idx.offset == (off_t)-1 ? 0 : - /* otherwise double-check written to this - * pack, like we do below - */ - entry->delta->idx.offset ? 1 : 0; + enum object_type type; + int usable_delta, to_reuse; if (!pack_to_stdout) crc32_begin(f); - obj_type = entry->type; - if (no_reuse_object) + type = entry->type; + + /* write limit if limited packsize and not first object */ + limit = pack_size_limit && nr_written ? + pack_size_limit - write_offset : 0; + + if (!entry->delta) + usable_delta = 0; /* no delta */ + else if (!pack_size_limit) + usable_delta = 1; /* unlimited packfile */ + else if (entry->delta->idx.offset == (off_t)-1) + usable_delta = 0; /* base was written to another pack */ + else if (entry->delta->idx.offset) + usable_delta = 1; /* base already exists in this pack */ + else + usable_delta = 0; /* base could end up in another pack */ + + if (!reuse_object) to_reuse = 0; /* explicit */ else if (!entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ - else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA) + else if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) /* check_object() decided it for us ... */ to_reuse = usable_delta; /* ... but pack split may override that */ - else if (obj_type != entry->in_pack_type) + else if (type != entry->in_pack_type) to_reuse = 0; /* pack has delta which is unusable */ else if (entry->delta) to_reuse = 0; /* we want to pack afresh */ @@ -267,50 +298,42 @@ static unsigned long write_object(struct sha1file *f, */ if (!to_reuse) { - z_stream stream; - unsigned long maxsize; - void *out; if (!usable_delta) { - buf = read_sha1_file(entry->idx.sha1, &obj_type, &size); + buf = read_sha1_file(entry->idx.sha1, &type, &size); if (!buf) die("unable to read %s", sha1_to_hex(entry->idx.sha1)); + /* + * make sure no cached delta data remains from a + * previous attempt before a pack split occured. + */ + free(entry->delta_data); + entry->delta_data = NULL; + entry->z_delta_size = 0; } else if (entry->delta_data) { size = entry->delta_size; buf = entry->delta_data; entry->delta_data = NULL; - obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? + type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } else { - buf = read_sha1_file(entry->idx.sha1, &type, &size); - if (!buf) - die("unable to read %s", sha1_to_hex(entry->idx.sha1)); - buf = delta_against(buf, size, entry); + buf = get_delta(entry); size = entry->delta_size; - obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? + type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } - /* compress the data to store and put compressed length in datalen */ - memset(&stream, 0, sizeof(stream)); - deflateInit(&stream, pack_compression_level); - maxsize = deflateBound(&stream, size); - out = xmalloc(maxsize); - /* Compress it */ - stream.next_in = buf; - stream.avail_in = size; - stream.next_out = out; - stream.avail_out = maxsize; - while (deflate(&stream, Z_FINISH) == Z_OK) - /* nothing */; - deflateEnd(&stream); - datalen = stream.total_out; + + if (entry->z_delta_size) + datalen = entry->z_delta_size; + else + datalen = do_compress(&buf, size); /* * The object header is a byte of 'type' followed by zero or * more bytes of length. */ - hdrlen = encode_header(obj_type, size, header); + hdrlen = encode_header(type, size, header); - if (obj_type == OBJ_OFS_DELTA) { + if (type == OBJ_OFS_DELTA) { /* * Deltas with relative base contain an additional * encoding of the relative offset for the delta @@ -322,20 +345,18 @@ static unsigned long write_object(struct sha1file *f, while (ofs >>= 7) dheader[--pos] = 128 | (--ofs & 127); if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { - free(out); free(buf); return 0; } sha1write(f, header, hdrlen); sha1write(f, dheader + pos, sizeof(dheader) - pos); hdrlen += sizeof(dheader) - pos; - } else if (obj_type == OBJ_REF_DELTA) { + } else if (type == OBJ_REF_DELTA) { /* * Deltas with a base reference contain * an additional 20 bytes for the base sha1. */ if (limit && hdrlen + 20 + datalen + 20 >= limit) { - free(out); free(buf); return 0; } @@ -344,14 +365,12 @@ static unsigned long write_object(struct sha1file *f, hdrlen += 20; } else { if (limit && hdrlen + datalen + 20 >= limit) { - free(out); free(buf); return 0; } sha1write(f, header, hdrlen); } - sha1write(f, out, datalen); - free(out); + sha1write(f, buf, datalen); free(buf); } else { @@ -361,11 +380,11 @@ static unsigned long write_object(struct sha1file *f, off_t offset; if (entry->delta) { - obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? + type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; reused_delta++; } - hdrlen = encode_header(obj_type, entry->size, header); + hdrlen = encode_header(type, entry->size, header); offset = entry->in_pack_offset; revidx = find_pack_revindex(p, offset); datalen = revidx[1].offset - offset; @@ -374,7 +393,7 @@ static unsigned long write_object(struct sha1file *f, die("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1)); offset += entry->in_pack_header_size; datalen -= entry->in_pack_header_size; - if (obj_type == OBJ_OFS_DELTA) { + if (type == OBJ_OFS_DELTA) { off_t ofs = entry->idx.offset - entry->delta->idx.offset; unsigned pos = sizeof(dheader) - 1; dheader[pos] = ofs & 127; @@ -385,7 +404,7 @@ static unsigned long write_object(struct sha1file *f, sha1write(f, header, hdrlen); sha1write(f, dheader + pos, sizeof(dheader) - pos); hdrlen += sizeof(dheader) - pos; - } else if (obj_type == OBJ_REF_DELTA) { + } else if (type == OBJ_REF_DELTA) { if (limit && hdrlen + 20 + datalen + 20 >= limit) return 0; sha1write(f, header, hdrlen); @@ -452,11 +471,10 @@ static void write_pack_file(void) struct sha1file *f; off_t offset, offset_one, last_obj_offset = 0; struct pack_header hdr; - int do_progress = progress >> pack_to_stdout; uint32_t nr_remaining = nr_result; time_t last_mtime = 0; - if (do_progress) + if (progress > pack_to_stdout) progress_state = start_progress("Writing objects", nr_result); written_list = xmalloc(nr_objects * sizeof(*written_list)); @@ -1022,7 +1040,7 @@ static void check_object(struct object_entry *entry) unuse_pack(&w_curs); return; case OBJ_REF_DELTA: - if (!no_reuse_delta && !entry->preferred_base) + if (reuse_delta && !entry->preferred_base) base_ref = use_pack(p, &w_curs, entry->in_pack_offset + used, NULL); entry->in_pack_header_size = used + 20; @@ -1045,7 +1063,7 @@ static void check_object(struct object_entry *entry) die("delta base offset out of bound for %s", sha1_to_hex(entry->idx.sha1)); ofs = entry->in_pack_offset - ofs; - if (!no_reuse_delta && !entry->preferred_base) { + if (reuse_delta && !entry->preferred_base) { struct revindex_entry *revidx; revidx = find_pack_revindex(p, ofs); base_ref = nth_packed_object_sha1(p, revidx->nr); @@ -1233,7 +1251,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, * We do not bother to try a delta that we discarded * on an earlier try, but only when reusing delta data. */ - if (!no_reuse_delta && trg_entry->in_pack && + if (reuse_delta && trg_entry->in_pack && trg_entry->in_pack == src_entry->in_pack && trg_entry->in_pack_type != OBJ_REF_DELTA && trg_entry->in_pack_type != OBJ_OFS_DELTA) @@ -1441,11 +1459,34 @@ static void find_deltas(struct object_entry **list, unsigned *list_size, best_base = other_idx; } + /* + * If we decided to cache the delta data, then it is best + * to compress it right away. First because we have to do + * it anyway, and doing it here while we're threaded will + * save a lot of time in the non threaded write phase, + * as well as allow for caching more deltas within + * the same cache size limit. + * ... + * But only if not writing to stdout, since in that case + * the network is most likely throttling writes anyway, + * and therefore it is best to go to the write phase ASAP + * instead, as we can afford spending more time compressing + * between writes at that moment. + */ + if (entry->delta_data && !pack_to_stdout) { + entry->z_delta_size = do_compress(&entry->delta_data, + entry->delta_size); + cache_lock(); + delta_cache_size -= entry->delta_size; + delta_cache_size += entry->z_delta_size; + cache_unlock(); + } + /* if we made n a delta, and if n is already at max * depth, leaving it in the window is pointless. we * should evict it first. */ - if (entry->delta && depth <= n->depth) + if (entry->delta && max_depth <= n->depth) continue; /* @@ -1688,7 +1729,7 @@ static void prepare_pack(int window, int depth) if (entry->delta) /* This happens if we decided to reuse existing - * delta from a pack. "!no_reuse_delta &&" is implied. + * delta from a pack. "reuse_delta &&" is implied. */ continue; @@ -2050,11 +2091,11 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) continue; } if (!strcmp("--no-reuse-delta", arg)) { - no_reuse_delta = 1; + reuse_delta = 0; continue; } if (!strcmp("--no-reuse-object", arg)) { - no_reuse_object = no_reuse_delta = 1; + reuse_object = reuse_delta = 0; continue; } if (!strcmp("--delta-base-offset", arg)) {