From fa736f72b0840950e32dfd8bc470e91831107b10 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 May 2007 12:31:28 -0400 Subject: [PATCH 1/3] allow for undeltified objects not to be reused Currently non deltified object data is always reused when possible. This means that any change to core.compression has no effect on those objects as they don't get recompressed when repacking them. Let's add a --no-reuse-object flag to git-pack-objects in order to force recompression of all objects when desired. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.txt | 7 +++++++ builtin-pack-objects.c | 22 ++++++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index bd3ee456e3..ce892147dd 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -127,6 +127,13 @@ base-name:: This flag tells the command not to reuse existing deltas but compute them from scratch. +--no-reuse-object:: + This flag tells the command not to reuse existing object data at all, + including non deltified objects, forcing recompression of everything. + This implies --no-reuse-delta. Useful only in the obscure case where + wholesale enforcement of a different compression level on the + packed data is desired. + --delta-base-offset:: A packed archive can express base object of a delta as either 20-byte object name or as an offset in the diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 966f843e43..d94c79a339 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -17,9 +17,9 @@ static const char pack_usage[] = "\ git-pack-objects [{ -q | --progress | --all-progress }] \n\ [--local] [--incremental] [--window=N] [--depth=N] \n\ - [--no-reuse-delta] [--delta-base-offset] [--non-empty] \n\ - [--revs [--unpacked | --all]*] [--reflog] [--stdout | base-name] \n\ - [type; - if (! 
entry->in_pack) + if (no_reuse_object) + to_reuse = 0; /* explicit */ + else if (!entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA) to_reuse = 1; /* check_object() decided it for us */ @@ -425,7 +427,7 @@ static unsigned long write_object(struct sha1file *f, * and we do not need to deltify it. */ - if (!entry->in_pack && !entry->delta) { + if (!no_reuse_object && !entry->in_pack && !entry->delta) { unsigned char *map; unsigned long mapsize; map = map_sha1_file(entry->sha1, &mapsize); @@ -1125,8 +1127,8 @@ static void check_object(struct object_entry *entry) buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail); /* - * We want in_pack_type even if we do not reuse delta. - * There is no point not reusing non-delta representations. + * We want in_pack_type even if we do not reuse delta + * since non-delta representations could still be reused. */ used = unpack_object_header_gently(buf, avail, &entry->in_pack_type, @@ -1655,6 +1657,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) no_reuse_delta = 1; continue; } + if (!strcmp("--no-reuse-object", arg)) { + no_reuse_object = no_reuse_delta = 1; + continue; + } if (!strcmp("--delta-base-offset", arg)) { allow_ofs_delta = 1; continue; From 479b56ba50144b30f28c5b225d412125c07def9f Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 May 2007 12:59:40 -0400 Subject: [PATCH 2/3] make "repack -f" imply "pack-objects --no-reuse-object" Recomputing delta is much more expensive than recompressing anyway, and when the user says 'repack -f', it is a sign that the user is willing to spend CPU cycles. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- git-repack.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git-repack.sh b/git-repack.sh index ddfa8b44a1..8bf66a4fe8 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -8,7 +8,7 @@ SUBDIRECTORY_OK='Yes' . 
git-sh-setup no_update_info= all_into_one= remove_redundant= -local= quiet= no_reuse_delta= extra= +local= quiet= no_reuse= extra= while case "$#" in 0) break ;; esac do case "$1" in @@ -16,7 +16,7 @@ do -a) all_into_one=t ;; -d) remove_redundant=t ;; -q) quiet=-q ;; - -f) no_reuse_delta=--no-reuse-delta ;; + -f) no_reuse=--no-reuse-object ;; -l) local=--local ;; --window=*) extra="$extra $1" ;; --depth=*) extra="$extra $1" ;; @@ -61,7 +61,7 @@ case ",$all_into_one," in ;; esac -args="$args $local $quiet $no_reuse_delta$extra" +args="$args $local $quiet $no_reuse$extra" name=$(git-pack-objects --non-empty --all --reflog $args Date: Wed, 9 May 2007 14:42:42 -0400 Subject: [PATCH 3/3] deprecate the new loose object header format Now that we encourage and actively preserve objects in a packed form more aggressively than we did at the time the new loose object format and core.legacyheaders were introduced, that extra loose object format doesn't appear to be worth it anymore. Because the packing of loose objects has to go through the delta match loop anyway, and since most of them should end up being deltified in most cases, there is really little advantage to have this parallel loose object format as the CPU savings it might provide are rather lost in the noise in the end. This patch gets rid of core.legacyheaders, preserves the legacy format as the only writable loose object format and deprecates the other one to keep things simpler. 
Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- Documentation/config.txt | 13 -------- builtin-pack-objects.c | 69 ---------------------------------------- cache.h | 2 -- config.c | 5 --- environment.c | 1 - sha1_file.c | 47 +++++++-------------------- 6 files changed, 11 insertions(+), 126 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index ea434af9db..d6d89ba463 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -209,19 +209,6 @@ core.compression:: compression, and 1..9 are various speed/size tradeoffs, 9 being slowest. -core.legacyheaders:: - A boolean which - changes the format of loose objects so that they are more - efficient to pack and to send out of the repository over git - native protocol, since v1.4.2. However, loose objects - written in the new format cannot be read by git older than - that version; people fetching from your repository using - older versions of git over dumb transports (e.g. http) - will also be affected. -+ -To let git use the new loose object format, you have to -set core.legacyheaders to false. - core.packedGitWindowSize:: Number of bytes of a pack file to map into memory in a single mapping operation. Larger window sizes may allow diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index d94c79a339..5fa98132fe 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -346,56 +346,6 @@ static void copy_pack_data(struct sha1file *f, } } -static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect) -{ - z_stream stream; - unsigned char fakebuf[4096]; - int st; - - memset(&stream, 0, sizeof(stream)); - stream.next_in = data; - stream.avail_in = len; - stream.next_out = fakebuf; - stream.avail_out = sizeof(fakebuf); - inflateInit(&stream); - - while (1) { - st = inflate(&stream, Z_FINISH); - if (st == Z_STREAM_END || st == Z_OK) { - st = (stream.total_out == expect && - stream.total_in == len) ? 
0 : -1; - break; - } - if (st != Z_BUF_ERROR) { - st = -1; - break; - } - stream.next_out = fakebuf; - stream.avail_out = sizeof(fakebuf); - } - inflateEnd(&stream); - return st; -} - -static int revalidate_loose_object(struct object_entry *entry, - unsigned char *map, - unsigned long mapsize) -{ - /* we already know this is a loose object with new type header. */ - enum object_type type; - unsigned long size, used; - - if (pack_to_stdout) - return 0; - - used = unpack_object_header_gently(map, mapsize, &type, &size); - if (!used) - return -1; - map += used; - mapsize -= used; - return check_loose_inflate(map, mapsize, size); -} - static unsigned long write_object(struct sha1file *f, struct object_entry *entry) { @@ -427,25 +377,6 @@ static unsigned long write_object(struct sha1file *f, * and we do not need to deltify it. */ - if (!no_reuse_object && !entry->in_pack && !entry->delta) { - unsigned char *map; - unsigned long mapsize; - map = map_sha1_file(entry->sha1, &mapsize); - if (map && !legacy_loose_object(map)) { - /* We can copy straight into the pack file */ - if (revalidate_loose_object(entry, map, mapsize)) - die("corrupt loose object %s", - sha1_to_hex(entry->sha1)); - sha1write(f, map, mapsize); - munmap(map, mapsize); - written++; - reused++; - return mapsize; - } - if (map) - munmap(map, mapsize); - } - if (!to_reuse) { buf = read_sha1_file(entry->sha1, &type, &size); if (!buf) diff --git a/cache.h b/cache.h index 8e76152645..5725bce6f4 100644 --- a/cache.h +++ b/cache.h @@ -273,7 +273,6 @@ extern void rollback_lock_file(struct lock_file *); extern int delete_ref(const char *, const unsigned char *sha1); /* Environment bits from configuration mechanism */ -extern int use_legacy_headers; extern int trust_executable_bit; extern int has_symlinks; extern int assume_unchanged; @@ -354,7 +353,6 @@ extern int move_temp_to_file(const char *tmpfile, const char *filename); extern int has_sha1_pack(const unsigned char *sha1, const char **ignore); extern int 
has_sha1_file(const unsigned char *sha1); extern void *map_sha1_file(const unsigned char *sha1, unsigned long *); -extern int legacy_loose_object(unsigned char *); extern int has_pack_file(const unsigned char *sha1); extern int has_pack_index(const unsigned char *sha1); diff --git a/config.c b/config.c index 70d1055679..298966f215 100644 --- a/config.c +++ b/config.c @@ -299,11 +299,6 @@ int git_default_config(const char *var, const char *value) return 0; } - if (!strcmp(var, "core.legacyheaders")) { - use_legacy_headers = git_config_bool(var, value); - return 0; - } - if (!strcmp(var, "core.compression")) { int level = git_config_int(var, value); if (level == -1) diff --git a/environment.c b/environment.c index 22316597df..54e3abae98 100644 --- a/environment.c +++ b/environment.c @@ -11,7 +11,6 @@ char git_default_email[MAX_GITNAME]; char git_default_name[MAX_GITNAME]; -int use_legacy_headers = 1; int trust_executable_bit = 1; int has_symlinks = 1; int assume_unchanged; diff --git a/sha1_file.c b/sha1_file.c index 32244d704e..e71552795a 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -972,7 +972,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) return map; } -int legacy_loose_object(unsigned char *map) +static int legacy_loose_object(unsigned char *map) { unsigned int word; @@ -1034,6 +1034,14 @@ static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned lon return inflate(stream, 0); } + + /* + * There used to be a second loose object header format which + * was meant to mimic the in-pack format, allowing for direct + * copy of the object data. This format turned out not to be + * really worth it and we don't write it any longer. But we + * can still read it. 
+ */ used = unpack_object_header_gently(map, mapsize, &type, &size); if (!used || !valid_loose_object_type[type]) return -1; @@ -1962,40 +1970,6 @@ static int write_buffer(int fd, const void *buf, size_t len) return 0; } -static int write_binary_header(unsigned char *hdr, enum object_type type, unsigned long len) -{ - int hdr_len; - unsigned char c; - - c = (type << 4) | (len & 15); - len >>= 4; - hdr_len = 1; - while (len) { - *hdr++ = c | 0x80; - hdr_len++; - c = (len & 0x7f); - len >>= 7; - } - *hdr = c; - return hdr_len; -} - -static void setup_object_header(z_stream *stream, const char *type, unsigned long len) -{ - int obj_type, hdrlen; - - if (use_legacy_headers) { - while (deflate(stream, 0) == Z_OK) - /* nothing */; - return; - } - obj_type = type_from_string(type); - hdrlen = write_binary_header(stream->next_out, obj_type, len); - stream->total_out = hdrlen; - stream->next_out += hdrlen; - stream->avail_out -= hdrlen; -} - int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1) { @@ -2062,7 +2036,8 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha /* First header.. */ stream.next_in = (unsigned char *)hdr; stream.avail_in = hdrlen; - setup_object_header(&stream, type, len); + while (deflate(&stream, 0) == Z_OK) + /* nothing */; /* Then the data itself.. */ stream.next_in = buf;