From a588d88aaff312f3afd5713ffcb4e4b1829fb5a6 Mon Sep 17 00:00:00 2001
From: Martin Koegler
Date: Mon, 28 May 2007 23:20:57 +0200
Subject: [PATCH 1/4] builtin-pack-objects: don't fail, if delta is not possible

If builtin-pack-objects runs out of memory while finding the best
deltas, it bails out with an error.

If the delta index creation fails (because there is not enough memory),
we can downgrade the error message to a warning and continue with the
next object.

Signed-off-by: Martin Koegler
Signed-off-by: Junio C Hamano
---
 builtin-pack-objects.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index e52332df99..17627b34e8 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1454,8 +1454,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	}
 	if (!src->index) {
 		src->index = create_delta_index(src->data, src_size);
-		if (!src->index)
-			die("out of memory");
+		if (!src->index) {
+			static int warned = 0;
+			if (!warned++)
+				warning("suboptimal pack - out of memory");
+			return 0;
+		}
 	}
 
 	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
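The change in PATCH 1/4 boils down to "warn once, then degrade gracefully": when
create_delta_index() cannot allocate, the object is simply stored undeltified instead of
aborting the whole pack. Below is a minimal standalone sketch of that pattern, not git
code; build_index() and the surrounding logic are made-up stand-ins for
create_delta_index() and try_delta():

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for create_delta_index(): returns NULL on OOM. */
static void *build_index(const void *data, unsigned long size)
{
	return malloc(size ? size : 1);
}

/* Try to deltify one object; return 0 to skip it instead of aborting. */
static int try_deltify(const void *data, unsigned long size)
{
	void *index = build_index(data, size);
	if (!index) {
		static int warned;
		if (!warned++)
			fprintf(stderr, "warning: suboptimal pack - out of memory\n");
		return 0;	/* store this object undeltified */
	}
	/* ... search for a delta against other objects using the index ... */
	free(index);
	return 1;
}

int main(void)
{
	char blob[] = "example object data";
	return try_deltify(blob, sizeof(blob)) ? 0 : 1;
}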
From 074b2eea296886e179ef73e1c364f370a223618a Mon Sep 17 00:00:00 2001
From: Martin Koegler
Date: Mon, 28 May 2007 23:20:58 +0200
Subject: [PATCH 2/4] git-pack-objects: cache small deltas between big objects

Creating deltas between big blobs is a CPU and memory intensive task.
In the writing phase, all deltas that are not reused are redone.

This patch adds support for caching deltas from the deltifying phase,
so that the writing phase is faster.

The caching is limited to small deltas to avoid increasing memory usage
very much.

The implemented limit is (memory needed to create the delta)/1024.

Signed-off-by: Martin Koegler
Signed-off-by: Junio C Hamano
---
 Documentation/config.txt |  5 +++
 builtin-pack-objects.c   | 69 +++++++++++++++++++++++++++++++---------
 2 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 3d8f03dfe5..ab0f8f4865 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -567,6 +567,11 @@ pack.compression::
 	slowest.  If not set, defaults to core.compression.  If that is
 	not set, defaults to -1.
 
+pack.deltaCacheSize::
+	The maximum memory in bytes used for caching deltas in
+	gitlink:git-pack-objects[1].
+	A value of 0 means no limit. Defaults to 0.
+
 pull.octopus::
 	The default merge strategy to use when pulling multiple branches
 	at once.
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 17627b34e8..9f035ba8e6 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -36,6 +36,7 @@ struct object_entry {
 	struct object_entry *delta_sibling; /* other deltified objects who
 					     * uses the same base as me
 					     */
+	void *delta_data;	/* cached delta (uncompressed) */
 	unsigned long delta_size;	/* delta data size (uncompressed) */
 	enum object_type type;
 	enum object_type in_pack_type;	/* could be delta */
@@ -76,6 +77,9 @@ static struct progress progress_state;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
 
+static unsigned long delta_cache_size = 0;
+static unsigned long max_delta_cache_size = 0;
+
 /*
  * The object names in objects array are hashed with this hashtable,
  * to help looking up the entry by object name.
@@ -405,24 +409,31 @@ static unsigned long write_object(struct sha1file *f,
 	z_stream stream;
 	unsigned long maxsize;
 	void *out;
-	buf = read_sha1_file(entry->sha1, &type, &size);
-	if (!buf)
-		die("unable to read %s", sha1_to_hex(entry->sha1));
-	if (size != entry->size)
-		die("object %s size inconsistency (%lu vs %lu)",
-		    sha1_to_hex(entry->sha1), size, entry->size);
-	if (usable_delta) {
-		buf = delta_against(buf, size, entry);
+	if (entry->delta_data && usable_delta) {
+		buf = entry->delta_data;
 		size = entry->delta_size;
 		obj_type = (allow_ofs_delta && entry->delta->offset) ?
 			OBJ_OFS_DELTA : OBJ_REF_DELTA;
 	} else {
-		/*
-		 * recover real object type in case
-		 * check_object() wanted to re-use a delta,
-		 * but we couldn't since base was in previous split pack
-		 */
-		obj_type = type;
+		buf = read_sha1_file(entry->sha1, &type, &size);
+		if (!buf)
+			die("unable to read %s", sha1_to_hex(entry->sha1));
+		if (size != entry->size)
+			die("object %s size inconsistency (%lu vs %lu)",
+			    sha1_to_hex(entry->sha1), size, entry->size);
+		if (usable_delta) {
+			buf = delta_against(buf, size, entry);
+			size = entry->delta_size;
+			obj_type = (allow_ofs_delta && entry->delta->offset) ?
+				OBJ_OFS_DELTA : OBJ_REF_DELTA;
+		} else {
+			/*
+			 * recover real object type in case
+			 * check_object() wanted to re-use a delta,
+			 * but we couldn't since base was in previous split pack
+			 */
+			obj_type = type;
+		}
 	}
 	/* compress the data to store and put compressed length in datalen */
 	memset(&stream, 0, sizeof(stream));
@@ -1385,6 +1396,20 @@ struct unpacked {
 	struct delta_index *index;
 };
 
+static int delta_cacheable(struct unpacked *trg, struct unpacked *src,
+		unsigned long src_size, unsigned long trg_size,
+		unsigned long delta_size)
+{
+	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
+		return 0;
+
+	/* cache delta, if objects are large enough compared to delta size */
+	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
+		return 1;
+
+	return 0;
+}
+
 /*
  * We search for deltas _backwards_ in a list sorted by type and
  * by size, so that we see progressively smaller and smaller files.
@@ -1466,10 +1491,20 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	if (!delta_buf)
 		return 0;
 
+	if (trg_entry->delta_data) {
+		delta_cache_size -= trg_entry->delta_size;
+		free(trg_entry->delta_data);
+	}
+	trg_entry->delta_data = 0;
 	trg_entry->delta = src_entry;
 	trg_entry->delta_size = delta_size;
 	trg_entry->depth = src_entry->depth + 1;
-	free(delta_buf);
+
+	if (delta_cacheable(src, trg, src_size, trg_size, delta_size)) {
+		trg_entry->delta_data = xrealloc(delta_buf, delta_size);
+		delta_cache_size += trg_entry->delta_size;
+	} else
+		free(delta_buf);
 
 	return 1;
 }
@@ -1615,6 +1650,10 @@ static int git_pack_config(const char *k, const char *v)
 		pack_compression_seen = 1;
 		return 0;
 	}
+	if (!strcmp(k, "pack.deltacachesize")) {
+		max_delta_cache_size = git_config_int(k, v);
+		return 0;
+	}
 	return git_default_config(k, v);
 }
 
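The "(memory needed to create the delta)/1024" limit described in PATCH 2/4 is the shift
arithmetic in delta_cacheable(): a delta is kept when delta_size/1024 is below
src_size/2^20 + trg_size/2^21, i.e. roughly (src_size + trg_size/2)/1024. The following
standalone sketch isolates just that cut-off; the object sizes are made-up examples and
the global cache-size accounting is omitted:

#include <stdio.h>

/*
 * Same shift arithmetic as delta_cacheable() in the patch above,
 * without the max_delta_cache_size bookkeeping.
 */
static int small_enough_to_cache(unsigned long src_size,
				 unsigned long trg_size,
				 unsigned long delta_size)
{
	return (src_size >> 20) + (trg_size >> 21) > (delta_size >> 10);
}

int main(void)
{
	/* 40 MiB source and 42 MiB target: 40 + 21 = 61, so deltas under ~61 KiB pass */
	unsigned long src = 40UL << 20, trg = 42UL << 20;
	unsigned long sizes[] = { 16UL << 10, 60UL << 10, 64UL << 10, 1UL << 20 };
	int i;

	for (i = 0; i < 4; i++)
		printf("delta of %lu bytes: %s\n", sizes[i],
		       small_enough_to_cache(src, trg, sizes[i]) ? "cache" : "skip");
	return 0;
}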
From e3dfddb377478dbee9c5b88636e97d62312f562d Mon Sep 17 00:00:00 2001
From: Martin Koegler
Date: Mon, 28 May 2007 23:20:59 +0200
Subject: [PATCH 3/4] builtin-pack-objects: cache small deltas

Signed-off-by: Martin Koegler
Signed-off-by: Junio C Hamano
---
 Documentation/config.txt | 4 ++++
 builtin-pack-objects.c   | 8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ab0f8f4865..6ea4f10150 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -572,6 +572,10 @@ pack.deltaCacheSize::
 	gitlink:git-pack-objects[1].
 	A value of 0 means no limit. Defaults to 0.
 
+pack.deltaCacheLimit::
+	The maximum size of a delta that is cached in
+	gitlink:git-pack-objects[1]. Defaults to 1000.
+
 pull.octopus::
 	The default merge strategy to use when pulling multiple branches
 	at once.
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 9f035ba8e6..41472fcbd0 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -79,6 +79,7 @@ static int pack_compression_seen;
 
 static unsigned long delta_cache_size = 0;
 static unsigned long max_delta_cache_size = 0;
+static unsigned long cache_max_small_delta_size = 1000;
 
 /*
  * The object names in objects array are hashed with this hashtable,
@@ -1403,6 +1404,9 @@ static int delta_cacheable(struct unpacked *trg, struct unpacked *src,
 	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
 		return 0;
 
+	if (delta_size < cache_max_small_delta_size)
+		return 1;
+
 	/* cache delta, if objects are large enough compared to delta size */
 	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
 		return 1;
@@ -1654,6 +1658,10 @@ static int git_pack_config(const char *k, const char *v)
 		max_delta_cache_size = git_config_int(k, v);
 		return 0;
 	}
+	if (!strcmp(k, "pack.deltacachelimit")) {
+		cache_max_small_delta_size = git_config_int(k, v);
+		return 0;
+	}
 	return git_default_config(k, v);
 }
 
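After PATCH 3/4 two configuration knobs feed the caching decision: pack.deltaCacheSize
caps the total amount of cached delta data, while pack.deltaCacheLimit lets any delta
below that size into the cache. A self-contained sketch of how the two limits interact;
the variable names and the set_pack_option() helper are invented for illustration (this
is not the git_config machinery), and the large-object ratio rule from the previous
patch is left out:

#include <stdio.h>
#include <string.h>

/* Illustrative analogues of pack.deltaCacheSize (0 = unlimited)
 * and pack.deltaCacheLimit (default 1000 bytes). */
static unsigned long cache_budget = 0;
static unsigned long small_delta_limit = 1000;
static unsigned long cache_used;

/* Hypothetical config hook in the spirit of git_pack_config(). */
static int set_pack_option(const char *key, unsigned long value)
{
	if (!strcmp(key, "pack.deltacachesize")) {
		cache_budget = value;
		return 0;
	}
	if (!strcmp(key, "pack.deltacachelimit")) {
		small_delta_limit = value;
		return 0;
	}
	return -1;	/* unknown key */
}

/* Combined decision: total-budget check first, then the small-delta fast path. */
static int would_cache(unsigned long delta_size)
{
	if (cache_budget && cache_used + delta_size > cache_budget)
		return 0;
	return delta_size < small_delta_limit;
}

int main(void)
{
	set_pack_option("pack.deltacachesize", 256UL << 20);	/* 256 MiB budget */
	set_pack_option("pack.deltacachelimit", 1000);
	printf("900-byte delta: %s\n", would_cache(900) ? "cached" : "not cached");
	printf("4 KiB delta:    %s\n", would_cache(4096) ? "cached" : "not cached");
	return 0;
}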
From 5476a8adcc29985e5496dac7a340dfd178f43a17 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre
Date: Wed, 30 May 2007 21:43:12 -0400
Subject: [PATCH 4/4] fix repack with --max-pack-size

Two issues here:

1) git-repack -a --max-pack-size=10 on the GIT repo dies pretty quickly.
   There is a lot of confusion about deltas that were supposed to be
   reused from another pack but get stored undeltified because of the
   pack size limit, so the object size no longer matches entry->size.
   This test is not really worth the complexity of determining when it
   is valid, so get rid of it.

2) If the pack size limit is reached, the object buffer is freed, even
   when it comes from the cached delta data.  In practice the object
   will be stored undeltified in a subsequent pack, but let's make sure
   no pointer to freed data survives by clearing entry->delta_data.

I also reorganized that code a bit to make it more readable.

Signed-off-by: Nicolas Pitre
Signed-off-by: Junio C Hamano
---
 builtin-pack-objects.c | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 41472fcbd0..ccb25f6a9c 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -410,31 +410,24 @@ static unsigned long write_object(struct sha1file *f,
 	z_stream stream;
 	unsigned long maxsize;
 	void *out;
-	if (entry->delta_data && usable_delta) {
-		buf = entry->delta_data;
+	if (!usable_delta) {
+		buf = read_sha1_file(entry->sha1, &obj_type, &size);
+		if (!buf)
+			die("unable to read %s", sha1_to_hex(entry->sha1));
+	} else if (entry->delta_data) {
 		size = entry->delta_size;
+		buf = entry->delta_data;
+		entry->delta_data = NULL;
 		obj_type = (allow_ofs_delta && entry->delta->offset) ?
 			OBJ_OFS_DELTA : OBJ_REF_DELTA;
 	} else {
 		buf = read_sha1_file(entry->sha1, &type, &size);
 		if (!buf)
 			die("unable to read %s", sha1_to_hex(entry->sha1));
-		if (size != entry->size)
-			die("object %s size inconsistency (%lu vs %lu)",
-			    sha1_to_hex(entry->sha1), size, entry->size);
-		if (usable_delta) {
-			buf = delta_against(buf, size, entry);
-			size = entry->delta_size;
-			obj_type = (allow_ofs_delta && entry->delta->offset) ?
-				OBJ_OFS_DELTA : OBJ_REF_DELTA;
-		} else {
-			/*
-			 * recover real object type in case
-			 * check_object() wanted to re-use a delta,
-			 * but we couldn't since base was in previous split pack
-			 */
-			obj_type = type;
-		}
+		buf = delta_against(buf, size, entry);
+		size = entry->delta_size;
+		obj_type = (allow_ofs_delta && entry->delta->offset) ?
+			OBJ_OFS_DELTA : OBJ_REF_DELTA;
 	}
 	/* compress the data to store and put compressed length in datalen */
 	memset(&stream, 0, sizeof(stream));
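After PATCH 4/4, write_object() obtains its buffer in one of three ways: read the object
as-is when no delta is usable, hand over the cached delta (clearing entry->delta_data so
nothing keeps pointing at memory that will later be freed), or recompute the delta from
the object data. The following is a simplified sketch of that control flow only;
load_object() and rebuild_delta() are placeholder stand-ins for read_sha1_file() and
delta_against(), not the real functions:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry {
	void *delta_data;		/* cached delta, or NULL */
	unsigned long delta_size;
};

/* Trivial stand-ins for read_sha1_file() and delta_against(). */
static void *load_object(unsigned long *size)
{
	*size = 4;
	return strcpy(malloc(5), "blob");
}

static void *rebuild_delta(void *buf, unsigned long *size)
{
	*size = 2;			/* pretend the delta is smaller */
	return buf;
}

/*
 * Shape of write_object() after this patch: one branch per buffer
 * source, and the cached pointer is handed off (then cleared) so a
 * later free(buf) cannot leave a dangling entry->delta_data.
 */
static void *pick_buffer(struct entry *e, int usable_delta, unsigned long *size)
{
	void *buf;

	if (!usable_delta) {
		buf = load_object(size);	/* store undeltified */
	} else if (e->delta_data) {
		buf = e->delta_data;		/* take over the cached delta */
		*size = e->delta_size;
		e->delta_data = NULL;
	} else {
		buf = load_object(size);	/* recompute the delta */
		buf = rebuild_delta(buf, size);
	}
	return buf;
}

int main(void)
{
	struct entry e = { strcpy(malloc(6), "delta"), 5 };
	unsigned long size;
	void *buf = pick_buffer(&e, 1, &size);

	printf("wrote %lu bytes from %s\n", size,
	       e.delta_data ? "a fresh buffer" : "the (now detached) cache");
	free(buf);
	return 0;
}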