diff --git a/Documentation/gitformat-pack.txt b/Documentation/gitformat-pack.txt index 9fcb29a9c8..d6ae229be5 100644 --- a/Documentation/gitformat-pack.txt +++ b/Documentation/gitformat-pack.txt @@ -396,6 +396,15 @@ CHUNK DATA: is padded at the end with between 0 and 3 NUL bytes to make the chunk size a multiple of 4 bytes. + Bitmapped Packfiles (ID: {'B', 'T', 'M', 'P'}) + Stores a table of two 4-byte unsigned integers in network order. + Each table entry corresponds to a single pack (in the order that + they appear above in the `PNAM` chunk). The values for each table + entry are as follows: + - The first bit position (in pseudo-pack order, see below) to + contain an object from that pack. + - The number of bits whose objects are selected from that pack. + OID Fanout (ID: {'O', 'I', 'D', 'F'}) The ith entry, F[i], stores the number of OIDs with first byte at most i. Thus F[255] stores the total @@ -509,6 +518,73 @@ packs arranged in MIDX order (with the preferred pack coming first). The MIDX's reverse index is stored in the optional 'RIDX' chunk within the MIDX itself. +=== `BTMP` chunk + +The Bitmapped Packfiles (`BTMP`) chunk encodes additional information +about the objects in the multi-pack index's reachability bitmap. Recall +that objects from the MIDX are arranged in "pseudo-pack" order (see +above) for reachability bitmaps. + +From the example above, suppose we have packs "a", "b", and "c", with +10, 15, and 20 objects, respectively. In pseudo-pack order, those would +be arranged as follows: + + |a,0|a,1|...|a,9|b,0|b,1|...|b,14|c,0|c,1|...|c,19| + +When working with single-pack bitmaps (or, equivalently, multi-pack +reachability bitmaps with a preferred pack), linkgit:git-pack-objects[1] +performs ``verbatim'' reuse, attempting to reuse chunks of the bitmapped +or preferred packfile instead of adding objects to the packing list. 
+ +When a chunk of bytes is reused from an existing pack, any objects +contained therein do not need to be added to the packing list, saving +memory and CPU time. But a chunk from an existing packfile can only be +reused when the following conditions are met: + + - The chunk contains only objects which were requested by the caller + (i.e. does not contain any objects which the caller didn't ask for + explicitly or implicitly). + + - All objects stored in non-thin packs as offset- or reference-deltas + also include their base object in the resulting pack. + +The `BTMP` chunk encodes the necessary information in order to implement +multi-pack reuse over a set of packfiles as described above. +Specifically, the `BTMP` chunk encodes two pieces of information (all +32-bit unsigned integers in network byte-order) for each packfile `p` +that is stored in the MIDX, as follows: + +`bitmap_pos`:: The first bit position (in pseudo-pack order) in the + multi-pack index's reachability bitmap occupied by an object from `p`. + +`bitmap_nr`:: The number of bit positions (including the one at + `bitmap_pos`) that encode objects from that pack `p`. + +For example, the `BTMP` chunk corresponding to the above example (with +packs ``a'', ``b'', and ``c'') would look like: + +[cols="1,2,2"] +|=== +| |`bitmap_pos` |`bitmap_nr` + +|packfile ``a'' +|`0` +|`10` + +|packfile ``b'' +|`10` +|`15` + +|packfile ``c'' +|`25` +|`20` +|=== + +With this information in place, we can treat each packfile as +individually reusable in the same fashion as verbatim pack reuse is +performed on individual packs prior to the implementation of the `BTMP` +chunk. 
+ == cruft packs The cruft packs feature offer an alternative to Git's traditional mechanism of diff --git a/midx.c b/midx.c index 8dba67ddbe..de25612b0c 100644 --- a/midx.c +++ b/midx.c @@ -33,6 +33,7 @@ #define MIDX_CHUNK_ALIGNMENT 4 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ +#define MIDX_CHUNKID_BITMAPPEDPACKS 0x42544d50 /* "BTMP" */ #define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */ @@ -41,6 +42,7 @@ #define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) #define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t)) #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) +#define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t)) #define MIDX_LARGE_OFFSET_NEEDED 0x80000000 #define PACK_EXPIRED UINT_MAX @@ -193,6 +195,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets, &m->chunk_large_offsets_len); + pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS, + (const unsigned char **)&m->chunk_bitmapped_packs, + &m->chunk_bitmapped_packs_len); if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex, @@ -286,6 +291,26 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t return 0; } +int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m, + struct bitmapped_pack *bp, uint32_t pack_int_id) +{ + if (!m->chunk_bitmapped_packs) + return error(_("MIDX does not contain the BTMP chunk")); + + if (prepare_midx_pack(r, m, pack_int_id)) + return error(_("could not load bitmapped pack %"PRIu32), pack_int_id); + + bp->p = m->packs[pack_int_id]; + bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs + + MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * pack_int_id); + bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs + + MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * pack_int_id + + 
sizeof(uint32_t)); + bp->pack_int_id = pack_int_id; + + return 0; +} + int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result) { return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup, @@ -468,10 +493,16 @@ static size_t write_midx_header(struct hashfile *f, return MIDX_HEADER_SIZE; } +#define BITMAP_POS_UNKNOWN (~((uint32_t)0)) + struct pack_info { uint32_t orig_pack_int_id; char *pack_name; struct packed_git *p; + + uint32_t bitmap_pos; + uint32_t bitmap_nr; + unsigned expired : 1; }; @@ -484,6 +515,7 @@ static void fill_pack_info(struct pack_info *info, info->orig_pack_int_id = orig_pack_int_id; info->pack_name = xstrdup(pack_name); info->p = p; + info->bitmap_pos = BITMAP_POS_UNKNOWN; } static int pack_info_compare(const void *_a, const void *_b) @@ -824,6 +856,26 @@ static int write_midx_pack_names(struct hashfile *f, void *data) return 0; } +static int write_midx_bitmapped_packs(struct hashfile *f, void *data) +{ + struct write_midx_context *ctx = data; + size_t i; + + for (i = 0; i < ctx->nr; i++) { + struct pack_info *pack = &ctx->info[i]; + if (pack->expired) + continue; + + if (pack->bitmap_pos == BITMAP_POS_UNKNOWN && pack->bitmap_nr) + BUG("pack '%s' has no bitmap position, but has %"PRIu32" bitmapped object(s)", + pack->pack_name, pack->bitmap_nr); + + hashwrite_be32(f, pack->bitmap_pos); + hashwrite_be32(f, pack->bitmap_nr); + } + return 0; +} + static int write_midx_oid_fanout(struct hashfile *f, void *data) { @@ -991,8 +1043,19 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) QSORT(data, ctx->entries_nr, midx_pack_order_cmp); ALLOC_ARRAY(pack_order, ctx->entries_nr); - for (i = 0; i < ctx->entries_nr; i++) + for (i = 0; i < ctx->entries_nr; i++) { + struct pack_midx_entry *e = &ctx->entries[data[i].nr]; + struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]]; + if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) + pack->bitmap_pos = i; + pack->bitmap_nr++; pack_order[i] = 
data[i].nr; + } + for (i = 0; i < ctx->nr; i++) { + struct pack_info *pack = &ctx->info[ctx->pack_perm[i]]; + if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) + pack->bitmap_pos = 0; + } free(data); trace2_region_leave("midx", "midx_pack_order", the_repository); @@ -1293,6 +1356,7 @@ static int write_midx_internal(const char *object_dir, struct hashfile *f = NULL; struct lock_file lk; struct write_midx_context ctx = { 0 }; + int bitmapped_packs_concat_len = 0; int pack_name_concat_len = 0; int dropped_packs = 0; int result = 0; @@ -1505,8 +1569,10 @@ static int write_midx_internal(const char *object_dir, } for (i = 0; i < ctx.nr; i++) { - if (!ctx.info[i].expired) - pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1; + if (ctx.info[i].expired) + continue; + pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1; + bitmapped_packs_concat_len += 2 * sizeof(uint32_t); } /* Check that the preferred pack wasn't expired (if given). */ @@ -1566,6 +1632,9 @@ static int write_midx_internal(const char *object_dir, add_chunk(cf, MIDX_CHUNKID_REVINDEX, st_mult(ctx.entries_nr, sizeof(uint32_t)), write_midx_revindex); + add_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS, + bitmapped_packs_concat_len, + write_midx_bitmapped_packs); } write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs); diff --git a/midx.h b/midx.h index a5d98919c8..b404235db5 100644 --- a/midx.h +++ b/midx.h @@ -7,6 +7,7 @@ struct object_id; struct pack_entry; struct repository; +struct bitmapped_pack; #define GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX" #define GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP \ @@ -33,6 +34,8 @@ struct multi_pack_index { const unsigned char *chunk_pack_names; size_t chunk_pack_names_len; + const uint32_t *chunk_bitmapped_packs; + size_t chunk_bitmapped_packs_len; const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const unsigned char *chunk_object_offsets; @@ -58,6 +61,8 @@ void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m); 
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local); int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id); +int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m, + struct bitmapped_pack *bp, uint32_t pack_int_id); int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos); uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos); diff --git a/pack-bitmap.h b/pack-bitmap.h index 5273a6a019..b68b213388 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -52,6 +52,15 @@ typedef int (*show_reachable_fn)( struct bitmap_index; +struct bitmapped_pack { + struct packed_git *p; + + uint32_t bitmap_pos; + uint32_t bitmap_nr; + + uint32_t pack_int_id; /* MIDX only */ +}; + struct bitmap_index *prepare_bitmap_git(struct repository *r); struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx); void count_bitmap_commit_list(struct bitmap_index *, uint32_t *commits, diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index e9a444ddba..e48557aba1 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -100,10 +100,36 @@ static int read_midx_preferred_pack(const char *object_dir) return 0; } +static int read_midx_bitmapped_packs(const char *object_dir) +{ + struct multi_pack_index *midx = NULL; + struct bitmapped_pack pack; + uint32_t i; + + setup_git_directory(); + + midx = load_multi_pack_index(object_dir, 1); + if (!midx) + return 1; + + for (i = 0; i < midx->num_packs; i++) { + if (nth_bitmapped_pack(the_repository, midx, &pack, i) < 0) + return 1; + + printf("%s\n", pack_basename(pack.p)); + printf(" bitmap_pos: %"PRIuMAX"\n", (uintmax_t)pack.bitmap_pos); + printf(" bitmap_nr: %"PRIuMAX"\n", (uintmax_t)pack.bitmap_nr); + } + + close_midx(midx); + + return 0; +} + int cmd__read_midx(int argc, const char **argv) { if (!(argc 
== 2 || argc == 3)) - usage("read-midx [--show-objects|--checksum|--preferred-pack] <object-dir>"); + usage("read-midx [--show-objects|--checksum|--preferred-pack|--bitmap] <object-dir>"); if (!strcmp(argv[1], "--show-objects")) return read_midx_file(argv[2], 1); @@ -111,5 +137,7 @@ int cmd__read_midx(int argc, const char **argv) return read_midx_checksum(argv[2]); else if (!strcmp(argv[1], "--preferred-pack")) return read_midx_preferred_pack(argv[2]); + else if (!strcmp(argv[1], "--bitmap")) + return read_midx_bitmapped_packs(argv[2]); return read_midx_file(argv[1], 0); } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index c20aafe99a..dd09134db0 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1171,4 +1171,39 @@ test_expect_success 'reader notices out-of-bounds fanout' ' test_cmp expect err ' +test_expect_success 'bitmapped packs are stored via the BTMP chunk' ' + test_when_finished "rm -fr repo" && + git init repo && + ( + cd repo && + + for i in 1 2 3 4 5 + do + test_commit "$i" && + git repack -d || return 1 + done && + + find $objdir/pack -type f -name "*.idx" | xargs -n 1 basename | + sort >packs && + + git multi-pack-index write --stdin-packs <packs && + test_must_fail test-tool read-midx --bitmap $objdir 2>err && + cat >expect <<-\EOF && + error: MIDX does not contain the BTMP chunk + EOF + test_cmp expect err && + + git multi-pack-index write --stdin-packs --bitmap \ + --preferred-pack="$(head -n1 <packs)" <packs && + test-tool read-midx --bitmap $objdir >actual && + for i in $(test_seq $(wc -l <packs)) + do + sed -ne "${i}s/\.idx$/\.pack/p" packs && + echo " bitmap_pos: $((($i - 1) * 3))" && + echo " bitmap_nr: 3" || return 1 + done >expect && + test_cmp expect actual + ) +' + test_done