@ -4,6 +4,7 @@
@@ -4,6 +4,7 @@
#include "lockfile.h"
#include "packfile.h"
#include "object-store.h"
#include "packfile.h"
#include "midx.h"
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
@ -182,12 +183,21 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
@@ -182,12 +183,21 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
packs->list[packs->nr] = add_packed_git(full_path,
full_path_len,
0);
if (!packs->list[packs->nr]) {
warning(_("failed to add packfile '%s'"),
full_path);
return;
}
if (open_pack_index(packs->list[packs->nr])) {
warning(_("failed to open pack-index '%s'"),
full_path);
close_pack(packs->list[packs->nr]);
FREE_AND_NULL(packs->list[packs->nr]);
return;
}
packs->names[packs->nr] = xstrdup(file_name);
packs->pack_name_concat_len += strlen(file_name) + 1;
packs->nr++;
@ -228,6 +238,119 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p
@@ -228,6 +238,119 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p
free(pairs);
}
struct pack_midx_entry {
struct object_id oid;
uint32_t pack_int_id;
time_t pack_mtime;
uint64_t offset;
};
static int midx_oid_compare(const void *_a, const void *_b)
{
const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a;
const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b;
int cmp = oidcmp(&a->oid, &b->oid);
if (cmp)
return cmp;
if (a->pack_mtime > b->pack_mtime)
return -1;
else if (a->pack_mtime < b->pack_mtime)
return 1;
return a->pack_int_id - b->pack_int_id;
}
static void fill_pack_entry(uint32_t pack_int_id,
struct packed_git *p,
uint32_t cur_object,
struct pack_midx_entry *entry)
{
if (!nth_packed_object_oid(&entry->oid, p, cur_object))
die(_("failed to locate object %d in packfile"), cur_object);
entry->pack_int_id = pack_int_id;
entry->pack_mtime = p->mtime;
entry->offset = nth_packed_object_offset(p, cur_object);
}
/*
* It is possible to artificially get into a state where there are many
* duplicate copies of objects. That can create high memory pressure if
* we are to create a list of all objects before de-duplication. To reduce
* this memory pressure without a significant performance drop, automatically
* group objects by the first byte of their object id. Use the IDX fanout
* tables to group the data, copy to a local array, then sort.
*
* Copy only the de-duplicated entries (selected by most-recent modified time
* of a packfile containing the object).
*/
static struct pack_midx_entry *get_sorted_entries(struct packed_git **p,
uint32_t *perm,
uint32_t nr_packs,
uint32_t *nr_objects)
{
uint32_t cur_fanout, cur_pack, cur_object;
uint32_t alloc_fanout, alloc_objects, total_objects = 0;
struct pack_midx_entry *entries_by_fanout = NULL;
struct pack_midx_entry *deduplicated_entries = NULL;
for (cur_pack = 0; cur_pack < nr_packs; cur_pack++)
total_objects += p[cur_pack]->num_objects;
/*
* As we de-duplicate by fanout value, we expect the fanout
* slices to be evenly distributed, with some noise. Hence,
* allocate slightly more than one 256th.
*/
alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16;
ALLOC_ARRAY(entries_by_fanout, alloc_fanout);
ALLOC_ARRAY(deduplicated_entries, alloc_objects);
*nr_objects = 0;
for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
uint32_t nr_fanout = 0;
for (cur_pack = 0; cur_pack < nr_packs; cur_pack++) {
uint32_t start = 0, end;
if (cur_fanout)
start = get_pack_fanout(p[cur_pack], cur_fanout - 1);
end = get_pack_fanout(p[cur_pack], cur_fanout);
for (cur_object = start; cur_object < end; cur_object++) {
ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
fill_pack_entry(perm[cur_pack], p[cur_pack], cur_object, &entries_by_fanout[nr_fanout]);
nr_fanout++;
}
}
QSORT(entries_by_fanout, nr_fanout, midx_oid_compare);
/*
* The batch is now sorted by OID and then mtime (descending).
* Take only the first duplicate.
*/
for (cur_object = 0; cur_object < nr_fanout; cur_object++) {
if (cur_object && !oidcmp(&entries_by_fanout[cur_object - 1].oid,
&entries_by_fanout[cur_object].oid))
continue;
ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects);
memcpy(&deduplicated_entries[*nr_objects],
&entries_by_fanout[cur_object],
sizeof(struct pack_midx_entry));
(*nr_objects)++;
}
}
free(entries_by_fanout);
return deduplicated_entries;
}
static size_t write_midx_pack_names(struct hashfile *f,
char **pack_names,
uint32_t num_packs)
@ -271,6 +394,8 @@ int write_midx_file(const char *object_dir)
@@ -271,6 +394,8 @@ int write_midx_file(const char *object_dir)
uint64_t written = 0;
uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];
uint32_t nr_entries;
struct pack_midx_entry *entries = NULL;
midx_name = get_midx_filename(object_dir);
if (safe_create_leading_directories(midx_name)) {
@ -296,6 +421,8 @@ int write_midx_file(const char *object_dir)
@@ -296,6 +421,8 @@ int write_midx_file(const char *object_dir)
ALLOC_ARRAY(pack_perm, packs.nr);
sort_packs_by_name(packs.names, packs.nr, pack_perm);
entries = get_sorted_entries(packs.list, pack_perm, packs.nr, &nr_entries);
hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
FREE_AND_NULL(midx_name);
@ -365,5 +492,6 @@ int write_midx_file(const char *object_dir)
@@ -365,5 +492,6 @@ int write_midx_file(const char *object_dir)
free(packs.list);
free(packs.names);
free(entries);
return 0;
}