pack-objects: better check_object() performance
With a large number of objects, check_object() really thrashes the pack sliding map and the filesystem cache. It has a completely random access pattern, especially with old objects where delta replay jumps back and forth all over the pack. This patch improves things by: 1) sorting objects by their offset in pack before calling check_object() so the pack access pattern is linear; 2) recording the object type at add_object_entry() time since it is already known in most cases; 3) recording the pack offset even for preferred_base objects; 4) avoiding calls to sha1_object_info() if at all possible. This limits pack accesses to the bare minimum and makes them perfectly linear. In the process check_object() was made clearer (to me at least). Note: I thought about walking the sorted_by_offset list backward in get_object_details() so that, if a pack happens to be larger than the available file cache, the cache would already have been populated with useful data from the beginning of the pack when find_deltas() is called. Strangely, testing (on Linux) showed absolutely no performance difference. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
maint
parent
54dab52ae8
commit
5c49c11686
|
@ -813,7 +813,8 @@ static unsigned name_hash(const char *name)
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude)
|
static int add_object_entry(const unsigned char *sha1, enum object_type type,
|
||||||
|
unsigned hash, int exclude)
|
||||||
{
|
{
|
||||||
struct object_entry *entry;
|
struct object_entry *entry;
|
||||||
struct packed_git *p, *found_pack = NULL;
|
struct packed_git *p, *found_pack = NULL;
|
||||||
|
@ -831,19 +832,19 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!exclude) {
|
for (p = packed_git; p; p = p->next) {
|
||||||
for (p = packed_git; p; p = p->next) {
|
off_t offset = find_pack_entry_one(sha1, p);
|
||||||
off_t offset = find_pack_entry_one(sha1, p);
|
if (offset) {
|
||||||
if (offset) {
|
if (!found_pack) {
|
||||||
if (incremental)
|
found_offset = offset;
|
||||||
return 0;
|
found_pack = p;
|
||||||
if (local && !p->pack_local)
|
|
||||||
return 0;
|
|
||||||
if (!found_pack) {
|
|
||||||
found_offset = offset;
|
|
||||||
found_pack = p;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (exclude)
|
||||||
|
break;
|
||||||
|
if (incremental)
|
||||||
|
return 0;
|
||||||
|
if (local && !p->pack_local)
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -856,6 +857,8 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud
|
||||||
memset(entry, 0, sizeof(*entry));
|
memset(entry, 0, sizeof(*entry));
|
||||||
hashcpy(entry->sha1, sha1);
|
hashcpy(entry->sha1, sha1);
|
||||||
entry->hash = hash;
|
entry->hash = hash;
|
||||||
|
if (type)
|
||||||
|
entry->type = type;
|
||||||
if (exclude)
|
if (exclude)
|
||||||
entry->preferred_base = 1;
|
entry->preferred_base = 1;
|
||||||
else
|
else
|
||||||
|
@ -1008,7 +1011,9 @@ static void add_pbase_object(struct tree_desc *tree,
|
||||||
return;
|
return;
|
||||||
if (name[cmplen] != '/') {
|
if (name[cmplen] != '/') {
|
||||||
unsigned hash = name_hash(fullname);
|
unsigned hash = name_hash(fullname);
|
||||||
add_object_entry(entry.sha1, hash, 1);
|
add_object_entry(entry.sha1,
|
||||||
|
S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB,
|
||||||
|
hash, 1);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (S_ISDIR(entry.mode)) {
|
if (S_ISDIR(entry.mode)) {
|
||||||
|
@ -1079,7 +1084,7 @@ static void add_preferred_base_object(const char *name, unsigned hash)
|
||||||
cmplen = name_cmp_len(name);
|
cmplen = name_cmp_len(name);
|
||||||
for (it = pbase_tree; it; it = it->next) {
|
for (it = pbase_tree; it; it = it->next) {
|
||||||
if (cmplen == 0) {
|
if (cmplen == 0) {
|
||||||
add_object_entry(it->pcache.sha1, 0, 1);
|
add_object_entry(it->pcache.sha1, OBJ_TREE, 0, 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
struct tree_desc tree;
|
struct tree_desc tree;
|
||||||
|
@ -1121,87 +1126,105 @@ static void add_preferred_base(unsigned char *sha1)
|
||||||
|
|
||||||
static void check_object(struct object_entry *entry)
|
static void check_object(struct object_entry *entry)
|
||||||
{
|
{
|
||||||
if (entry->in_pack && !entry->preferred_base) {
|
if (entry->in_pack) {
|
||||||
struct packed_git *p = entry->in_pack;
|
struct packed_git *p = entry->in_pack;
|
||||||
struct pack_window *w_curs = NULL;
|
struct pack_window *w_curs = NULL;
|
||||||
unsigned long size, used;
|
const unsigned char *base_ref = NULL;
|
||||||
|
struct object_entry *base_entry;
|
||||||
|
unsigned long used, used_0;
|
||||||
unsigned int avail;
|
unsigned int avail;
|
||||||
unsigned char *buf;
|
off_t ofs;
|
||||||
struct object_entry *base_entry = NULL;
|
unsigned char *buf, c;
|
||||||
|
|
||||||
buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
|
buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
|
||||||
|
|
||||||
/* We want in_pack_type even if we do not reuse delta.
|
/*
|
||||||
|
* We want in_pack_type even if we do not reuse delta.
|
||||||
* There is no point not reusing non-delta representations.
|
* There is no point not reusing non-delta representations.
|
||||||
*/
|
*/
|
||||||
used = unpack_object_header_gently(buf, avail,
|
used = unpack_object_header_gently(buf, avail,
|
||||||
&entry->in_pack_type, &size);
|
&entry->in_pack_type,
|
||||||
|
&entry->size);
|
||||||
|
|
||||||
/* Check if it is delta, and the base is also an object
|
/*
|
||||||
* we are going to pack. If so we will reuse the existing
|
* Determine if this is a delta and if so whether we can
|
||||||
* delta.
|
* reuse it or not. Otherwise let's find out as cheaply as
|
||||||
|
* possible what the actual type and size for this object is.
|
||||||
*/
|
*/
|
||||||
if (!no_reuse_delta) {
|
switch (entry->in_pack_type) {
|
||||||
unsigned char c;
|
default:
|
||||||
const unsigned char *base_name;
|
/* Not a delta hence we've already got all we need. */
|
||||||
off_t ofs;
|
|
||||||
unsigned long used_0;
|
|
||||||
/* there is at least 20 bytes left in the pack */
|
|
||||||
switch (entry->in_pack_type) {
|
|
||||||
case OBJ_REF_DELTA:
|
|
||||||
base_name = use_pack(p, &w_curs,
|
|
||||||
entry->in_pack_offset + used, NULL);
|
|
||||||
used += 20;
|
|
||||||
break;
|
|
||||||
case OBJ_OFS_DELTA:
|
|
||||||
buf = use_pack(p, &w_curs,
|
|
||||||
entry->in_pack_offset + used, NULL);
|
|
||||||
used_0 = 0;
|
|
||||||
c = buf[used_0++];
|
|
||||||
ofs = c & 127;
|
|
||||||
while (c & 128) {
|
|
||||||
ofs += 1;
|
|
||||||
if (!ofs || MSB(ofs, 7))
|
|
||||||
die("delta base offset overflow in pack for %s",
|
|
||||||
sha1_to_hex(entry->sha1));
|
|
||||||
c = buf[used_0++];
|
|
||||||
ofs = (ofs << 7) + (c & 127);
|
|
||||||
}
|
|
||||||
if (ofs >= entry->in_pack_offset)
|
|
||||||
die("delta base offset out of bound for %s",
|
|
||||||
sha1_to_hex(entry->sha1));
|
|
||||||
ofs = entry->in_pack_offset - ofs;
|
|
||||||
base_name = find_packed_object_name(p, ofs);
|
|
||||||
used += used_0;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
base_name = NULL;
|
|
||||||
}
|
|
||||||
if (base_name)
|
|
||||||
base_entry = locate_object_entry(base_name);
|
|
||||||
}
|
|
||||||
unuse_pack(&w_curs);
|
|
||||||
entry->in_pack_header_size = used;
|
|
||||||
|
|
||||||
if (base_entry) {
|
|
||||||
|
|
||||||
/* Depth value does not matter - find_deltas()
|
|
||||||
* will never consider reused delta as the
|
|
||||||
* base object to deltify other objects
|
|
||||||
* against, in order to avoid circular deltas.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* uncompressed size of the delta data */
|
|
||||||
entry->size = size;
|
|
||||||
entry->delta = base_entry;
|
|
||||||
entry->type = entry->in_pack_type;
|
entry->type = entry->in_pack_type;
|
||||||
|
entry->in_pack_header_size = used;
|
||||||
|
unuse_pack(&w_curs);
|
||||||
|
return;
|
||||||
|
case OBJ_REF_DELTA:
|
||||||
|
if (!no_reuse_delta && !entry->preferred_base)
|
||||||
|
base_ref = use_pack(p, &w_curs,
|
||||||
|
entry->in_pack_offset + used, NULL);
|
||||||
|
entry->in_pack_header_size = used + 20;
|
||||||
|
break;
|
||||||
|
case OBJ_OFS_DELTA:
|
||||||
|
buf = use_pack(p, &w_curs,
|
||||||
|
entry->in_pack_offset + used, NULL);
|
||||||
|
used_0 = 0;
|
||||||
|
c = buf[used_0++];
|
||||||
|
ofs = c & 127;
|
||||||
|
while (c & 128) {
|
||||||
|
ofs += 1;
|
||||||
|
if (!ofs || MSB(ofs, 7))
|
||||||
|
die("delta base offset overflow in pack for %s",
|
||||||
|
sha1_to_hex(entry->sha1));
|
||||||
|
c = buf[used_0++];
|
||||||
|
ofs = (ofs << 7) + (c & 127);
|
||||||
|
}
|
||||||
|
if (ofs >= entry->in_pack_offset)
|
||||||
|
die("delta base offset out of bound for %s",
|
||||||
|
sha1_to_hex(entry->sha1));
|
||||||
|
ofs = entry->in_pack_offset - ofs;
|
||||||
|
if (!no_reuse_delta && !entry->preferred_base)
|
||||||
|
base_ref = find_packed_object_name(p, ofs);
|
||||||
|
entry->in_pack_header_size = used + used_0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (base_ref && (base_entry = locate_object_entry(base_ref))) {
|
||||||
|
/*
|
||||||
|
* If base_ref was set above that means we wish to
|
||||||
|
* reuse delta data, and we even found that base
|
||||||
|
* in the list of objects we want to pack. Goodie!
|
||||||
|
*
|
||||||
|
* Depth value does not matter - find_deltas() will
|
||||||
|
* never consider reused delta as the base object to
|
||||||
|
* deltify other objects against, in order to avoid
|
||||||
|
* circular deltas.
|
||||||
|
*/
|
||||||
|
entry->type = entry->in_pack_type;
|
||||||
|
entry->delta = base_entry;
|
||||||
entry->delta_sibling = base_entry->delta_child;
|
entry->delta_sibling = base_entry->delta_child;
|
||||||
base_entry->delta_child = entry;
|
base_entry->delta_child = entry;
|
||||||
|
unuse_pack(&w_curs);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/* Otherwise we would do the usual */
|
|
||||||
|
if (entry->type) {
|
||||||
|
/*
|
||||||
|
* This must be a delta and we already know what the
|
||||||
|
* final object type is. Let's extract the actual
|
||||||
|
* object size from the delta header.
|
||||||
|
*/
|
||||||
|
entry->size = get_size_from_delta(p, &w_curs,
|
||||||
|
entry->in_pack_offset + entry->in_pack_header_size);
|
||||||
|
unuse_pack(&w_curs);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* No choice but to fall back to the recursive delta walk
|
||||||
|
* with sha1_object_info() to find about the object type
|
||||||
|
* at this point...
|
||||||
|
*/
|
||||||
|
unuse_pack(&w_curs);
|
||||||
}
|
}
|
||||||
|
|
||||||
entry->type = sha1_object_info(entry->sha1, &entry->size);
|
entry->type = sha1_object_info(entry->sha1, &entry->size);
|
||||||
|
@ -1210,14 +1233,37 @@ static void check_object(struct object_entry *entry)
|
||||||
sha1_to_hex(entry->sha1));
|
sha1_to_hex(entry->sha1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int pack_offset_sort(const void *_a, const void *_b)
|
||||||
|
{
|
||||||
|
const struct object_entry *a = *(struct object_entry **)_a;
|
||||||
|
const struct object_entry *b = *(struct object_entry **)_b;
|
||||||
|
|
||||||
|
/* avoid filesystem thrashing with loose objects */
|
||||||
|
if (!a->in_pack && !b->in_pack)
|
||||||
|
return hashcmp(a->sha1, b->sha1);
|
||||||
|
|
||||||
|
if (a->in_pack < b->in_pack)
|
||||||
|
return -1;
|
||||||
|
if (a->in_pack > b->in_pack)
|
||||||
|
return 1;
|
||||||
|
return a->in_pack_offset < b->in_pack_offset ? -1 :
|
||||||
|
(a->in_pack_offset > b->in_pack_offset);
|
||||||
|
}
|
||||||
|
|
||||||
static void get_object_details(void)
|
static void get_object_details(void)
|
||||||
{
|
{
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
struct object_entry *entry;
|
struct object_entry **sorted_by_offset;
|
||||||
|
|
||||||
|
sorted_by_offset = xcalloc(nr_objects, sizeof(struct object_entry *));
|
||||||
|
for (i = 0; i < nr_objects; i++)
|
||||||
|
sorted_by_offset[i] = objects + i;
|
||||||
|
qsort(sorted_by_offset, nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);
|
||||||
|
|
||||||
prepare_pack_ix();
|
prepare_pack_ix();
|
||||||
for (i = 0, entry = objects; i < nr_objects; i++, entry++)
|
for (i = 0; i < nr_objects; i++)
|
||||||
check_object(entry);
|
check_object(sorted_by_offset[i]);
|
||||||
|
free(sorted_by_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int type_size_sort(const void *_a, const void *_b)
|
static int type_size_sort(const void *_a, const void *_b)
|
||||||
|
@ -1520,20 +1566,20 @@ static void read_object_list_from_stdin(void)
|
||||||
|
|
||||||
hash = name_hash(line+41);
|
hash = name_hash(line+41);
|
||||||
add_preferred_base_object(line+41, hash);
|
add_preferred_base_object(line+41, hash);
|
||||||
add_object_entry(sha1, hash, 0);
|
add_object_entry(sha1, 0, hash, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void show_commit(struct commit *commit)
|
static void show_commit(struct commit *commit)
|
||||||
{
|
{
|
||||||
add_object_entry(commit->object.sha1, 0, 0);
|
add_object_entry(commit->object.sha1, OBJ_COMMIT, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void show_object(struct object_array_entry *p)
|
static void show_object(struct object_array_entry *p)
|
||||||
{
|
{
|
||||||
unsigned hash = name_hash(p->name);
|
unsigned hash = name_hash(p->name);
|
||||||
add_preferred_base_object(p->name, hash);
|
add_preferred_base_object(p->name, hash);
|
||||||
add_object_entry(p->item->sha1, hash, 0);
|
add_object_entry(p->item->sha1, p->item->type, hash, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void show_edge(struct commit *commit)
|
static void show_edge(struct commit *commit)
|
||||||
|
|
Loading…
Reference in New Issue