Browse Source
The hash table that stores the packing list for a given `pack-objects` run was tightly coupled to the pack-objects code. In this commit, we refactor the hash table and the underlying storage array into a `packing_data` struct. The functionality for accessing and adding entries to the packing list is hence accessible from other parts of Git besides the `pack-objects` builtin. This refactoring is a requirement for further patches in this series that will require accessing the commit packing list from outside of `pack-objects`. The hash table implementation has been minimally altered: we now use table sizes which are always a power of two, to ensure a uniform index distribution in the array. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>maint


4 changed files with 200 additions and 135 deletions
@ -0,0 +1,111 @@
@@ -0,0 +1,111 @@
|
||||
#include "cache.h" |
||||
#include "object.h" |
||||
#include "pack.h" |
||||
#include "pack-objects.h" |
||||
|
||||
static uint32_t locate_object_entry_hash(struct packing_data *pdata, |
||||
const unsigned char *sha1, |
||||
int *found) |
||||
{ |
||||
uint32_t i, hash, mask = (pdata->index_size - 1); |
||||
|
||||
memcpy(&hash, sha1, sizeof(uint32_t)); |
||||
i = hash & mask; |
||||
|
||||
while (pdata->index[i] > 0) { |
||||
uint32_t pos = pdata->index[i] - 1; |
||||
|
||||
if (!hashcmp(sha1, pdata->objects[pos].idx.sha1)) { |
||||
*found = 1; |
||||
return i; |
||||
} |
||||
|
||||
i = (i + 1) & mask; |
||||
} |
||||
|
||||
*found = 0; |
||||
return i; |
||||
} |
||||
|
||||
static inline uint32_t closest_pow2(uint32_t v) |
||||
{ |
||||
v = v - 1; |
||||
v |= v >> 1; |
||||
v |= v >> 2; |
||||
v |= v >> 4; |
||||
v |= v >> 8; |
||||
v |= v >> 16; |
||||
return v + 1; |
||||
} |
||||
|
||||
static void rehash_objects(struct packing_data *pdata) |
||||
{ |
||||
uint32_t i; |
||||
struct object_entry *entry; |
||||
|
||||
pdata->index_size = closest_pow2(pdata->nr_objects * 3); |
||||
if (pdata->index_size < 1024) |
||||
pdata->index_size = 1024; |
||||
|
||||
pdata->index = xrealloc(pdata->index, sizeof(uint32_t) * pdata->index_size); |
||||
memset(pdata->index, 0, sizeof(int) * pdata->index_size); |
||||
|
||||
entry = pdata->objects; |
||||
|
||||
for (i = 0; i < pdata->nr_objects; i++) { |
||||
int found; |
||||
uint32_t ix = locate_object_entry_hash(pdata, entry->idx.sha1, &found); |
||||
|
||||
if (found) |
||||
die("BUG: Duplicate object in hash"); |
||||
|
||||
pdata->index[ix] = i + 1; |
||||
entry++; |
||||
} |
||||
} |
||||
|
||||
struct object_entry *packlist_find(struct packing_data *pdata, |
||||
const unsigned char *sha1, |
||||
uint32_t *index_pos) |
||||
{ |
||||
uint32_t i; |
||||
int found; |
||||
|
||||
if (!pdata->index_size) |
||||
return NULL; |
||||
|
||||
i = locate_object_entry_hash(pdata, sha1, &found); |
||||
|
||||
if (index_pos) |
||||
*index_pos = i; |
||||
|
||||
if (!found) |
||||
return NULL; |
||||
|
||||
return &pdata->objects[pdata->index[i] - 1]; |
||||
} |
||||
|
||||
struct object_entry *packlist_alloc(struct packing_data *pdata, |
||||
const unsigned char *sha1, |
||||
uint32_t index_pos) |
||||
{ |
||||
struct object_entry *new_entry; |
||||
|
||||
if (pdata->nr_objects >= pdata->nr_alloc) { |
||||
pdata->nr_alloc = (pdata->nr_alloc + 1024) * 3 / 2; |
||||
pdata->objects = xrealloc(pdata->objects, |
||||
pdata->nr_alloc * sizeof(*new_entry)); |
||||
} |
||||
|
||||
new_entry = pdata->objects + pdata->nr_objects++; |
||||
|
||||
memset(new_entry, 0, sizeof(*new_entry)); |
||||
hashcpy(new_entry->idx.sha1, sha1); |
||||
|
||||
if (pdata->index_size * 3 <= pdata->nr_objects * 4) |
||||
rehash_objects(pdata); |
||||
else |
||||
pdata->index[index_pos] = pdata->nr_objects; |
||||
|
||||
return new_entry; |
||||
} |
@ -0,0 +1,47 @@
@@ -0,0 +1,47 @@
|
||||
#ifndef PACK_OBJECTS_H |
||||
#define PACK_OBJECTS_H |
||||
|
||||
struct object_entry { |
||||
struct pack_idx_entry idx; |
||||
unsigned long size; /* uncompressed size */ |
||||
struct packed_git *in_pack; /* already in pack */ |
||||
off_t in_pack_offset; |
||||
struct object_entry *delta; /* delta base object */ |
||||
struct object_entry *delta_child; /* deltified objects who bases me */ |
||||
struct object_entry *delta_sibling; /* other deltified objects who |
||||
* uses the same base as me |
||||
*/ |
||||
void *delta_data; /* cached delta (uncompressed) */ |
||||
unsigned long delta_size; /* delta data size (uncompressed) */ |
||||
unsigned long z_delta_size; /* delta data size (compressed) */ |
||||
enum object_type type; |
||||
enum object_type in_pack_type; /* could be delta */ |
||||
uint32_t hash; /* name hint hash */ |
||||
unsigned char in_pack_header_size; |
||||
unsigned preferred_base:1; /* |
||||
* we do not pack this, but is available |
||||
* to be used as the base object to delta |
||||
* objects against. |
||||
*/ |
||||
unsigned no_try_delta:1; |
||||
unsigned tagged:1; /* near the very tip of refs */ |
||||
unsigned filled:1; /* assigned write-order */ |
||||
}; |
||||
|
||||
struct packing_data { |
||||
struct object_entry *objects; |
||||
uint32_t nr_objects, nr_alloc; |
||||
|
||||
int32_t *index; |
||||
uint32_t index_size; |
||||
}; |
||||
|
||||
struct object_entry *packlist_alloc(struct packing_data *pdata, |
||||
const unsigned char *sha1, |
||||
uint32_t index_pos); |
||||
|
||||
struct object_entry *packlist_find(struct packing_data *pdata, |
||||
const unsigned char *sha1, |
||||
uint32_t *index_pos); |
||||
|
||||
#endif |
Loading…
Reference in new issue