hashmap: factor out getting a hash code from a SHA1
Copying the first bytes of a SHA1 is duplicated in six places; however, the implications (the actual value depends on the endianness of the platform) are documented only once. Add a properly documented API for this. Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> maint
parent
6f92e5ff3c
commit
039dc71a7c
|
@ -58,6 +58,15 @@ Functions
|
||||||
+
|
+
|
||||||
`strihash` and `memihash` are case insensitive versions.
|
`strihash` and `memihash` are case insensitive versions.
|
||||||
|
|
||||||
|
`unsigned int sha1hash(const unsigned char *sha1)`::
|
||||||
|
|
||||||
|
Converts a cryptographic hash (e.g. SHA-1) into an int-sized hash code
|
||||||
|
for use in hash tables. Cryptographic hashes are supposed to have
|
||||||
|
uniform distribution, so in contrast to `memhash()`, this just copies
|
||||||
|
the first `sizeof(int)` bytes without shuffling any bits. Note that
|
||||||
|
the results will be different on big-endian and little-endian
|
||||||
|
platforms, so they should not be stored or transferred over the net.
|
||||||
|
|
||||||
`void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size)`::
|
`void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size)`::
|
||||||
|
|
||||||
Initializes a hashmap structure.
|
Initializes a hashmap structure.
|
||||||
|
|
|
@ -56,17 +56,10 @@ static int commit_name_cmp(const struct commit_name *cn1,
|
||||||
return hashcmp(cn1->peeled, peeled ? peeled : cn2->peeled);
|
return hashcmp(cn1->peeled, peeled ? peeled : cn2->peeled);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned int hash_sha1(const unsigned char *sha1)
|
|
||||||
{
|
|
||||||
unsigned int hash;
|
|
||||||
memcpy(&hash, sha1, sizeof(hash));
|
|
||||||
return hash;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline struct commit_name *find_commit_name(const unsigned char *peeled)
|
static inline struct commit_name *find_commit_name(const unsigned char *peeled)
|
||||||
{
|
{
|
||||||
struct commit_name key;
|
struct commit_name key;
|
||||||
hashmap_entry_init(&key, hash_sha1(peeled));
|
hashmap_entry_init(&key, sha1hash(peeled));
|
||||||
return hashmap_get(&names, &key, peeled);
|
return hashmap_get(&names, &key, peeled);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,7 +107,7 @@ static void add_to_known_names(const char *path,
|
||||||
if (!e) {
|
if (!e) {
|
||||||
e = xmalloc(sizeof(struct commit_name));
|
e = xmalloc(sizeof(struct commit_name));
|
||||||
hashcpy(e->peeled, peeled);
|
hashcpy(e->peeled, peeled);
|
||||||
hashmap_entry_init(e, hash_sha1(peeled));
|
hashmap_entry_init(e, sha1hash(peeled));
|
||||||
hashmap_add(&names, e);
|
hashmap_add(&names, e);
|
||||||
e->path = NULL;
|
e->path = NULL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,10 +8,7 @@
|
||||||
|
|
||||||
static unsigned int hash_obj(const struct object *obj, unsigned int n)
|
static unsigned int hash_obj(const struct object *obj, unsigned int n)
|
||||||
{
|
{
|
||||||
unsigned int hash;
|
return sha1hash(obj->sha1) % n;
|
||||||
|
|
||||||
memcpy(&hash, obj->sha1, sizeof(unsigned int));
|
|
||||||
return hash % n;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *insert_decoration(struct decoration *n, const struct object *base, void *decoration)
|
static void *insert_decoration(struct decoration *n, const struct object *base, void *decoration)
|
||||||
|
|
|
@ -242,14 +242,12 @@ struct file_similarity {
|
||||||
|
|
||||||
static unsigned int hash_filespec(struct diff_filespec *filespec)
|
static unsigned int hash_filespec(struct diff_filespec *filespec)
|
||||||
{
|
{
|
||||||
unsigned int hash;
|
|
||||||
if (!filespec->sha1_valid) {
|
if (!filespec->sha1_valid) {
|
||||||
if (diff_populate_filespec(filespec, 0))
|
if (diff_populate_filespec(filespec, 0))
|
||||||
return 0;
|
return 0;
|
||||||
hash_sha1_file(filespec->data, filespec->size, "blob", filespec->sha1);
|
hash_sha1_file(filespec->data, filespec->size, "blob", filespec->sha1);
|
||||||
}
|
}
|
||||||
memcpy(&hash, filespec->sha1, sizeof(hash));
|
return sha1hash(filespec->sha1);
|
||||||
return hash;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int find_identical_files(struct hashmap *srcs,
|
static int find_identical_files(struct hashmap *srcs,
|
||||||
|
|
11
hashmap.h
11
hashmap.h
|
@ -13,6 +13,17 @@ extern unsigned int strihash(const char *buf);
|
||||||
extern unsigned int memhash(const void *buf, size_t len);
|
extern unsigned int memhash(const void *buf, size_t len);
|
||||||
extern unsigned int memihash(const void *buf, size_t len);
|
extern unsigned int memihash(const void *buf, size_t len);
|
||||||
|
|
||||||
|
static inline unsigned int sha1hash(const unsigned char *sha1)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Equivalent to 'return *(unsigned int *)sha1;', but safe on
|
||||||
|
* platforms that don't support unaligned reads.
|
||||||
|
*/
|
||||||
|
unsigned int hash;
|
||||||
|
memcpy(&hash, sha1, sizeof(hash));
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
/* data structures */
|
/* data structures */
|
||||||
|
|
||||||
struct hashmap_entry {
|
struct hashmap_entry {
|
||||||
|
|
11
khash.h
11
khash.h
|
@ -320,19 +320,12 @@ static const double __ac_HASH_UPPER = 0.77;
|
||||||
code; \
|
code; \
|
||||||
} }
|
} }
|
||||||
|
|
||||||
static inline khint_t __kh_oid_hash(const unsigned char *oid)
|
|
||||||
{
|
|
||||||
khint_t hash;
|
|
||||||
memcpy(&hash, oid, sizeof(hash));
|
|
||||||
return hash;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define __kh_oid_cmp(a, b) (hashcmp(a, b) == 0)
|
#define __kh_oid_cmp(a, b) (hashcmp(a, b) == 0)
|
||||||
|
|
||||||
KHASH_INIT(sha1, const unsigned char *, void *, 1, __kh_oid_hash, __kh_oid_cmp)
|
KHASH_INIT(sha1, const unsigned char *, void *, 1, sha1hash, __kh_oid_cmp)
|
||||||
typedef kh_sha1_t khash_sha1;
|
typedef kh_sha1_t khash_sha1;
|
||||||
|
|
||||||
KHASH_INIT(sha1_pos, const unsigned char *, int, 1, __kh_oid_hash, __kh_oid_cmp)
|
KHASH_INIT(sha1_pos, const unsigned char *, int, 1, sha1hash, __kh_oid_cmp)
|
||||||
typedef kh_sha1_pos_t khash_sha1_pos;
|
typedef kh_sha1_pos_t khash_sha1_pos;
|
||||||
|
|
||||||
#endif /* __AC_KHASH_H */
|
#endif /* __AC_KHASH_H */
|
||||||
|
|
13
object.c
13
object.c
|
@ -50,18 +50,7 @@ int type_from_string(const char *str)
|
||||||
*/
|
*/
|
||||||
static unsigned int hash_obj(const unsigned char *sha1, unsigned int n)
|
static unsigned int hash_obj(const unsigned char *sha1, unsigned int n)
|
||||||
{
|
{
|
||||||
unsigned int hash;
|
return sha1hash(sha1) & (n - 1);
|
||||||
|
|
||||||
/*
|
|
||||||
* Since the sha1 is essentially random, we just take the
|
|
||||||
* required number of bits directly from the first
|
|
||||||
* sizeof(unsigned int) bytes of sha1. First we have to copy
|
|
||||||
* the bytes into a properly aligned integer. If we cared
|
|
||||||
* about getting consistent results across architectures, we
|
|
||||||
* would have to call ntohl() here, too.
|
|
||||||
*/
|
|
||||||
memcpy(&hash, sha1, sizeof(unsigned int));
|
|
||||||
return hash & (n - 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -7,10 +7,9 @@ static uint32_t locate_object_entry_hash(struct packing_data *pdata,
|
||||||
const unsigned char *sha1,
|
const unsigned char *sha1,
|
||||||
int *found)
|
int *found)
|
||||||
{
|
{
|
||||||
uint32_t i, hash, mask = (pdata->index_size - 1);
|
uint32_t i, mask = (pdata->index_size - 1);
|
||||||
|
|
||||||
memcpy(&hash, sha1, sizeof(uint32_t));
|
i = sha1hash(sha1) & mask;
|
||||||
i = hash & mask;
|
|
||||||
|
|
||||||
while (pdata->index[i] > 0) {
|
while (pdata->index[i] > 0) {
|
||||||
uint32_t pos = pdata->index[i] - 1;
|
uint32_t pos = pdata->index[i] - 1;
|
||||||
|
|
Loading…
Reference in New Issue