
hashmap: add API to disable item counting when threaded

This addresses concerns raised on the mailing list, backed by
ThreadSanitizer output, about unprotected concurrent read/write access
to map.size introduced by my previous "disallow rehash" change
(0607e10009).

See:
https://public-inbox.org/git/adb37b70139fd1e2bac18bfd22c8b96683ae18eb.1502780344.git.martin.agren@gmail.com/

Add an API to hashmap to disable item counting and, thus, automatic
rehashing.  Also add an API to re-enable them later.

When item counting is disabled, the map.size field is invalid.  To
prevent accidents, the field has been renamed to map.private_size and
an accessor function hashmap_get_size() has been added; all direct
references to the field have been updated to use the accessor.
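
For illustration, here is a minimal sketch of how a threaded caller
might use the new API.  The struct, mutex, and helper names below
(my_entry, my_lock, my_setup, my_add_from_thread, my_finish,
expected_nr) are hypothetical and not part of this change; the caller
still supplies all locking:

#include <pthread.h>
#include "hashmap.h"

struct my_entry {
	struct hashmap_entry ent;	/* must be the first member */
	const char *name;
};

static pthread_mutex_t my_lock = PTHREAD_MUTEX_INITIALIZER;

static void my_setup(struct hashmap *map, hashmap_cmp_fn cmp,
		     unsigned int expected_nr)
{
	/* pre-size the table; it cannot grow while counting is off */
	hashmap_init(map, cmp, NULL, expected_nr);
	hashmap_disable_item_counting(map);
}

static void my_add_from_thread(struct hashmap *map, struct my_entry *e,
			       unsigned int hash)
{
	hashmap_entry_init(e, hash);
	pthread_mutex_lock(&my_lock);	/* caller-provided locking */
	hashmap_add(map, e);
	pthread_mutex_unlock(&my_lock);
}

static void my_finish(struct hashmap *map)
{
	/* after all threads join: recount so hashmap_get_size() is valid */
	hashmap_enable_item_counting(map);
}

Pre-sizing matters because, with counting disabled, the table is never
resized, so a too-small initial size degrades into long chains.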

Here is the relevant output from ThreadSanitizer showing the problem:

WARNING: ThreadSanitizer: data race (pid=10554)
  Read of size 4 at 0x00000082d488 by thread T2 (mutexes: write M16):
    #0 hashmap_add hashmap.c:209
    #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302
    #2 handle_range_dir name-hash.c:347
    #3 handle_range_1 name-hash.c:415
    #4 lazy_dir_thread_proc name-hash.c:471
    #5 <null> <null>

  Previous write of size 4 at 0x00000082d488 by thread T1 (mutexes: write M31):
    #0 hashmap_add hashmap.c:209
    #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302
    #2 handle_range_dir name-hash.c:347
    #3 handle_range_1 name-hash.c:415
    #4 handle_range_dir name-hash.c:380
    #5 handle_range_1 name-hash.c:415
    #6 lazy_dir_thread_proc name-hash.c:471
    #7 <null> <null>

Martin gives instructions for running TSan on test t3008 in this post:
https://public-inbox.org/git/CAN0heSoJDL9pWELD6ciLTmWf-a=oyxe4EXXOmCKvsG5MSuzxsA@mail.gmail.com/

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
 attr.c                  | 15
 builtin/describe.c      |  2
 hashmap.c               | 26
 hashmap.h               | 72
 name-hash.c             | 10
 t/helper/test-hashmap.c |  3
 6 files changed

diff --git a/attr.c b/attr.c
--- a/attr.c
+++ b/attr.c
@@ -151,10 +151,12 @@ struct all_attrs_item {
 static void all_attrs_init(struct attr_hashmap *map, struct attr_check *check)
 {
 	int i;
+	unsigned int size;
 
 	hashmap_lock(map);
 
-	if (map->map.size < check->all_attrs_nr)
+	size = hashmap_get_size(&map->map);
+	if (size < check->all_attrs_nr)
 		die("BUG: interned attributes shouldn't be deleted");
 
 	/*
@@ -163,13 +165,13 @@ static void all_attrs_init(struct attr_hashmap *map, struct attr_check *check)
 	 * field), reallocate the provided attr_check instance's all_attrs
 	 * field and fill each entry with its corresponding git_attr.
 	 */
-	if (map->map.size != check->all_attrs_nr) {
+	if (size != check->all_attrs_nr) {
 		struct attr_hash_entry *e;
 		struct hashmap_iter iter;
 		hashmap_iter_init(&map->map, &iter);
 
-		REALLOC_ARRAY(check->all_attrs, map->map.size);
-		check->all_attrs_nr = map->map.size;
+		REALLOC_ARRAY(check->all_attrs, size);
+		check->all_attrs_nr = size;
 
 		while ((e = hashmap_iter_next(&iter))) {
 			const struct git_attr *a = e->value;
@@ -237,10 +239,11 @@ static const struct git_attr *git_attr_internal(const char *name, int namelen)
 
 	if (!a) {
 		FLEX_ALLOC_MEM(a, name, name, namelen);
-		a->attr_nr = g_attr_hashmap.map.size;
+		a->attr_nr = hashmap_get_size(&g_attr_hashmap.map);
 
 		attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
-		assert(a->attr_nr == (g_attr_hashmap.map.size - 1));
+		assert(a->attr_nr ==
+		       (hashmap_get_size(&g_attr_hashmap.map) - 1));
 	}
 
 	hashmap_unlock(&g_attr_hashmap);

diff --git a/builtin/describe.c b/builtin/describe.c
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -508,7 +508,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 
 	hashmap_init(&names, commit_name_cmp, NULL, 0);
 	for_each_rawref(get_name, NULL);
-	if (!names.size && !always)
+	if (!hashmap_get_size(&names) && !always)
 		die(_("No names found, cannot describe anything."));
 
 	if (argc == 0) {

diff --git a/hashmap.c b/hashmap.c
--- a/hashmap.c
+++ b/hashmap.c
@@ -116,9 +116,6 @@ static void rehash(struct hashmap *map, unsigned int newsize)
 	unsigned int i, oldsize = map->tablesize;
 	struct hashmap_entry **oldtable = map->table;
 
-	if (map->disallow_rehash)
-		return;
-
 	alloc_table(map, newsize);
 	for (i = 0; i < oldsize; i++) {
 		struct hashmap_entry *e = oldtable[i];
@@ -166,6 +163,12 @@ void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
 	while (initial_size > size)
 		size <<= HASHMAP_RESIZE_BITS;
 	alloc_table(map, size);
+
+	/*
+	 * Keep track of the number of items in the map and
+	 * allow the map to automatically grow as necessary.
+	 */
+	map->do_count_items = 1;
 }
 
 void hashmap_free(struct hashmap *map, int free_entries)
@@ -206,9 +209,11 @@ void hashmap_add(struct hashmap *map, void *entry)
 	map->table[b] = entry;
 
 	/* fix size and rehash if appropriate */
-	map->size++;
-	if (map->size > map->grow_at)
-		rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
+	if (map->do_count_items) {
+		map->private_size++;
+		if (map->private_size > map->grow_at)
+			rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
+	}
 }
 
 void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
@@ -224,9 +229,12 @@ void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
 	old->next = NULL;
 
 	/* fix size and rehash if appropriate */
-	map->size--;
-	if (map->size < map->shrink_at)
-		rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
+	if (map->do_count_items) {
+		map->private_size--;
+		if (map->private_size < map->shrink_at)
+			rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
+	}
 
 	return old;
 }

diff --git a/hashmap.h b/hashmap.h
--- a/hashmap.h
+++ b/hashmap.h
@@ -183,7 +183,7 @@ struct hashmap {
 	const void *cmpfn_data;
 
 	/* total number of entries (0 means the hashmap is empty) */
-	unsigned int size;
+	unsigned int private_size; /* use hashmap_get_size() */
 
 	/*
 	 * tablesize is the allocated size of the hash table. A non-0 value
@@ -196,8 +196,7 @@ struct hashmap {
 	unsigned int grow_at;
 	unsigned int shrink_at;
 
-	/* See `hashmap_disallow_rehash`. */
-	unsigned disallow_rehash : 1;
+	unsigned int do_count_items : 1;
 };
 
 /* hashmap functions */
@@ -252,6 +251,18 @@ static inline void hashmap_entry_init(void *entry, unsigned int hash)
 	e->next = NULL;
 }
 
+/*
+ * Return the number of items in the map.
+ */
+static inline unsigned int hashmap_get_size(struct hashmap *map)
+{
+	if (map->do_count_items)
+		return map->private_size;
+
+	BUG("hashmap_get_size: size not set");
+	return 0;
+}
+
 /*
  * Returns the hashmap entry for the specified key, or NULL if not found.
  *
@@ -344,24 +355,6 @@ extern void *hashmap_remove(struct hashmap *map, const void *key,
  */
 int hashmap_bucket(const struct hashmap *map, unsigned int hash);
 
-/*
- * Disallow/allow rehashing of the hashmap.
- * This is useful if the caller knows that the hashmap needs multi-threaded
- * access.  The caller is still required to guard/lock searches and inserts
- * in a manner appropriate to their usage.  This simply prevents the table
- * from being unexpectedly re-mapped.
- *
- * It is up to the caller to ensure that the hashmap is initialized to a
- * reasonable size to prevent poor performance.
- *
- * A call to allow rehashing does not force a rehash; that might happen
- * with the next insert or delete.
- */
-static inline void hashmap_disallow_rehash(struct hashmap *map, unsigned value)
-{
-	map->disallow_rehash = value;
-}
-
 /*
  * Used to iterate over all entries of a hashmap. Note that it is
  * not safe to add or remove entries to the hashmap while
@@ -387,6 +380,43 @@ static inline void *hashmap_iter_first(struct hashmap *map,
 	return hashmap_iter_next(iter);
 }
 
+/*
+ * Disable item counting and automatic rehashing when adding/removing items.
+ *
+ * Normally, the hashmap keeps track of the number of items in the map
+ * and uses it to dynamically resize it.  This (both the counting and
+ * the resizing) can cause problems when the map is being used by
+ * threaded callers (because the hashmap code does not know about the
+ * locking strategy used by the threaded callers and therefore, does
+ * not know how to protect the "private_size" counter).
+ */
+static inline void hashmap_disable_item_counting(struct hashmap *map)
+{
+	map->do_count_items = 0;
+}
+
+/*
+ * Re-enable item counting when adding/removing items.
+ * If counting is currently disabled, it will force count them.
+ * It WILL NOT automatically rehash them.
+ */
+static inline void hashmap_enable_item_counting(struct hashmap *map)
+{
+	void *item;
+	unsigned int n = 0;
+	struct hashmap_iter iter;
+
+	if (map->do_count_items)
+		return;
+
+	hashmap_iter_init(map, &iter);
+	while ((item = hashmap_iter_next(&iter)))
+		n++;
+
+	map->do_count_items = 1;
+	map->private_size = n;
+}
+
 /* String interning */
 
 /*

diff --git a/name-hash.c b/name-hash.c
--- a/name-hash.c
+++ b/name-hash.c
@@ -584,9 +584,15 @@ static void lazy_init_name_hash(struct index_state *istate)
 	hashmap_init(&istate->dir_hash, dir_entry_cmp, NULL, istate->cache_nr);
 
 	if (lookup_lazy_params(istate)) {
-		hashmap_disallow_rehash(&istate->dir_hash, 1);
+		/*
+		 * Disable item counting and automatic rehashing because
+		 * we do per-chain (mod n) locking rather than whole hashmap
+		 * locking and we need to prevent the table-size from changing
+		 * and bucket items from being redistributed.
+		 */
+		hashmap_disable_item_counting(&istate->dir_hash);
 		threaded_lazy_init_name_hash(istate);
-		hashmap_disallow_rehash(&istate->dir_hash, 0);
+		hashmap_enable_item_counting(&istate->dir_hash);
 	} else {
 		int nr;
 		for (nr = 0; nr < istate->cache_nr; nr++)
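
To make the comment above concrete, here is a minimal sketch of what
per-chain (mod n) locking can look like.  It is only an illustration,
not the actual name-hash.c code; LAZY_LOCKS, lazy_locks[],
init_lazy_locks(), lock_for_hash() and add_entry_threaded() are
hypothetical names:

#include <pthread.h>
#include "hashmap.h"

#define LAZY_LOCKS 32	/* illustrative; any fixed number of lock stripes */

static pthread_mutex_t lazy_locks[LAZY_LOCKS];

static void init_lazy_locks(void)
{
	int i;
	for (i = 0; i < LAZY_LOCKS; i++)
		pthread_mutex_init(&lazy_locks[i], NULL);
}

/* map a hash to one of n mutexes; stable only while tablesize is fixed */
static pthread_mutex_t *lock_for_hash(const struct hashmap *map,
				      unsigned int hash)
{
	return &lazy_locks[hashmap_bucket(map, hash) % LAZY_LOCKS];
}

static void add_entry_threaded(struct hashmap *map, void *entry,
			       unsigned int hash)
{
	pthread_mutex_t *lock = lock_for_hash(map, hash);

	hashmap_entry_init(entry, hash);
	pthread_mutex_lock(lock);
	/* with counting disabled, hashmap_add() touches only this chain */
	hashmap_add(map, entry);
	pthread_mutex_unlock(lock);
}

The scheme is only sound while map->tablesize stays fixed, which is
exactly what disabling item counting guarantees during the threaded
phase.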

diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c
--- a/t/helper/test-hashmap.c
+++ b/t/helper/test-hashmap.c
@@ -235,7 +235,8 @@ int cmd_main(int argc, const char **argv)
 	} else if (!strcmp("size", cmd)) {
 
 		/* print table sizes */
-		printf("%u %u\n", map.tablesize, map.size);
+		printf("%u %u\n", map.tablesize,
+		       hashmap_get_size(&map));
 
 	} else if (!strcmp("intern", cmd) && l1) {