281 lines
		
	
	
		
			9.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
			
		
		
	
	
			281 lines
		
	
	
		
			9.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
| hashmap API
 | |
| ===========
 | |
| 
 | |
| The hashmap API is a generic implementation of hash-based key-value mappings.
 | |
| 
 | |
| Data Structures
 | |
| ---------------
 | |
| 
 | |
| `struct hashmap`::
 | |
| 
 | |
| 	The hash table structure. Members can be used as follows, but should
 | |
| 	not be modified directly:
 | |
| +
 | |
| The `size` member keeps track of the total number of entries (0 means the
 | |
| hashmap is empty).
 | |
| +
 | |
| `tablesize` is the allocated size of the hash table. A non-0 value indicates
 | |
| that the hashmap is initialized. It may also be useful for statistical purposes
 | |
| (i.e. `size / tablesize` is the current load factor).
 | |
| +
 | |
| `cmpfn` stores the comparison function specified in `hashmap_init()`. In
 | |
| advanced scenarios, it may be useful to change this, e.g. to switch between
 | |
| case-sensitive and case-insensitive lookup.
 | |
| 
 | |
| `struct hashmap_entry`::
 | |
| 
 | |
| 	An opaque structure representing an entry in the hash table, which must
 | |
| 	be used as the first member of user data structures. Ideally it should be
 | |
| 	followed by an int-sized member to prevent unused memory on 64-bit
 | |
| 	systems due to alignment.
 | |
| +
 | |
| The `hash` member is the entry's hash code and the `next` member points to the
 | |
| next entry in case of collisions (i.e. if multiple entries map to the same
 | |
| bucket).
 | |
| 
 | |
| `struct hashmap_iter`::
 | |
| 
 | |
| 	An iterator structure, to be used with hashmap_iter_* functions.
 | |
| 
 | |
| Types
 | |
| -----
 | |
| 
 | |
| `int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key, const void *keydata)`::
 | |
| 
 | |
| 	User-supplied function to test two hashmap entries for equality. Shall
 | |
| 	return 0 if the entries are equal, and a non-zero value otherwise.
 | |
| +
 | |
| This function is always called with non-NULL `entry` / `entry_or_key`
 | |
| parameters that have the same hash code. When looking up an entry, the `key`
 | |
| and `keydata` parameters to hashmap_get and hashmap_remove are always passed
 | |
| as second and third argument, respectively. Otherwise, `keydata` is NULL.
 | |
| 
 | |
| Functions
 | |
| ---------
 | |
| 
 | |
| `unsigned int strhash(const char *buf)`::
 | |
| `unsigned int strihash(const char *buf)`::
 | |
| `unsigned int memhash(const void *buf, size_t len)`::
 | |
| `unsigned int memihash(const void *buf, size_t len)`::
 | |
| 
 | |
| 	Ready-to-use hash functions for strings, using the FNV-1 algorithm (see
 | |
| 	http://www.isthe.com/chongo/tech/comp/fnv).
 | |
| +
 | |
| `strhash` and `strihash` take 0-terminated strings, while `memhash` and
 | |
| `memihash` operate on arbitrary-length memory.
 | |
| +
 | |
| `strihash` and `memihash` are case insensitive versions.
 | |
| 
 | |
| `unsigned int sha1hash(const unsigned char *sha1)`::
 | |
| 
 | |
| 	Converts a cryptographic hash (e.g. SHA-1) into an int-sized hash code
 | |
| 	for use in hash tables. Cryptographic hashes are supposed to have
 | |
| 	uniform distribution, so in contrast to `memhash()`, this just copies
 | |
| 	the first `sizeof(int)` bytes without shuffling any bits. Note that
 | |
| 	the results will be different on big-endian and little-endian
 | |
| 	platforms, so they should not be stored or transferred over the net.
 | |
| 
 | |
| `void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size)`::
 | |
| 
 | |
| 	Initializes a hashmap structure.
 | |
| +
 | |
| `map` is the hashmap to initialize.
 | |
| +
 | |
| The `equals_function` can be specified to compare two entries for equality.
 | |
| If NULL, entries are considered equal if their hash codes are equal.
 | |
| +
 | |
| If the total number of entries is known in advance, the `initial_size`
 | |
| parameter may be used to preallocate a sufficiently large table and thus
 | |
| prevent expensive resizing. If 0, the table is dynamically resized.
 | |
| 
 | |
| `void hashmap_free(struct hashmap *map, int free_entries)`::
 | |
| 
 | |
| 	Frees a hashmap structure and allocated memory.
 | |
| +
 | |
| `map` is the hashmap to free.
 | |
| +
 | |
| If `free_entries` is true, each hashmap_entry in the map is freed as well
 | |
| (using stdlib's free()).
 | |
| 
 | |
| `void hashmap_entry_init(void *entry, unsigned int hash)`::
 | |
| 
 | |
| 	Initializes a hashmap_entry structure.
 | |
| +
 | |
| `entry` points to the entry to initialize.
 | |
| +
 | |
| `hash` is the hash code of the entry.
 | |
| 
 | |
| `void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)`::
 | |
| 
 | |
| 	Returns the hashmap entry for the specified key, or NULL if not found.
 | |
| +
 | |
| `map` is the hashmap structure.
 | |
| +
 | |
| `key` is a hashmap_entry structure (or user data structure that starts with
 | |
| hashmap_entry) that has at least been initialized with the proper hash code
 | |
| (via `hashmap_entry_init`).
 | |
| +
 | |
| If an entry with matching hash code is found, `key` and `keydata` are passed
 | |
| to `hashmap_cmp_fn` to decide whether the entry matches the key.
 | |
| 
 | |
| `void *hashmap_get_from_hash(const struct hashmap *map, unsigned int hash, const void *keydata)`::
 | |
| 
 | |
| 	Returns the hashmap entry for the specified hash code and key data,
 | |
| 	or NULL if not found.
 | |
| +
 | |
| `map` is the hashmap structure.
 | |
| +
 | |
| `hash` is the hash code of the entry to look up.
 | |
| +
 | |
| If an entry with matching hash code is found, `keydata` is passed to
 | |
| `hashmap_cmp_fn` to decide whether the entry matches the key. The
 | |
| `entry_or_key` parameter points to a bogus hashmap_entry structure that
 | |
| should not be used in the comparison.
 | |
| 
 | |
| `void *hashmap_get_next(const struct hashmap *map, const void *entry)`::
 | |
| 
 | |
| 	Returns the next equal hashmap entry, or NULL if not found. This can be
 | |
| 	used to iterate over duplicate entries (see `hashmap_add`).
 | |
| +
 | |
| `map` is the hashmap structure.
 | |
| +
 | |
| `entry` is the hashmap_entry to start the search from, obtained via a previous
 | |
| call to `hashmap_get` or `hashmap_get_next`.
 | |
| 
 | |
| `void hashmap_add(struct hashmap *map, void *entry)`::
 | |
| 
 | |
| 	Adds a hashmap entry. This allows adding duplicate entries (i.e.
 | |
| 	separate values with the same key according to hashmap_cmp_fn).
 | |
| +
 | |
| `map` is the hashmap structure.
 | |
| +
 | |
| `entry` is the entry to add.
 | |
| 
 | |
| `void *hashmap_put(struct hashmap *map, void *entry)`::
 | |
| 
 | |
| 	Adds or replaces a hashmap entry. If the hashmap contains duplicate
 | |
| 	entries equal to the specified entry, only one of them will be replaced.
 | |
| +
 | |
| `map` is the hashmap structure.
 | |
| +
 | |
| `entry` is the entry to add or replace.
 | |
| +
 | |
| Returns the replaced entry, or NULL if not found (i.e. the entry was added).
 | |
| 
 | |
| `void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)`::
 | |
| 
 | |
| 	Removes a hashmap entry matching the specified key. If the hashmap
 | |
| 	contains duplicate entries equal to the specified key, only one of
 | |
| 	them will be removed.
 | |
| +
 | |
| `map` is the hashmap structure.
 | |
| +
 | |
| `key` is a hashmap_entry structure (or user data structure that starts with
 | |
| hashmap_entry) that has at least been initialized with the proper hash code
 | |
| (via `hashmap_entry_init`).
 | |
| +
 | |
| If an entry with matching hash code is found, `key` and `keydata` are
 | |
| passed to `hashmap_cmp_fn` to decide whether the entry matches the key.
 | |
| +
 | |
| Returns the removed entry, or NULL if not found.
 | |
| 
 | |
| `void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)`::
 | |
| `void *hashmap_iter_next(struct hashmap_iter *iter)`::
 | |
| `void *hashmap_iter_first(struct hashmap *map, struct hashmap_iter *iter)`::
 | |
| 
 | |
| 	Used to iterate over all entries of a hashmap.
 | |
| +
 | |
| `hashmap_iter_init` initializes a `hashmap_iter` structure.
 | |
| +
 | |
| `hashmap_iter_next` returns the next hashmap_entry, or NULL if there are no
 | |
| more entries.
 | |
| +
 | |
| `hashmap_iter_first` is a combination of both (i.e. initializes the iterator
 | |
| and returns the first entry, if any).
 | |
| 
 | |
| `const char *strintern(const char *string)`::
 | |
| `const void *memintern(const void *data, size_t len)`::
 | |
| 
 | |
| 	Returns the unique, interned version of the specified string or data,
 | |
| 	similar to `String.intern` in Java and `String.Intern` in .NET.
 | |
| 	Interned strings remain valid for the entire lifetime of the process.
 | |
| +
 | |
| Can be used as `[x]strdup()` or `xmemdupz` replacement, except that interned
 | |
| strings / data must not be modified or freed.
 | |
| +
 | |
| Interned strings are best used for short strings with high probability of
 | |
| duplicates.
 | |
| +
 | |
| Uses a hashmap to store the pool of interned strings.
 | |
| 
 | |
| Usage example
 | |
| -------------
 | |
| 
 | |
| Here's a simple usage example that maps long keys to double values.
 | |
| ------------
 | |
| struct hashmap map;
 | |
| 
 | |
| struct long2double {
 | |
| 	struct hashmap_entry ent; /* must be the first member! */
 | |
| 	long key;
 | |
| 	double value;
 | |
| };
 | |
| 
 | |
| static int long2double_cmp(const struct long2double *e1, const struct long2double *e2, const void *unused)
 | |
| {
 | |
| 	return !(e1->key == e2->key);
 | |
| }
 | |
| 
 | |
| void long2double_init(void)
 | |
| {
 | |
| 	hashmap_init(&map, (hashmap_cmp_fn) long2double_cmp, 0);
 | |
| }
 | |
| 
 | |
| void long2double_free(void)
 | |
| {
 | |
| 	hashmap_free(&map, 1);
 | |
| }
 | |
| 
 | |
| static struct long2double *find_entry(long key)
 | |
| {
 | |
| 	struct long2double k;
 | |
| 	hashmap_entry_init(&k, memhash(&key, sizeof(long)));
 | |
| 	k.key = key;
 | |
| 	return hashmap_get(&map, &k, NULL);
 | |
| }
 | |
| 
 | |
| double get_value(long key)
 | |
| {
 | |
| 	struct long2double *e = find_entry(key);
 | |
| 	return e ? e->value : 0;
 | |
| }
 | |
| 
 | |
| void set_value(long key, double value)
 | |
| {
 | |
| 	struct long2double *e = find_entry(key);
 | |
| 	if (!e) {
 | |
| 		e = malloc(sizeof(struct long2double));
 | |
| 		hashmap_entry_init(e, memhash(&key, sizeof(long)));
 | |
| 		e->key = key;
 | |
| 		hashmap_add(&map, e);
 | |
| 	}
 | |
| 	e->value = value;
 | |
| }
 | |
| ------------
 | |
| 
 | |
| Using variable-sized keys
 | |
| -------------------------
 | |
| 
 | |
| The `hashmap_get` and `hashmap_remove` functions expect an ordinary
 | |
| `hashmap_entry` structure as key to find the correct entry. If the key data is
 | |
| variable-sized (e.g. a FLEX_ARRAY string) or quite large, it is undesirable
 | |
| to create a full-fledged entry structure on the heap and copy all the key data
 | |
| into the structure.
 | |
| 
 | |
| In this case, the `keydata` parameter can be used to pass
 | |
| variable-sized key data directly to the comparison function, and the `key`
 | |
| parameter can be a stripped-down, fixed-size entry structure allocated on the
 | |
| stack.
 | |
| 
 | |
| See test-hashmap.c for an example using arbitrary-length strings as keys.
 |