171 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			C
		
	
	
			
		
		
	
	
			171 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			C
		
	
	
| #ifndef BLOOM_H
 | |
| #define BLOOM_H
 | |
| 
 | |
| struct commit;
 | |
| struct repository;
 | |
| struct commit_graph;
 | |
| 
 | |
| struct bloom_filter_settings {
 | |
| 	/*
 | |
| 	 * The version of the hashing technique being used.
 | |
| 	 * The newest version is 2, which is
 | |
| 	 * the seeded murmur3 hashing technique implemented
 | |
| 	 * in bloom.c. Bloom filters of version 1 were created
 | |
| 	 * with prior versions of Git, which had a bug in the
 | |
| 	 * implementation of the hash function.
 | |
| 	 */
 | |
| 	uint32_t hash_version;
 | |
| 
 | |
| 	/*
 | |
| 	 * The number of times a path is hashed, i.e. the
 | |
| 	 * number of bit positions that cumulatively
 | |
| 	 * determine whether a path is present in the
 | |
| 	 * Bloom filter.
 | |
| 	 */
 | |
| 	uint32_t num_hashes;
 | |
| 
 | |
| 	/*
 | |
| 	 * The minimum number of bits per entry in the Bloom
 | |
| 	 * filter. If the filter contains 'n' entries, then
 | |
| 	 * filter size is the minimum number of 8-bit words
 | |
| 	 * that contain n*b bits.
 | |
| 	 */
 | |
| 	uint32_t bits_per_entry;
 | |
| 
 | |
| 	/*
 | |
| 	 * The maximum number of changed paths per commit
 | |
| 	 * before declaring a Bloom filter to be too-large.
 | |
| 	 *
 | |
| 	 * Not written to the commit-graph file.
 | |
| 	 */
 | |
| 	uint32_t max_changed_paths;
 | |
| };
 | |
| 
 | |
| #define DEFAULT_BLOOM_MAX_CHANGES 512
 | |
| #define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES }
 | |
| #define BITS_PER_WORD 8
 | |
| #define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t)
 | |
| 
 | |
| /*
 | |
|  * A bloom_filter struct represents a data segment to
 | |
|  * use when testing hash values. The 'len' member
 | |
|  * dictates how many entries are stored in
 | |
|  * 'data'.
 | |
|  */
 | |
| struct bloom_filter {
 | |
| 	unsigned char *data;
 | |
| 	size_t len;
 | |
| 	int version;
 | |
| 
 | |
| 	void *to_free;
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * A bloom_key represents the k hash values for a
 | |
|  * given string. These can be precomputed and
 | |
|  * stored in a bloom_key for re-use when testing
 | |
|  * against a bloom_filter. The number of hashes is
 | |
|  * given by the Bloom filter settings and is the same
 | |
|  * for all Bloom filters and keys interacting with
 | |
|  * the loaded version of the commit graph file and
 | |
|  * the Bloom data chunks.
 | |
|  */
 | |
| struct bloom_key {
 | |
| 	uint32_t *hashes;
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * A bloom_keyvec is a vector of bloom_keys, which
 | |
|  * can be used to store multiple keys for a single
 | |
|  * pathspec item.
 | |
|  */
 | |
| struct bloom_keyvec {
 | |
| 	size_t count;
 | |
| 	struct bloom_key key[FLEX_ARRAY];
 | |
| };
 | |
| 
 | |
| int load_bloom_filter_from_graph(struct commit_graph *g,
 | |
| 				 struct bloom_filter *filter,
 | |
| 				 uint32_t graph_pos);
 | |
| 
 | |
| void bloom_key_fill(struct bloom_key *key, const char *data, size_t len,
 | |
| 		    const struct bloom_filter_settings *settings);
 | |
| void bloom_key_clear(struct bloom_key *key);
 | |
| 
 | |
| /*
 | |
|  * bloom_keyvec_new - Allocate and populate a bloom_keyvec with keys for the
 | |
|  * given path.
 | |
|  *
 | |
|  * This function splits the input path by '/' and generates a bloom key for each
 | |
|  * prefix, in reverse order of specificity. For example, given the input
 | |
|  * "a/b/c", it will generate bloom keys for:
 | |
|  *   - "a/b/c"
 | |
|  *   - "a/b"
 | |
|  *   - "a"
 | |
|  *
 | |
|  * The resulting keys are stored in a newly allocated bloom_keyvec.
 | |
|  */
 | |
| struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
 | |
| 				      const struct bloom_filter_settings *settings);
 | |
| void bloom_keyvec_free(struct bloom_keyvec *vec);
 | |
| 
 | |
| void add_key_to_filter(const struct bloom_key *key,
 | |
| 		       struct bloom_filter *filter,
 | |
| 		       const struct bloom_filter_settings *settings);
 | |
| 
 | |
| void init_bloom_filters(void);
 | |
| void deinit_bloom_filters(void);
 | |
| 
 | |
| enum bloom_filter_computed {
 | |
| 	BLOOM_NOT_COMPUTED = (1 << 0),
 | |
| 	BLOOM_COMPUTED     = (1 << 1),
 | |
| 	BLOOM_TRUNC_LARGE  = (1 << 2),
 | |
| 	BLOOM_TRUNC_EMPTY  = (1 << 3),
 | |
| 	BLOOM_UPGRADED     = (1 << 4),
 | |
| };
 | |
| 
 | |
| struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 | |
| 						 struct commit *c,
 | |
| 						 int compute_if_not_present,
 | |
| 						 const struct bloom_filter_settings *settings,
 | |
| 						 enum bloom_filter_computed *computed);
 | |
| 
 | |
| /*
 | |
|  * Find the Bloom filter associated with the given commit "c".
 | |
|  *
 | |
|  * If any of the following are true
 | |
|  *
 | |
|  *   - the repository does not have a commit-graph, or
 | |
|  *   - the repository disables reading from the commit-graph, or
 | |
|  *   - the given commit does not have a Bloom filter computed, or
 | |
|  *   - there is a Bloom filter for commit "c", but it cannot be read
 | |
|  *     because the filter uses an incompatible version of murmur3
 | |
|  *
 | |
|  * , then `get_bloom_filter()` will return NULL. Otherwise, the corresponding
 | |
|  * Bloom filter will be returned.
 | |
|  *
 | |
|  * For callers who wish to inspect Bloom filters with incompatible hash
 | |
|  * versions, use get_or_compute_bloom_filter().
 | |
|  */
 | |
| struct bloom_filter *get_bloom_filter(struct repository *r, struct commit *c);
 | |
| 
 | |
| int bloom_filter_contains(const struct bloom_filter *filter,
 | |
| 			  const struct bloom_key *key,
 | |
| 			  const struct bloom_filter_settings *settings);
 | |
| 
 | |
| /*
 | |
|  * bloom_filter_contains_vec - Check if all keys in a key vector are in the
 | |
|  * Bloom filter.
 | |
|  *
 | |
|  * Returns 1 if **all** keys in the vector are present in the filter,
 | |
|  * 0 if **any** key is not present.
 | |
|  */
 | |
| int bloom_filter_contains_vec(const struct bloom_filter *filter,
 | |
| 			      const struct bloom_keyvec *v,
 | |
| 			      const struct bloom_filter_settings *settings);
 | |
| 
 | |
| uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
 | |
| 				   int version);
 | |
| 
 | |
| #endif
 |