Merge branch 'dk/diff-delta'
* dk/diff-delta:
  diff-delta.c: Rationalize culling of hash buckets
  diff-delta.c: pack the index structure
maint
						commit
						eb6d54fc79
					
				
							
								
								
									
										109
									
								
								diff-delta.c
								
								
								
								
							
							
						
						
									
										109
									
								
								diff-delta.c
								
								
								
								
							|  | @ -115,7 +115,11 @@ static const unsigned int U[256] = { | |||
| struct index_entry { | ||||
| 	const unsigned char *ptr; | ||||
| 	unsigned int val; | ||||
| 	struct index_entry *next; | ||||
| }; | ||||
|  | ||||
| struct unpacked_index_entry { | ||||
| 	struct index_entry entry; | ||||
| 	struct unpacked_index_entry *next; | ||||
| }; | ||||
|  | ||||
| struct delta_index { | ||||
|  | @ -131,7 +135,8 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) | |||
| 	unsigned int i, hsize, hmask, entries, prev_val, *hash_count; | ||||
| 	const unsigned char *data, *buffer = buf; | ||||
| 	struct delta_index *index; | ||||
| 	struct index_entry *entry, **hash; | ||||
| 	struct unpacked_index_entry *entry, **hash; | ||||
| 	struct index_entry *packed_entry, **packed_hash; | ||||
| 	void *mem; | ||||
| 	unsigned long memsize; | ||||
|  | ||||
|  | @ -148,28 +153,21 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) | |||
| 	hmask = hsize - 1; | ||||
|  | ||||
| 	/* allocate lookup index */ | ||||
| 	memsize = sizeof(*index) + | ||||
| 		  sizeof(*hash) * hsize + | ||||
| 	memsize = sizeof(*hash) * hsize + | ||||
| 		  sizeof(*entry) * entries; | ||||
| 	mem = malloc(memsize); | ||||
| 	if (!mem) | ||||
| 		return NULL; | ||||
| 	index = mem; | ||||
| 	mem = index + 1; | ||||
| 	hash = mem; | ||||
| 	mem = hash + hsize; | ||||
| 	entry = mem; | ||||
|  | ||||
| 	index->memsize = memsize; | ||||
| 	index->src_buf = buf; | ||||
| 	index->src_size = bufsize; | ||||
| 	index->hash_mask = hmask; | ||||
| 	memset(hash, 0, hsize * sizeof(*hash)); | ||||
|  | ||||
| 	/* allocate an array to count hash entries */ | ||||
| 	hash_count = calloc(hsize, sizeof(*hash_count)); | ||||
| 	if (!hash_count) { | ||||
| 		free(index); | ||||
| 		free(hash); | ||||
| 		return NULL; | ||||
| 	} | ||||
|  | ||||
|  | @ -183,12 +181,13 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) | |||
| 			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT]; | ||||
| 		if (val == prev_val) { | ||||
| 			/* keep the lowest of consecutive identical blocks */ | ||||
| 			entry[-1].ptr = data + RABIN_WINDOW; | ||||
| 			entry[-1].entry.ptr = data + RABIN_WINDOW; | ||||
| 			--entries; | ||||
| 		} else { | ||||
| 			prev_val = val; | ||||
| 			i = val & hmask; | ||||
| 			entry->ptr = data + RABIN_WINDOW; | ||||
| 			entry->val = val; | ||||
| 			entry->entry.ptr = data + RABIN_WINDOW; | ||||
| 			entry->entry.val = val; | ||||
| 			entry->next = hash[i]; | ||||
| 			hash[i] = entry++; | ||||
| 			hash_count[i]++; | ||||
|  | @ -208,20 +207,84 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) | |||
| 	 * the reference buffer. | ||||
| 	 */ | ||||
| 	for (i = 0; i < hsize; i++) { | ||||
| 		if (hash_count[i] < HASH_LIMIT) | ||||
| 		int acc; | ||||
|  | ||||
| 		if (hash_count[i] <= HASH_LIMIT) | ||||
| 			continue; | ||||
|  | ||||
| 		entries -= hash_count[i] - HASH_LIMIT; | ||||
| 		/* We leave exactly HASH_LIMIT entries in the bucket */ | ||||
|  | ||||
| 		entry = hash[i]; | ||||
| 		acc = 0; | ||||
| 		do { | ||||
| 			struct index_entry *keep = entry; | ||||
| 			int skip = hash_count[i] / HASH_LIMIT; | ||||
| 			do { | ||||
| 				entry = entry->next; | ||||
| 			} while(--skip && entry); | ||||
| 			keep->next = entry; | ||||
| 		} while(entry); | ||||
| 			acc += hash_count[i] - HASH_LIMIT; | ||||
| 			if (acc > 0) { | ||||
| 				struct unpacked_index_entry *keep = entry; | ||||
| 				do { | ||||
| 					entry = entry->next; | ||||
| 					acc -= HASH_LIMIT; | ||||
| 				} while (acc > 0); | ||||
| 				keep->next = entry->next; | ||||
| 			} | ||||
| 			entry = entry->next; | ||||
| 		} while (entry); | ||||
|  | ||||
| 		/* Assume that this loop is gone through exactly | ||||
| 		 * HASH_LIMIT times and is entered and left with | ||||
| 		 * acc==0.  So the first statement in the loop | ||||
| 		 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT | ||||
| 		 * to the accumulator, and the inner loop consequently | ||||
| 		 * is run (hash_count[i]-HASH_LIMIT) times, removing | ||||
| 		 * one element from the list each time.  Since acc | ||||
| 		 * balances out to 0 at the final run, the inner loop | ||||
| 		 * body can't be left with entry==NULL.  So we indeed | ||||
| 		 * encounter entry==NULL in the outer loop only. | ||||
| 		 */ | ||||
| 	} | ||||
| 	free(hash_count); | ||||
|  | ||||
| 	/* Now create the packed index in array form rather than | ||||
| 	 * linked lists */ | ||||
|  | ||||
| 	memsize = sizeof(*index) | ||||
| 		+ sizeof(*packed_hash) * (hsize+1) | ||||
| 		+ sizeof(*packed_entry) * entries; | ||||
|  | ||||
| 	mem = malloc(memsize); | ||||
|  | ||||
| 	if (!mem) { | ||||
| 		free(hash); | ||||
| 		return NULL; | ||||
| 	} | ||||
|  | ||||
| 	index = mem; | ||||
| 	index->memsize = memsize; | ||||
| 	index->src_buf = buf; | ||||
| 	index->src_size = bufsize; | ||||
| 	index->hash_mask = hmask; | ||||
|  | ||||
| 	mem = index + 1; | ||||
| 	packed_hash = mem; | ||||
| 	mem = packed_hash + (hsize+1); | ||||
| 	packed_entry = mem; | ||||
|  | ||||
| 	/* Coalesce all entries belonging to one linked list into | ||||
| 	 * consecutive array entries */ | ||||
|  | ||||
| 	for (i = 0; i < hsize; i++) { | ||||
| 		packed_hash[i] = packed_entry; | ||||
| 		for (entry = hash[i]; entry; entry = entry->next) | ||||
| 			*packed_entry++ = entry->entry; | ||||
| 	} | ||||
|  | ||||
| 	/* Sentinel value to indicate the length of the last hash | ||||
| 	 * bucket */ | ||||
|  | ||||
| 	packed_hash[hsize] = packed_entry; | ||||
| 	assert(packed_entry - (struct index_entry *)mem == entries); | ||||
| 	free(hash); | ||||
|  | ||||
| 	return index; | ||||
| } | ||||
|  | ||||
|  | @ -302,7 +365,7 @@ create_delta(const struct delta_index *index, | |||
| 			val ^= U[data[-RABIN_WINDOW]]; | ||||
| 			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT]; | ||||
| 			i = val & index->hash_mask; | ||||
| 			for (entry = index->hash[i]; entry; entry = entry->next) { | ||||
| 			for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) { | ||||
| 				const unsigned char *ref = entry->ptr; | ||||
| 				const unsigned char *src = data; | ||||
| 				unsigned int ref_size = ref_top - ref; | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Junio C Hamano
						Junio C Hamano