split the diff-delta interface
This patch splits the diff-delta interface into index creation and delta generation. A wrapper is provided to preserve the diff-delta() call. This will allow for an optimization in pack-objects.c where the source object could be fixed and a full window of objects tentatively tried against that same source object without recomputing the source index each time. This patch only restructure things, plus a couple cleanups for good measure. There is no performance change yet. Signed-off-by: Nicolas Pitre <nico@cam.org>maint
							parent
							
								
									36932eab77
								
							
						
					
					
						commit
						08abe669c0
					
				
							
								
								
									
										75
									
								
								delta.h
								
								
								
								
							
							
						
						
									
										75
									
								
								delta.h
								
								
								
								
							|  | @ -1,12 +1,73 @@ | |||
| #ifndef DELTA_H | ||||
| #define DELTA_H | ||||
|  | ||||
| /* handling of delta buffers */ | ||||
| extern void *diff_delta(void *from_buf, unsigned long from_size, | ||||
| 			void *to_buf, unsigned long to_size, | ||||
| 		        unsigned long *delta_size, unsigned long max_size); | ||||
| extern void *patch_delta(void *src_buf, unsigned long src_size, | ||||
| 			 void *delta_buf, unsigned long delta_size, | ||||
| /* opaque object for delta index */ | ||||
| struct delta_index; | ||||
|  | ||||
| /* | ||||
|  * create_delta_index: compute index data from given buffer | ||||
|  * | ||||
|  * This returns a pointer to a struct delta_index that should be passed to | ||||
|  * subsequent create_delta() calls, or to free_delta_index().  A NULL pointer | ||||
|  * is returned on failure.  The given buffer must not be freed nor altered | ||||
|  * before free_delta_index() is called.  The returned pointer must be freed | ||||
|  * using free_delta_index(). | ||||
|  */ | ||||
| extern struct delta_index * | ||||
| create_delta_index(const void *buf, unsigned long bufsize); | ||||
|  | ||||
| /* | ||||
|  * free_delta_index: free the index created by create_delta_index() | ||||
|  */ | ||||
| extern void free_delta_index(struct delta_index *index); | ||||
|  | ||||
| /* | ||||
|  * create_delta: create a delta from given index for the given buffer | ||||
|  * | ||||
|  * This function may be called multiple times with different buffers using | ||||
|  * the same delta_index pointer.  If max_delta_size is non-zero and the | ||||
|  * resulting delta is to be larger than max_delta_size then NULL is returned. | ||||
|  * On success, a non-NULL pointer to the buffer with the delta data is | ||||
|  * returned and *delta_size is updated with its size.  The returned buffer | ||||
|  * must be freed by the caller. | ||||
|  */ | ||||
| extern void * | ||||
| create_delta(const struct delta_index *index, | ||||
| 	     const void *buf, unsigned long bufsize, | ||||
| 	     unsigned long *delta_size, unsigned long max_delta_size); | ||||
|  | ||||
| /* | ||||
|  * diff_delta: create a delta from source buffer to target buffer | ||||
|  * | ||||
|  * If max_delta_size is non-zero and the resulting delta is to be larger | ||||
|  * than max_delta_size then NULL is returned.  On success, a non-NULL | ||||
|  * pointer to the buffer with the delta data is returned and *delta_size is | ||||
|  * updated with its size.  The returned buffer must be freed by the caller. | ||||
|  */ | ||||
| static inline void * | ||||
| diff_delta(const void *src_buf, unsigned long src_bufsize, | ||||
| 	   const void *trg_buf, unsigned long trg_bufsize, | ||||
| 	   unsigned long *delta_size, unsigned long max_delta_size) | ||||
| { | ||||
| 	struct delta_index *index = create_delta_index(src_buf, src_bufsize); | ||||
| 	if (index) { | ||||
| 		void *delta = create_delta(index, trg_buf, trg_bufsize, | ||||
| 					   delta_size, max_delta_size); | ||||
| 		free_delta_index(index); | ||||
| 		return delta; | ||||
| 	} | ||||
| 	return NULL; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * patch_delta: recreate target buffer given source buffer and delta data | ||||
|  * | ||||
|  * On success, a non-NULL pointer to the target buffer is returned and | ||||
|  * *trg_bufsize is updated with its size.  On failure a NULL pointer is | ||||
|  * returned.  The returned buffer must be freed by the caller. | ||||
|  */ | ||||
| extern void *patch_delta(const void *src_buf, unsigned long src_size, | ||||
| 			 const void *delta_buf, unsigned long delta_size, | ||||
| 			 unsigned long *dst_size); | ||||
|  | ||||
| /* the smallest possible delta size is 4 bytes */ | ||||
|  | @ -14,7 +75,7 @@ extern void *patch_delta(void *src_buf, unsigned long src_size, | |||
|  | ||||
| /* | ||||
|  * This must be called twice on the delta data buffer, first to get the | ||||
|  * expected reference buffer size, and again to get the result buffer size. | ||||
|  * expected source buffer size, and again to get the target buffer size. | ||||
|  */ | ||||
| static inline unsigned long get_delta_hdr_size(const unsigned char **datap, | ||||
| 					       const unsigned char *top) | ||||
|  |  | |||
							
								
								
									
										130
									
								
								diff-delta.c
								
								
								
								
							
							
						
						
									
										130
									
								
								diff-delta.c
								
								
								
								
							|  | @ -27,53 +27,70 @@ | |||
| /* block size: min = 16, max = 64k, power of 2 */ | ||||
| #define BLK_SIZE 16 | ||||
|  | ||||
| #define MIN(a, b) ((a) < (b) ? (a) : (b)) | ||||
| /* maximum hash entry list for the same hash bucket */ | ||||
| #define HASH_LIMIT 64 | ||||
|  | ||||
| #define GR_PRIME 0x9e370001 | ||||
| #define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift)) | ||||
|  | ||||
| struct index { | ||||
| struct index_entry { | ||||
| 	const unsigned char *ptr; | ||||
| 	unsigned int val; | ||||
| 	struct index *next; | ||||
| 	struct index_entry *next; | ||||
| }; | ||||
|  | ||||
| static struct index ** delta_index(const unsigned char *buf, | ||||
| 				   unsigned long bufsize, | ||||
| 				   unsigned long trg_bufsize, | ||||
| 				   unsigned int *hash_shift) | ||||
| struct delta_index { | ||||
| 	const void *src_buf; | ||||
| 	unsigned long src_size; | ||||
| 	unsigned int hash_shift; | ||||
| 	struct index_entry *hash[0]; | ||||
| }; | ||||
|  | ||||
| struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) | ||||
| { | ||||
| 	unsigned int i, hsize, hshift, hlimit, entries, *hash_count; | ||||
| 	const unsigned char *data; | ||||
| 	struct index *entry, **hash; | ||||
| 	unsigned int i, hsize, hshift, entries, *hash_count; | ||||
| 	const unsigned char *data, *buffer = buf; | ||||
| 	struct delta_index *index; | ||||
| 	struct index_entry *entry, **hash; | ||||
| 	void *mem; | ||||
|  | ||||
| 	if (!buf || !bufsize) | ||||
| 		return NULL; | ||||
|  | ||||
| 	/* determine index hash size */ | ||||
| 	entries = bufsize  / BLK_SIZE; | ||||
| 	hsize = entries / 4; | ||||
| 	for (i = 4; (1 << i) < hsize && i < 31; i++); | ||||
| 	hsize = 1 << i; | ||||
| 	hshift = 32 - i; | ||||
| 	*hash_shift = hshift; | ||||
|  | ||||
| 	/* allocate lookup index */ | ||||
| 	mem = malloc(hsize * sizeof(*hash) + entries * sizeof(*entry)); | ||||
| 	mem = malloc(sizeof(*index) + | ||||
| 		     sizeof(*hash) * hsize + | ||||
| 		     sizeof(*entry) * entries); | ||||
| 	if (!mem) | ||||
| 		return NULL; | ||||
| 	index = mem; | ||||
| 	mem = index + 1; | ||||
| 	hash = mem; | ||||
| 	entry = mem + hsize * sizeof(*hash); | ||||
| 	mem = hash + hsize; | ||||
| 	entry = mem; | ||||
|  | ||||
| 	index->src_buf = buf; | ||||
| 	index->src_size = bufsize; | ||||
| 	index->hash_shift = hshift; | ||||
| 	memset(hash, 0, hsize * sizeof(*hash)); | ||||
|  | ||||
| 	/* allocate an array to count hash entries */ | ||||
| 	hash_count = calloc(hsize, sizeof(*hash_count)); | ||||
| 	if (!hash_count) { | ||||
| 		free(hash); | ||||
| 		free(index); | ||||
| 		return NULL; | ||||
| 	} | ||||
|  | ||||
| 	/* then populate the index */ | ||||
| 	data = buf + entries * BLK_SIZE - BLK_SIZE; | ||||
| 	while (data >= buf) { | ||||
| 	data = buffer + entries * BLK_SIZE - BLK_SIZE; | ||||
| 	while (data >= buffer) { | ||||
| 		unsigned int val = adler32(0, data, BLK_SIZE); | ||||
| 		i = HASH(val, hshift); | ||||
| 		entry->ptr = data; | ||||
|  | @ -91,27 +108,18 @@ static struct index ** delta_index(const unsigned char *buf, | |||
| 	 * bucket that would bring us to O(m*n) computing costs (m and n | ||||
| 	 * corresponding to reference and target buffer sizes). | ||||
| 	 * | ||||
| 	 * The more the target buffer is large, the more it is important to | ||||
| 	 * have small entry lists for each hash buckets.  With such a limit | ||||
| 	 * the cost is bounded to something more like O(m+n). | ||||
| 	 */ | ||||
| 	hlimit = (1 << 26) / trg_bufsize; | ||||
| 	if (hlimit < 4*BLK_SIZE) | ||||
| 		hlimit = 4*BLK_SIZE; | ||||
|  | ||||
| 	/* | ||||
| 	 * Now make sure none of the hash buckets has more entries than | ||||
| 	 * Make sure none of the hash buckets has more entries than | ||||
| 	 * we're willing to test.  Otherwise we cull the entry list | ||||
| 	 * uniformly to still preserve a good repartition across | ||||
| 	 * the reference buffer. | ||||
| 	 */ | ||||
| 	for (i = 0; i < hsize; i++) { | ||||
| 		if (hash_count[i] < hlimit) | ||||
| 		if (hash_count[i] < HASH_LIMIT) | ||||
| 			continue; | ||||
| 		entry = hash[i]; | ||||
| 		do { | ||||
| 			struct index *keep = entry; | ||||
| 			int skip = hash_count[i] / hlimit / 2; | ||||
| 			struct index_entry *keep = entry; | ||||
| 			int skip = hash_count[i] / HASH_LIMIT / 2; | ||||
| 			do { | ||||
| 				entry = entry->next; | ||||
| 			} while(--skip && entry); | ||||
|  | @ -120,7 +128,12 @@ static struct index ** delta_index(const unsigned char *buf, | |||
| 	} | ||||
| 	free(hash_count); | ||||
|  | ||||
| 	return hash; | ||||
| 	return index; | ||||
| } | ||||
|  | ||||
| void free_delta_index(struct delta_index *index) | ||||
| { | ||||
| 	free(index); | ||||
| } | ||||
|  | ||||
| /* provide the size of the copy opcode given the block offset and size */ | ||||
|  | @ -131,21 +144,17 @@ static struct index ** delta_index(const unsigned char *buf, | |||
| /* the maximum size for any opcode */ | ||||
| #define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff) | ||||
|  | ||||
| void *diff_delta(void *from_buf, unsigned long from_size, | ||||
| 		 void *to_buf, unsigned long to_size, | ||||
| 		 unsigned long *delta_size, | ||||
| 		 unsigned long max_size) | ||||
| void * | ||||
| create_delta(const struct delta_index *index, | ||||
| 	     const void *trg_buf, unsigned long trg_size, | ||||
| 	     unsigned long *delta_size, unsigned long max_size) | ||||
| { | ||||
| 	unsigned int i, outpos, outsize, hash_shift; | ||||
| 	int inscnt; | ||||
| 	const unsigned char *ref_data, *ref_top, *data, *top; | ||||
| 	unsigned char *out; | ||||
| 	struct index *entry, **hash; | ||||
|  | ||||
| 	if (!from_size || !to_size) | ||||
| 		return NULL; | ||||
| 	hash = delta_index(from_buf, from_size, to_size, &hash_shift); | ||||
| 	if (!hash) | ||||
| 	if (!trg_buf || !trg_size) | ||||
| 		return NULL; | ||||
|  | ||||
| 	outpos = 0; | ||||
|  | @ -153,42 +162,38 @@ void *diff_delta(void *from_buf, unsigned long from_size, | |||
| 	if (max_size && outsize >= max_size) | ||||
| 		outsize = max_size + MAX_OP_SIZE + 1; | ||||
| 	out = malloc(outsize); | ||||
| 	if (!out) { | ||||
| 		free(hash); | ||||
| 	if (!out) | ||||
| 		return NULL; | ||||
| 	} | ||||
|  | ||||
| 	ref_data = from_buf; | ||||
| 	ref_top = from_buf + from_size; | ||||
| 	data = to_buf; | ||||
| 	top = to_buf + to_size; | ||||
|  | ||||
| 	/* store reference buffer size */ | ||||
| 	out[outpos++] = from_size; | ||||
| 	from_size >>= 7; | ||||
| 	while (from_size) { | ||||
| 		out[outpos - 1] |= 0x80; | ||||
| 		out[outpos++] = from_size; | ||||
| 		from_size >>= 7; | ||||
| 	i = index->src_size; | ||||
| 	while (i >= 0x80) { | ||||
| 		out[outpos++] = i | 0x80; | ||||
| 		i >>= 7; | ||||
| 	} | ||||
| 	out[outpos++] = i; | ||||
|  | ||||
| 	/* store target buffer size */ | ||||
| 	out[outpos++] = to_size; | ||||
| 	to_size >>= 7; | ||||
| 	while (to_size) { | ||||
| 		out[outpos - 1] |= 0x80; | ||||
| 		out[outpos++] = to_size; | ||||
| 		to_size >>= 7; | ||||
| 	i = trg_size; | ||||
| 	while (i >= 0x80) { | ||||
| 		out[outpos++] = i | 0x80; | ||||
| 		i >>= 7; | ||||
| 	} | ||||
| 	out[outpos++] = i; | ||||
|  | ||||
| 	ref_data = index->src_buf; | ||||
| 	ref_top = ref_data + index->src_size; | ||||
| 	data = trg_buf; | ||||
| 	top = trg_buf + trg_size; | ||||
| 	hash_shift = index->hash_shift; | ||||
| 	inscnt = 0; | ||||
|  | ||||
| 	while (data < top) { | ||||
| 		unsigned int moff = 0, msize = 0; | ||||
| 		if (data + BLK_SIZE <= top) { | ||||
| 		struct index_entry *entry; | ||||
| 		unsigned int val = adler32(0, data, BLK_SIZE); | ||||
| 		i = HASH(val, hash_shift); | ||||
| 			for (entry = hash[i]; entry; entry = entry->next) { | ||||
| 		for (entry = index->hash[i]; entry; entry = entry->next) { | ||||
| 			const unsigned char *ref = entry->ptr; | ||||
| 			const unsigned char *src = data; | ||||
| 			unsigned int ref_size = ref_top - ref; | ||||
|  | @ -208,7 +213,6 @@ void *diff_delta(void *from_buf, unsigned long from_size, | |||
| 				moff = entry->ptr - ref_data; | ||||
| 			} | ||||
| 		} | ||||
| 		} | ||||
|  | ||||
| 		if (!msize || msize < COPYOP_SIZE(moff, msize)) { | ||||
| 			if (!inscnt) | ||||
|  | @ -271,7 +275,6 @@ void *diff_delta(void *from_buf, unsigned long from_size, | |||
| 				out = realloc(out, outsize); | ||||
| 			if (!out) { | ||||
| 				free(tmp); | ||||
| 				free(hash); | ||||
| 				return NULL; | ||||
| 			} | ||||
| 		} | ||||
|  | @ -280,7 +283,6 @@ void *diff_delta(void *from_buf, unsigned long from_size, | |||
| 	if (inscnt) | ||||
| 		out[outpos - inscnt - 1] = inscnt; | ||||
|  | ||||
| 	free(hash); | ||||
| 	*delta_size = outpos; | ||||
| 	return out; | ||||
| } | ||||
|  |  | |||
|  | @ -13,8 +13,8 @@ | |||
| #include <string.h> | ||||
| #include "delta.h" | ||||
|  | ||||
| void *patch_delta(void *src_buf, unsigned long src_size, | ||||
| 		  void *delta_buf, unsigned long delta_size, | ||||
| void *patch_delta(const void *src_buf, unsigned long src_size, | ||||
| 		  const void *delta_buf, unsigned long delta_size, | ||||
| 		  unsigned long *dst_size) | ||||
| { | ||||
| 	const unsigned char *data, *top; | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Nicolas Pitre
						Nicolas Pitre