chunk-format: create read chunk API
Add the capability to read the table of contents, then pair the chunks
with necessary logic using chunk_read_fn pointers. Callers will be added
in future changes, but the typical outline will be:
 1. initialize a 'struct chunkfile' with init_chunkfile(NULL).
 2. call read_table_of_contents().
 3. for each chunk to parse,
    a. call pair_chunk() to assign a pointer with the chunk position, or
    b. call read_chunk() to run a callback on the chunk start and size.
 4. call free_chunkfile() to clear the 'struct chunkfile' data.
We are re-using the opaque 'struct chunkfile' type, as its definition is
internal to the chunk-format API. This gives it essentially two modes:
write and read. If the same struct instance were used for both reads and
writes, then there would be failures.
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
			
			
				maint
			
			
		
							parent
							
								
									63a8f0e9b9
								
							
						
					
					
						commit
						5f0879f54b
					
				|  | @ -11,6 +11,8 @@ struct chunk_info { | ||||||
| 	uint32_t id; | 	uint32_t id; | ||||||
| 	uint64_t size; | 	uint64_t size; | ||||||
| 	chunk_write_fn write_fn; | 	chunk_write_fn write_fn; | ||||||
|  |  | ||||||
|  | 	const void *start; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| struct chunkfile { | struct chunkfile { | ||||||
|  | @ -88,3 +90,81 @@ int write_chunkfile(struct chunkfile *cf, void *data) | ||||||
|  |  | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | int read_table_of_contents(struct chunkfile *cf, | ||||||
|  | 			   const unsigned char *mfile, | ||||||
|  | 			   size_t mfile_size, | ||||||
|  | 			   uint64_t toc_offset, | ||||||
|  | 			   int toc_length) | ||||||
|  | { | ||||||
|  | 	uint32_t chunk_id; | ||||||
|  | 	const unsigned char *table_of_contents = mfile + toc_offset; | ||||||
|  |  | ||||||
|  | 	ALLOC_GROW(cf->chunks, toc_length, cf->chunks_alloc); | ||||||
|  |  | ||||||
|  | 	while (toc_length--) { | ||||||
|  | 		uint64_t chunk_offset, next_chunk_offset; | ||||||
|  |  | ||||||
|  | 		chunk_id = get_be32(table_of_contents); | ||||||
|  | 		chunk_offset = get_be64(table_of_contents + 4); | ||||||
|  |  | ||||||
|  | 		if (!chunk_id) { | ||||||
|  | 			error(_("terminating chunk id appears earlier than expected")); | ||||||
|  | 			return 1; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		table_of_contents += CHUNK_TOC_ENTRY_SIZE; | ||||||
|  | 		next_chunk_offset = get_be64(table_of_contents + 4); | ||||||
|  |  | ||||||
|  | 		if (next_chunk_offset < chunk_offset || | ||||||
|  | 		    next_chunk_offset > mfile_size - the_hash_algo->rawsz) { | ||||||
|  | 			error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""), | ||||||
|  | 			      chunk_offset, next_chunk_offset); | ||||||
|  | 			return -1; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		cf->chunks[cf->chunks_nr].id = chunk_id; | ||||||
|  | 		cf->chunks[cf->chunks_nr].start = mfile + chunk_offset; | ||||||
|  | 		cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset; | ||||||
|  | 		cf->chunks_nr++; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	chunk_id = get_be32(table_of_contents); | ||||||
|  | 	if (chunk_id) { | ||||||
|  | 		error(_("final chunk has non-zero id %"PRIx32""), chunk_id); | ||||||
|  | 		return -1; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static int pair_chunk_fn(const unsigned char *chunk_start, | ||||||
|  | 			 size_t chunk_size, | ||||||
|  | 			 void *data) | ||||||
|  | { | ||||||
|  | 	const unsigned char **p = data; | ||||||
|  | 	*p = chunk_start; | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | int pair_chunk(struct chunkfile *cf, | ||||||
|  | 	       uint32_t chunk_id, | ||||||
|  | 	       const unsigned char **p) | ||||||
|  | { | ||||||
|  | 	return read_chunk(cf, chunk_id, pair_chunk_fn, p); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | int read_chunk(struct chunkfile *cf, | ||||||
|  | 	       uint32_t chunk_id, | ||||||
|  | 	       chunk_read_fn fn, | ||||||
|  | 	       void *data) | ||||||
|  | { | ||||||
|  | 	int i; | ||||||
|  |  | ||||||
|  | 	for (i = 0; i < cf->chunks_nr; i++) { | ||||||
|  | 		if (cf->chunks[i].id == chunk_id) | ||||||
|  | 			return fn(cf->chunks[i].start, cf->chunks[i].size, data); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return CHUNK_NOT_FOUND; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -8,6 +8,20 @@ struct chunkfile; | ||||||
|  |  | ||||||
| #define CHUNK_TOC_ENTRY_SIZE (sizeof(uint32_t) + sizeof(uint64_t)) | #define CHUNK_TOC_ENTRY_SIZE (sizeof(uint32_t) + sizeof(uint64_t)) | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Initialize a 'struct chunkfile' for writing _or_ reading a file | ||||||
|  |  * with the chunk format. | ||||||
|  |  * | ||||||
|  |  * If writing a file, supply a non-NULL 'struct hashfile *' that will | ||||||
|  |  * be used to write. | ||||||
|  |  * | ||||||
|  |  * If reading a file, use a NULL 'struct hashfile *' and then call | ||||||
|  |  * read_table_of_contents(), supplying the memory-mapped data. Then use | ||||||
|  |  * the pair_chunk() or read_chunk() methods, as appropriate. | ||||||
|  |  * | ||||||
|  |  * DO NOT MIX THESE MODES. Use different 'struct chunkfile' instances | ||||||
|  |  * for reading and writing. | ||||||
|  |  */ | ||||||
| struct chunkfile *init_chunkfile(struct hashfile *f); | struct chunkfile *init_chunkfile(struct hashfile *f); | ||||||
| void free_chunkfile(struct chunkfile *cf); | void free_chunkfile(struct chunkfile *cf); | ||||||
| int get_num_chunks(struct chunkfile *cf); | int get_num_chunks(struct chunkfile *cf); | ||||||
|  | @ -18,4 +32,37 @@ void add_chunk(struct chunkfile *cf, | ||||||
| 	       chunk_write_fn fn); | 	       chunk_write_fn fn); | ||||||
| int write_chunkfile(struct chunkfile *cf, void *data); | int write_chunkfile(struct chunkfile *cf, void *data); | ||||||
|  |  | ||||||
|  | int read_table_of_contents(struct chunkfile *cf, | ||||||
|  | 			   const unsigned char *mfile, | ||||||
|  | 			   size_t mfile_size, | ||||||
|  | 			   uint64_t toc_offset, | ||||||
|  | 			   int toc_length); | ||||||
|  |  | ||||||
|  | #define CHUNK_NOT_FOUND (-2) | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Find 'chunk_id' in the given chunkfile and assign the | ||||||
|  |  * given pointer to the position in the mmap'd file where | ||||||
|  |  * that chunk begins. | ||||||
|  |  * | ||||||
|  |  * Returns CHUNK_NOT_FOUND if the chunk does not exist. | ||||||
|  |  */ | ||||||
|  | int pair_chunk(struct chunkfile *cf, | ||||||
|  | 	       uint32_t chunk_id, | ||||||
|  | 	       const unsigned char **p); | ||||||
|  |  | ||||||
|  | typedef int (*chunk_read_fn)(const unsigned char *chunk_start, | ||||||
|  | 			     size_t chunk_size, void *data); | ||||||
|  | /* | ||||||
|  |  * Find 'chunk_id' in the given chunkfile and call the | ||||||
|  |  * given chunk_read_fn method with the information for | ||||||
|  |  * that chunk. | ||||||
|  |  * | ||||||
|  |  * Returns CHUNK_NOT_FOUND if the chunk does not exist. | ||||||
|  |  */ | ||||||
|  | int read_chunk(struct chunkfile *cf, | ||||||
|  | 	       uint32_t chunk_id, | ||||||
|  | 	       chunk_read_fn fn, | ||||||
|  | 	       void *data); | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 Derrick Stolee
						Derrick Stolee