diffcore-rename.c: simplify finding exact renames
The find_exact_renames function currently only uses the hash table for
grouping, i.e.:

1. add sources
2. add destinations
3. iterate all buckets, per bucket:
4. split sources from destinations
5. iterate destinations, per destination:
6. iterate sources to find best match

This can be simplified by utilizing the lookup functionality of the hash
table, i.e.:

1. add sources
2. iterate destinations, per destination:
3. look up sources matching the current destination
4. iterate sources to find best match

This saves several iterations and file_similarity allocations for the
destinations.

Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

maint
							parent
							
								
									48f6407ffe
								
							
						
					
					
						commit
						7c85f8acb2
					
				|  | @ -243,7 +243,7 @@ static int score_compare(const void *a_, const void *b_) | ||||||
| } | } | ||||||
|  |  | ||||||
| struct file_similarity { | struct file_similarity { | ||||||
| 	int src_dst, index; | 	int index; | ||||||
| 	struct diff_filespec *filespec; | 	struct diff_filespec *filespec; | ||||||
| 	struct file_similarity *next; | 	struct file_similarity *next; | ||||||
| }; | }; | ||||||
|  | @ -260,25 +260,21 @@ static unsigned int hash_filespec(struct diff_filespec *filespec) | ||||||
| 	return hash; | 	return hash; | ||||||
| } | } | ||||||
|  |  | ||||||
| static int find_identical_files(struct file_similarity *src, | static int find_identical_files(struct hash_table *srcs, | ||||||
| 				struct file_similarity *dst, | 				int dst_index, | ||||||
| 				struct diff_options *options) | 				struct diff_options *options) | ||||||
| { | { | ||||||
| 	int renames = 0; | 	int renames = 0; | ||||||
|  |  | ||||||
| 	/* | 	struct diff_filespec *target = rename_dst[dst_index].two; | ||||||
| 	 * Walk over all the destinations ... |  | ||||||
| 	 */ |  | ||||||
| 	do { |  | ||||||
| 	struct diff_filespec *target = dst->filespec; |  | ||||||
| 	struct file_similarity *p, *best; | 	struct file_similarity *p, *best; | ||||||
| 	int i = 100, best_score = -1; | 	int i = 100, best_score = -1; | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * .. to find the best source match | 	 * Find the best source match for specified destination. | ||||||
| 	 */ | 	 */ | ||||||
| 	best = NULL; | 	best = NULL; | ||||||
| 	for (p = src; p; p = p->next) { | 	for (p = lookup_hash(hash_filespec(target), srcs); p; p = p->next) { | ||||||
| 		int score; | 		int score; | ||||||
| 		struct diff_filespec *source = p->filespec; | 		struct diff_filespec *source = p->filespec; | ||||||
|  |  | ||||||
|  | @ -307,61 +303,28 @@ static int find_identical_files(struct file_similarity *src, | ||||||
| 			break; | 			break; | ||||||
| 	} | 	} | ||||||
| 	if (best) { | 	if (best) { | ||||||
| 		record_rename_pair(dst->index, best->index, MAX_SCORE); | 		record_rename_pair(dst_index, best->index, MAX_SCORE); | ||||||
| 		renames++; | 		renames++; | ||||||
| 	} | 	} | ||||||
| 	} while ((dst = dst->next) != NULL); |  | ||||||
| 	return renames; | 	return renames; | ||||||
| } | } | ||||||
|  |  | ||||||
| static void free_similarity_list(struct file_similarity *p) | static int free_similarity_list(void *p, void *unused) | ||||||
| { | { | ||||||
| 	while (p) { | 	while (p) { | ||||||
| 		struct file_similarity *entry = p; | 		struct file_similarity *entry = p; | ||||||
| 		p = p->next; | 		p = entry->next; | ||||||
| 		free(entry); | 		free(entry); | ||||||
| 	} | 	} | ||||||
|  | 	return 0; | ||||||
| } | } | ||||||
|  |  | ||||||
| static int find_same_files(void *ptr, void *data) | static void insert_file_table(struct hash_table *table, int index, struct diff_filespec *filespec) | ||||||
| { |  | ||||||
| 	int ret; |  | ||||||
| 	struct file_similarity *p = ptr; |  | ||||||
| 	struct file_similarity *src = NULL, *dst = NULL; |  | ||||||
| 	struct diff_options *options = data; |  | ||||||
|  |  | ||||||
| 	/* Split the hash list up into sources and destinations */ |  | ||||||
| 	do { |  | ||||||
| 		struct file_similarity *entry = p; |  | ||||||
| 		p = p->next; |  | ||||||
| 		if (entry->src_dst < 0) { |  | ||||||
| 			entry->next = src; |  | ||||||
| 			src = entry; |  | ||||||
| 		} else { |  | ||||||
| 			entry->next = dst; |  | ||||||
| 			dst = entry; |  | ||||||
| 		} |  | ||||||
| 	} while (p); |  | ||||||
|  |  | ||||||
| 	/* |  | ||||||
| 	 * If we have both sources *and* destinations, see if |  | ||||||
| 	 * we can match them up |  | ||||||
| 	 */ |  | ||||||
| 	ret = (src && dst) ? find_identical_files(src, dst, options) : 0; |  | ||||||
|  |  | ||||||
| 	/* Free the hashes and return the number of renames found */ |  | ||||||
| 	free_similarity_list(src); |  | ||||||
| 	free_similarity_list(dst); |  | ||||||
| 	return ret; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static void insert_file_table(struct hash_table *table, int src_dst, int index, struct diff_filespec *filespec) |  | ||||||
| { | { | ||||||
| 	void **pos; | 	void **pos; | ||||||
| 	unsigned int hash; | 	unsigned int hash; | ||||||
| 	struct file_similarity *entry = xmalloc(sizeof(*entry)); | 	struct file_similarity *entry = xmalloc(sizeof(*entry)); | ||||||
|  |  | ||||||
| 	entry->src_dst = src_dst; |  | ||||||
| 	entry->index = index; | 	entry->index = index; | ||||||
| 	entry->filespec = filespec; | 	entry->filespec = filespec; | ||||||
| 	entry->next = NULL; | 	entry->next = NULL; | ||||||
|  | @ -385,24 +348,26 @@ static void insert_file_table(struct hash_table *table, int src_dst, int index, | ||||||
|  */ |  */ | ||||||
| static int find_exact_renames(struct diff_options *options) | static int find_exact_renames(struct diff_options *options) | ||||||
| { | { | ||||||
| 	int i; | 	int i, renames = 0; | ||||||
| 	struct hash_table file_table; | 	struct hash_table file_table; | ||||||
|  |  | ||||||
|  | 	/* Add all sources to the hash table */ | ||||||
| 	init_hash(&file_table); | 	init_hash(&file_table); | ||||||
| 	preallocate_hash(&file_table, rename_src_nr + rename_dst_nr); | 	preallocate_hash(&file_table, rename_src_nr); | ||||||
| 	for (i = 0; i < rename_src_nr; i++) | 	for (i = 0; i < rename_src_nr; i++) | ||||||
| 		insert_file_table(&file_table, -1, i, rename_src[i].p->one); | 		insert_file_table(&file_table, i, rename_src[i].p->one); | ||||||
|  |  | ||||||
|  | 	/* Walk the destinations and find best source match */ | ||||||
| 	for (i = 0; i < rename_dst_nr; i++) | 	for (i = 0; i < rename_dst_nr; i++) | ||||||
| 		insert_file_table(&file_table, 1, i, rename_dst[i].two); | 		renames += find_identical_files(&file_table, i, options); | ||||||
|  |  | ||||||
| 	/* Find the renames */ | 	/* Free source file_similarity chains */ | ||||||
| 	i = for_each_hash(&file_table, find_same_files, options); | 	for_each_hash(&file_table, free_similarity_list, options); | ||||||
|  |  | ||||||
| 	/* .. and free the hash data structure */ | 	/* .. and free the hash data structure */ | ||||||
| 	free_hash(&file_table); | 	free_hash(&file_table); | ||||||
|  |  | ||||||
| 	return i; | 	return renames; | ||||||
| } | } | ||||||
|  |  | ||||||
| #define NUM_CANDIDATE_PER_DST 4 | #define NUM_CANDIDATE_PER_DST 4 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 Karsten Blees
						Karsten Blees