git wrapper: DWIM mistyped commands
This patch introduces a modified Damerau-Levenshtein algorithm into
Git's code base, and uses it with the following penalties to show some
similar commands when an unknown command was encountered:

	swap = 0, insertion = 1, substitution = 2, deletion = 4

A typical output would now look like this:

	$ git sm
	git: 'sm' is not a git-command. See 'git --help'.

	Did you mean one of these?
		am
		rm

The cut-off is at similarity rating 6, which was empirically determined
to give sensible results.

As a convenience, if there is only one candidate, Git continues under
the assumption that the user mistyped it. Example:

	$ git reabse
	WARNING: You called a Git program named 'reabse', which does not exist.
	Continuing under the assumption that you meant 'rebase'
	[...]

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

maint
							parent
							
								
									a1184d85e8
								
							
						
					
					
						commit
						8af84dadb1
					
				
							
								
								
									
										2
									
								
								Makefile
								
								
								
								
							
							
						
						
									
										2
									
								
								Makefile
								
								
								
								
							|  | @ -358,6 +358,7 @@ LIB_H += graph.h | ||||||
| LIB_H += grep.h | LIB_H += grep.h | ||||||
| LIB_H += hash.h | LIB_H += hash.h | ||||||
| LIB_H += help.h | LIB_H += help.h | ||||||
|  | LIB_H += levenshtein.h | ||||||
| LIB_H += list-objects.h | LIB_H += list-objects.h | ||||||
| LIB_H += ll-merge.h | LIB_H += ll-merge.h | ||||||
| LIB_H += log-tree.h | LIB_H += log-tree.h | ||||||
|  | @ -433,6 +434,7 @@ LIB_OBJS += hash.o | ||||||
| LIB_OBJS += help.o | LIB_OBJS += help.o | ||||||
| LIB_OBJS += ident.o | LIB_OBJS += ident.o | ||||||
| LIB_OBJS += interpolate.o | LIB_OBJS += interpolate.o | ||||||
|  | LIB_OBJS += levenshtein.o | ||||||
| LIB_OBJS += list-objects.o | LIB_OBJS += list-objects.o | ||||||
| LIB_OBJS += ll-merge.o | LIB_OBJS += ll-merge.o | ||||||
| LIB_OBJS += lockfile.o | LIB_OBJS += lockfile.o | ||||||
|  |  | ||||||
|  | @ -11,7 +11,7 @@ extern const char git_usage_string[]; | ||||||
| extern const char git_more_info_string[]; | extern const char git_more_info_string[]; | ||||||
|  |  | ||||||
| extern void list_common_cmds_help(void); | extern void list_common_cmds_help(void); | ||||||
| extern void help_unknown_cmd(const char *cmd); | extern const char *help_unknown_cmd(const char *cmd); | ||||||
| extern void prune_packed_objects(int); | extern void prune_packed_objects(int); | ||||||
| extern int read_line_with_nul(char *buf, int size, FILE *file); | extern int read_line_with_nul(char *buf, int size, FILE *file); | ||||||
| extern int fmt_merge_msg(int merge_summary, struct strbuf *in, | extern int fmt_merge_msg(int merge_summary, struct strbuf *in, | ||||||
|  |  | ||||||
							
								
								
									
										4
									
								
								git.c
								
								
								
								
							
							
						
						
									
										4
									
								
								git.c
								
								
								
								
							|  | @ -499,7 +499,9 @@ int main(int argc, const char **argv) | ||||||
| 				cmd, argv[0]); | 				cmd, argv[0]); | ||||||
| 			exit(1); | 			exit(1); | ||||||
| 		} | 		} | ||||||
| 		help_unknown_cmd(cmd); | 		argv[0] = help_unknown_cmd(cmd); | ||||||
|  | 		handle_internal_command(argc, argv); | ||||||
|  | 		execv_dashed_external(argv); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	fprintf(stderr, "Failed to run command '%s': %s\n", | 	fprintf(stderr, "Failed to run command '%s': %s\n", | ||||||
|  |  | ||||||
							
								
								
									
										72
									
								
								help.c
								
								
								
								
							
							
						
						
									
										72
									
								
								help.c
								
								
								
								
							|  | @ -1,6 +1,7 @@ | ||||||
| #include "cache.h" | #include "cache.h" | ||||||
| #include "builtin.h" | #include "builtin.h" | ||||||
| #include "exec_cmd.h" | #include "exec_cmd.h" | ||||||
|  | #include "levenshtein.h" | ||||||
| #include "help.h" | #include "help.h" | ||||||
|  |  | ||||||
| /* most GUI terminals set COLUMNS (although some don't export it) */ | /* most GUI terminals set COLUMNS (although some don't export it) */ | ||||||
|  | @ -37,6 +38,16 @@ void add_cmdname(struct cmdnames *cmds, const char *name, int len) | ||||||
| 	cmds->names[cmds->cnt++] = ent; | 	cmds->names[cmds->cnt++] = ent; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static void clean_cmdnames(struct cmdnames *cmds) | ||||||
|  | { | ||||||
|  | 	int i; | ||||||
|  | 	for (i = 0; i < cmds->cnt; ++i) | ||||||
|  | 		free(cmds->names[i]); | ||||||
|  | 	free(cmds->names); | ||||||
|  | 	cmds->cnt = 0; | ||||||
|  | 	cmds->alloc = 0; | ||||||
|  | } | ||||||
|  |  | ||||||
| static int cmdname_compare(const void *a_, const void *b_) | static int cmdname_compare(const void *a_, const void *b_) | ||||||
| { | { | ||||||
| 	struct cmdname *a = *(struct cmdname **)a_; | 	struct cmdname *a = *(struct cmdname **)a_; | ||||||
|  | @ -257,9 +268,68 @@ int is_in_cmdlist(struct cmdnames *c, const char *s) | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
|  |  | ||||||
| void help_unknown_cmd(const char *cmd) | static int levenshtein_compare(const void *p1, const void *p2) | ||||||
| { | { | ||||||
|  | 	const struct cmdname *const *c1 = p1, *const *c2 = p2; | ||||||
|  | 	const char *s1 = (*c1)->name, *s2 = (*c2)->name; | ||||||
|  | 	int l1 = (*c1)->len; | ||||||
|  | 	int l2 = (*c2)->len; | ||||||
|  | 	return l1 != l2 ? l1 - l2 : strcmp(s1, s2); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | const char *help_unknown_cmd(const char *cmd) | ||||||
|  | { | ||||||
|  | 	int i, n, best_similarity = 0; | ||||||
|  | 	struct cmdnames main_cmds, other_cmds; | ||||||
|  |  | ||||||
|  | 	memset(&main_cmds, 0, sizeof(main_cmds)); | ||||||
|  | 	memset(&other_cmds, 0, sizeof(main_cmds)); | ||||||
|  |  | ||||||
|  | 	load_command_list("git-", &main_cmds, &other_cmds); | ||||||
|  |  | ||||||
|  | 	ALLOC_GROW(main_cmds.names, main_cmds.cnt + other_cmds.cnt, | ||||||
|  | 		   main_cmds.alloc); | ||||||
|  | 	memcpy(main_cmds.names + main_cmds.cnt, other_cmds.names, | ||||||
|  | 	       other_cmds.cnt * sizeof(other_cmds.names[0])); | ||||||
|  | 	main_cmds.cnt += other_cmds.cnt; | ||||||
|  | 	free(other_cmds.names); | ||||||
|  |  | ||||||
|  | 	/* This reuses cmdname->len for similarity index */ | ||||||
|  | 	for (i = 0; i < main_cmds.cnt; ++i) | ||||||
|  | 		main_cmds.names[i]->len = | ||||||
|  | 			levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); | ||||||
|  |  | ||||||
|  | 	qsort(main_cmds.names, main_cmds.cnt, | ||||||
|  | 	      sizeof(*main_cmds.names), levenshtein_compare); | ||||||
|  |  | ||||||
|  | 	if (!main_cmds.cnt) | ||||||
|  | 		die ("Uh oh. Your system reports no Git commands at all."); | ||||||
|  |  | ||||||
|  | 	best_similarity = main_cmds.names[0]->len; | ||||||
|  | 	n = 1; | ||||||
|  | 	while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) | ||||||
|  | 		++n; | ||||||
|  | 	if (n == 1) { | ||||||
|  | 		const char *assumed = main_cmds.names[0]->name; | ||||||
|  | 		main_cmds.names[0] = NULL; | ||||||
|  | 		clean_cmdnames(&main_cmds); | ||||||
|  | 		fprintf(stderr, "WARNING: You called a Git program named '%s', " | ||||||
|  | 			"which does not exist.\n" | ||||||
|  | 			"Continuing under the assumption that you meant '%s'\n", | ||||||
|  | 			cmd, assumed); | ||||||
|  | 		return assumed; | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	fprintf(stderr, "git: '%s' is not a git-command. See 'git --help'.\n", cmd); | 	fprintf(stderr, "git: '%s' is not a git-command. See 'git --help'.\n", cmd); | ||||||
|  |  | ||||||
|  | 	if (best_similarity < 6) { | ||||||
|  | 		fprintf(stderr, "\nDid you mean %s?\n", | ||||||
|  | 			n < 2 ? "this": "one of these"); | ||||||
|  |  | ||||||
|  | 		for (i = 0; i < n; i++) | ||||||
|  | 			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	exit(1); | 	exit(1); | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
							
								
								
									
										2
									
								
								help.h
								
								
								
								
							
							
						
						
									
										2
									
								
								help.h
								
								
								
								
							|  | @ -5,7 +5,7 @@ struct cmdnames { | ||||||
| 	int alloc; | 	int alloc; | ||||||
| 	int cnt; | 	int cnt; | ||||||
| 	struct cmdname { | 	struct cmdname { | ||||||
| 		size_t len; | 		size_t len; /* also used for similarity index in help.c */ | ||||||
| 		char name[FLEX_ARRAY]; | 		char name[FLEX_ARRAY]; | ||||||
| 	} **names; | 	} **names; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -0,0 +1,47 @@ | ||||||
|  | #include "cache.h" | ||||||
|  | #include "levenshtein.h" | ||||||
|  |  | ||||||
/*
 * Weighted Damerau-Levenshtein distance: the cheapest way to turn
 * string1 into string2 using the operations below.
 *
 *   w  cost of swapping two adjacent characters
 *   s  cost of substituting one character for another
 *   a  cost of inserting (adding) a character of string2
 *   d  cost of deleting a character of string1
 *
 * Only three rows of the cost matrix are kept at any time: row2 is the
 * row being filled for string1[i], row1 is the previous row, and row0
 * is the one before that (needed for the swap case).  Column j + 1
 * corresponds to the first j + 1 characters of string2.
 *
 * Returns the total cost; the scratch rows are freed before returning.
 */
int levenshtein(const char *string1, const char *string2,
		int w, int s, int a, int d)
{
	int len1 = strlen(string1), len2 = strlen(string2);
	int *row0 = xmalloc(sizeof(int) * (len2 + 1));
	int *row1 = xmalloc(sizeof(int) * (len2 + 1));
	int *row2 = xmalloc(sizeof(int) * (len2 + 1));
	int i, j;

	/* Empty string1 prefix: reaching j characters takes j insertions. */
	for (j = 0; j <= len2; j++)
		row1[j] = j * a;
	for (i = 0; i < len1; i++) {
		int *dummy;

		/* Empty string2 prefix: drop all i + 1 characters seen so far. */
		row2[0] = (i + 1) * d;
		for (j = 0; j < len2; j++) {
			/* substitution */
			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
			/* swap */
			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
					string1[i] == string2[j - 1] &&
					row2[j + 1] > row0[j - 1] + w)
				row2[j + 1] = row0[j - 1] + w;
			/*
			 * deletion -- must be considered in every column,
			 * including the last one, since that column holds
			 * the final result.
			 */
			if (row2[j + 1] > row1[j + 1] + d)
				row2[j + 1] = row1[j + 1] + d;
			/* insertion */
			if (row2[j + 1] > row2[j] + a)
				row2[j + 1] = row2[j] + a;
		}

		/* Rotate the rows; the oldest one is recycled as scratch. */
		dummy = row0;
		row0 = row1;
		row1 = row2;
		row2 = dummy;
	}

	/* After the final rotation the last completed row is row1. */
	i = row1[len2];
	free(row0);
	free(row1);
	free(row2);

	return i;
}
|  | @ -0,0 +1,8 @@ | ||||||
#ifndef LEVENSHTEIN_H
#define LEVENSHTEIN_H

/*
 * Weighted edit distance between string1 and string2.  Each penalty is
 * the cost charged for one operation of the named kind; the return
 * value is the total cost.
 */
int levenshtein(const char *string1, const char *string2,
	int swap_penalty, int substitution_penalty,
	int insertion_penalty, int deletion_penalty);

#endif
		Loading…
	
		Reference in New Issue
	
	 Johannes Schindelin
						Johannes Schindelin