color-words: make regex configurable via attributes
Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting.

We also provide built-in regexes for the languages that already had funcname patterns, and add an appropriate diff driver entry for C/++. (The patterns are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.)

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

maint
							parent
							
								
									c4b252c3d8
								
							
						
					
					
						commit
						80c49c3de2
					
				|  | @ -102,6 +102,10 @@ differences.  You may want to append `|[^[:space:]]` to your regular | |||
| expression to make sure that it matches all non-whitespace characters. | ||||
| A match that contains a newline is silently truncated(!) at the | ||||
| newline. | ||||
| + | ||||
| The regex can also be set via a diff driver, see | ||||
| linkgit:gitattributes[1]; giving it explicitly overrides any diff | ||||
| driver setting. | ||||
|  | ||||
| --no-renames:: | ||||
| 	Turn off rename detection, even when the configuration | ||||
|  |  | |||
|  | @ -317,6 +317,8 @@ patterns are available: | |||
|  | ||||
| - `bibtex` suitable for files with BibTeX coded references. | ||||
|  | ||||
| - `cpp` suitable for source code in the C and C++ languages. | ||||
|  | ||||
| - `html` suitable for HTML/XHTML documents. | ||||
|  | ||||
| - `java` suitable for source code in the Java language. | ||||
|  | @ -334,6 +336,25 @@ patterns are available: | |||
| - `tex` suitable for source code for LaTeX documents. | ||||
|  | ||||
|  | ||||
| Customizing word diff | ||||
| ^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| You can customize the rules that `git diff --color-words` uses to | ||||
| split words in a line, by specifying an appropriate regular expression | ||||
| in the "diff.*.wordregex" configuration variable.  For example, in TeX | ||||
| a backslash followed by a sequence of letters forms a command, but | ||||
| several such commands can be run together without intervening | ||||
| whitespace.  To separate them, use a regular expression such as | ||||
|  | ||||
| ------------------------ | ||||
| [diff "tex"] | ||||
| 	wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+" | ||||
| ------------------------ | ||||
|  | ||||
| A built-in pattern is provided for all languages listed in the | ||||
| previous section. | ||||
|  | ||||
|  | ||||
| Performing text diffs of binary files | ||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
|  |  | |||
							
								
								
									
										10
									
								
								diff.c
								
								
								
								
							
							
						
						
									
										10
									
								
								diff.c
								
								
								
								
							|  | @ -1380,6 +1380,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe | |||
| 	return one->driver->funcname.pattern ? &one->driver->funcname : NULL; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Look up the word-splitting regex attached to the diff driver of 'one'. | ||||
|  * Returns the driver's word_regex field as-is; the caller in this file | ||||
|  * treats a NULL result as "no regex supplied by the driver". | ||||
|  */ | ||||
| static const char *userdiff_word_regex(struct diff_filespec *one) | ||||
| { | ||||
| 	const char *regex; | ||||
|  | ||||
| 	/* one->driver is only dereferenced after the load call below */ | ||||
| 	diff_filespec_load_driver(one); | ||||
| 	regex = one->driver->word_regex; | ||||
| 	return regex; | ||||
| } | ||||
|  | ||||
| void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b) | ||||
| { | ||||
| 	if (!options->a_prefix) | ||||
|  | @ -1540,6 +1546,10 @@ static void builtin_diff(const char *name_a, | |||
| 			ecbdata.diff_words = | ||||
| 				xcalloc(1, sizeof(struct diff_words_data)); | ||||
| 			ecbdata.diff_words->file = o->file; | ||||
| 			if (!o->word_regex) | ||||
| 				o->word_regex = userdiff_word_regex(one); | ||||
| 			if (!o->word_regex) | ||||
| 				o->word_regex = userdiff_word_regex(two); | ||||
| 			if (o->word_regex) { | ||||
| 				ecbdata.diff_words->word_regex = (regex_t *) | ||||
| 					xmalloc(sizeof(regex_t)); | ||||
|  |  | |||
|  | @ -84,6 +84,41 @@ test_expect_success 'word diff with a regular expression' ' | |||
|  | ||||
| ' | ||||
|  | ||||
| test_expect_success 'set a diff driver' ' | ||||
| 	git config diff.testdriver.wordregex "[^[:space:]]" && | ||||
| 	cat <<EOF > .gitattributes | ||||
| pre diff=testdriver | ||||
| post diff=testdriver | ||||
| EOF | ||||
| ' | ||||
|  | ||||
| test_expect_success 'option overrides default' ' | ||||
|  | ||||
| 	word_diff --color-words="[a-z]+" | ||||
|  | ||||
| ' | ||||
|  | ||||
| cat > expect <<\EOF | ||||
| <WHITE>diff --git a/pre b/post<RESET> | ||||
| <WHITE>index 330b04f..5ed8eff 100644<RESET> | ||||
| <WHITE>--- a/pre<RESET> | ||||
| <WHITE>+++ b/post<RESET> | ||||
| <BROWN>@@ -1,3 +1,7 @@<RESET> | ||||
| h(4)<GREEN>,hh[44]<RESET> | ||||
| <RESET> | ||||
| a = b + c<RESET> | ||||
|  | ||||
| <GREEN>aa = a<RESET> | ||||
|  | ||||
| <GREEN>aeff = aeff * ( aaa )<RESET> | ||||
| EOF | ||||
|  | ||||
| test_expect_success 'use default supplied by driver' ' | ||||
|  | ||||
| 	word_diff --color-words | ||||
|  | ||||
| ' | ||||
|  | ||||
| echo 'aaa (aaa)' > pre | ||||
| echo 'aaa (aaa) aaa' > post | ||||
|  | ||||
|  | @ -100,6 +135,7 @@ test_expect_success 'test parsing words for newline' ' | |||
|  | ||||
| 	word_diff --color-words="a+" | ||||
|  | ||||
|  | ||||
| ' | ||||
|  | ||||
| echo '(:' > pre | ||||
|  |  | |||
							
								
								
									
										78
									
								
								userdiff.c
								
								
								
								
							
							
						
						
									
										78
									
								
								userdiff.c
								
								
								
								
							|  | @ -6,14 +6,20 @@ static struct userdiff_driver *drivers; | |||
| static int ndrivers; | ||||
| static int drivers_alloc; | ||||
|  | ||||
| #define FUNCNAME(name, pattern) \ | ||||
| 	{ name, NULL, -1, { pattern, REG_EXTENDED } } | ||||
| #define PATTERNS(name, pattern, wordregex)			\ | ||||
| 	{ name, NULL, -1, { pattern, REG_EXTENDED }, wordregex } | ||||
| static struct userdiff_driver builtin_drivers[] = { | ||||
| FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"), | ||||
| FUNCNAME("java", | ||||
| PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", | ||||
| 	 "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"), | ||||
| PATTERNS("java", | ||||
| 	 "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" | ||||
| 	 "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"), | ||||
| FUNCNAME("objc", | ||||
| 	 "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$", | ||||
| 	 "[a-zA-Z_][a-zA-Z0-9_]*" | ||||
| 	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" | ||||
| 	 "|[-+*/<>%&^|=!]=" | ||||
| 	 "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|" | ||||
| 	 "|[^[:space:]]|[\x80-\xff]+"), | ||||
| PATTERNS("objc", | ||||
| 	 /* Negate C statements that can look like functions */ | ||||
| 	 "!^[ \t]*(do|for|if|else|return|switch|while)\n" | ||||
| 	 /* Objective-C methods */ | ||||
|  | @ -21,20 +27,60 @@ FUNCNAME("objc", | |||
| 	 /* C functions */ | ||||
| 	 "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n" | ||||
| 	 /* Objective-C class/protocol definitions */ | ||||
| 	 "^(@(implementation|interface|protocol)[ \t].*)$"), | ||||
| FUNCNAME("pascal", | ||||
| 	 "^(@(implementation|interface|protocol)[ \t].*)$", | ||||
| 	 /* -- */ | ||||
| 	 "[a-zA-Z_][a-zA-Z0-9_]*" | ||||
| 	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" | ||||
| 	 "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" | ||||
| 	 "|[^[:space:]]|[\x80-\xff]+"), | ||||
| PATTERNS("pascal", | ||||
| 	 "^((procedure|function|constructor|destructor|interface|" | ||||
| 		"implementation|initialization|finalization)[ \t]*.*)$" | ||||
| 	 "\n" | ||||
| 	 "^(.*=[ \t]*(class|record).*)$"), | ||||
| FUNCNAME("php", "^[\t ]*((function|class).*)"), | ||||
| FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"), | ||||
| FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"), | ||||
| FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"), | ||||
| FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"), | ||||
| 	 "^(.*=[ \t]*(class|record).*)$", | ||||
| 	 /* -- */ | ||||
| 	 "[a-zA-Z_][a-zA-Z0-9_]*" | ||||
| 	 "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" | ||||
| 	 "|<>|<=|>=|:=|\\.\\." | ||||
| 	 "|[^[:space:]]|[\x80-\xff]+"), | ||||
| PATTERNS("php", "^[\t ]*((function|class).*)", | ||||
| 	 /* -- */ | ||||
| 	 "[a-zA-Z_][a-zA-Z0-9_]*" | ||||
| 	 "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" | ||||
| 	 "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->" | ||||
| 	 "|[^[:space:]]|[\x80-\xff]+"), | ||||
| /* funcname pattern: lines that open a "class" or "def" */ | ||||
| PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$", | ||||
| 	 /* | ||||
| 	  * word regex: identifiers, numeric literals, multi-character | ||||
| 	  * operators, then any single non-whitespace character, or a run | ||||
| 	  * of high-bit bytes so UTF-8 sequences stay in one chunk. | ||||
| 	  */ | ||||
| 	 "[a-zA-Z_][a-zA-Z0-9_]*" | ||||
| 	 "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?" | ||||
| 	 "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?" | ||||
| 	 /* the bracket expression must be closed before the next '|' */ | ||||
| 	 "|[^[:space:]]|[\x80-\xff]+"), | ||||
| 	 /* -- */ | ||||
| /* funcname pattern: lines that open a "class", "module" or "def" */ | ||||
| PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", | ||||
| 	 /* | ||||
| 	  * word regex: identifiers with an optional @, @@ or $ prefix, | ||||
| 	  * numeric and ?-character literals, multi-character operators, | ||||
| 	  * then any single non-whitespace character, or a run of | ||||
| 	  * high-bit bytes so UTF-8 sequences stay in one chunk. | ||||
| 	  */ | ||||
| 	 "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" | ||||
| 	 "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." | ||||
| 	 "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~" | ||||
| 	 /* the bracket expression must be closed before the next '|' */ | ||||
| 	 "|[^[:space:]]|[\x80-\xff]+"), | ||||
| PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", | ||||
| 	 "[={}\"]|[^={}\" \t]+"), | ||||
| PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", | ||||
| 	 "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"), | ||||
| PATTERNS("cpp", | ||||
| 	 /* Jump targets or access declarations */ | ||||
| 	 "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n" | ||||
| 	 /* C/++ functions/methods at top level */ | ||||
| 	 "^([A-Za-z_][A-Za-z_0-9]*([ \t]+[A-Za-z_][A-Za-z_0-9]*([ \t]*::[ \t]*[^[:space:]]+)?){1,}[ \t]*\\([^;]*)$\n" | ||||
| 	 /* compound type at top level */ | ||||
| 	 "^((struct|class|enum)[^;]*)$", | ||||
| 	 /* -- */ | ||||
| 	 "[a-zA-Z_][a-zA-Z0-9_]*" | ||||
| 	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" | ||||
| 	 "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" | ||||
| 	 "|[^[:space:]]|[\x80-\xff]+"), | ||||
| { "default", NULL, -1, { NULL, 0 } }, | ||||
| }; | ||||
| #undef FUNCNAME | ||||
| #undef PATTERNS | ||||
|  | ||||
| static struct userdiff_driver driver_true = { | ||||
| 	"diff=true", | ||||
|  | @ -134,6 +180,8 @@ int userdiff_config(const char *k, const char *v) | |||
| 		return parse_string(&drv->external, k, v); | ||||
| 	if ((drv = parse_driver(k, v, "textconv"))) | ||||
| 		return parse_string(&drv->textconv, k, v); | ||||
| 	if ((drv = parse_driver(k, v, "wordregex"))) | ||||
| 		return parse_string(&drv->word_regex, k, v); | ||||
|  | ||||
| 	return 0; | ||||
| } | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ struct userdiff_driver { | |||
| 	const char *external; | ||||
| 	int binary; | ||||
| 	struct userdiff_funcname funcname; | ||||
| 	const char *word_regex; | ||||
| 	const char *textconv; | ||||
| }; | ||||
|  | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Thomas Rast
						Thomas Rast