Merge branch 'tb/diffstat-with-utf8-strwidth'

"git diff --stat" etc. were invented back when everything was ASCII
and strlen() was a way to measure the display width of a string;
adjust them to compute the display width assuming UTF-8 pathnames.

* tb/diffstat-with-utf8-strwidth:
  diff: leave NEEDSWORK notes in show_stats() function
  diff.c: use utf8_strwidth() to count display width
maint
Junio C Hamano 2022-10-28 11:26:55 -07:00
commit 7d5a4d86a6
2 changed files with 38 additions and 18 deletions

42
diff.c
View File

@ -2624,7 +2624,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
continue; continue;
} }
fill_print_name(file); fill_print_name(file);
len = strlen(file->print_name); len = utf8_strwidth(file->print_name);
if (max_len < len) if (max_len < len)
max_len = len; max_len = len;


@ -2677,6 +2677,11 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
* making the line longer than the maximum width. * making the line longer than the maximum width.
*/ */


/*
* NEEDSWORK: line_prefix is often used for "log --graph" output
* and contains an ANSI-colored string, whose escape sequences
* take up bytes but no display columns. utf8_strnwidth() should
* be used instead of strlen() to count the display width correctly.
*/
if (options->stat_width == -1) if (options->stat_width == -1)
width = term_columns() - strlen(line_prefix); width = term_columns() - strlen(line_prefix);
else else
@ -2738,7 +2743,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
char *name = file->print_name; char *name = file->print_name;
uintmax_t added = file->added; uintmax_t added = file->added;
uintmax_t deleted = file->deleted; uintmax_t deleted = file->deleted;
int name_len; int name_len, padding;


if (!file->is_interesting && (added + deleted == 0)) if (!file->is_interesting && (added + deleted == 0))
continue; continue;
@ -2747,20 +2752,34 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
* "scale" the filename * "scale" the filename
*/ */
len = name_width; len = name_width;
name_len = strlen(name); name_len = utf8_strwidth(name);
if (name_width < name_len) { if (name_width < name_len) {
char *slash; char *slash;
prefix = "..."; prefix = "...";
len -= 3; len -= 3;
/*
* NEEDSWORK: (name_len - len) is a display width, which
* can be smaller than the byte length of the
* corresponding substring when the name contains
* multi-byte characters. Advancing "name" by that
* number of bytes therefore does *NOT* skip over that
* many columns, so it is very likely that the string
* left after chomping the pathname at the first slash
* found from "name" is still too long.
*/
name += name_len - len; name += name_len - len;
slash = strchr(name, '/'); slash = strchr(name, '/');
if (slash) if (slash)
name = slash; name = slash;
} }
padding = len - utf8_strwidth(name);
if (padding < 0)
padding = 0;


if (file->is_binary) { if (file->is_binary) {
strbuf_addf(&out, " %s%-*s |", prefix, len, name); strbuf_addf(&out, " %s%s%*s | %*s",
strbuf_addf(&out, " %*s", number_width, "Bin"); prefix, name, padding, "",
number_width, "Bin");
if (!added && !deleted) { if (!added && !deleted) {
strbuf_addch(&out, '\n'); strbuf_addch(&out, '\n');
emit_diff_symbol(options, DIFF_SYMBOL_STATS_LINE, emit_diff_symbol(options, DIFF_SYMBOL_STATS_LINE,
@ -2780,8 +2799,9 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
continue; continue;
} }
else if (file->is_unmerged) { else if (file->is_unmerged) {
strbuf_addf(&out, " %s%-*s |", prefix, len, name); strbuf_addf(&out, " %s%s%*s | %*s",
strbuf_addstr(&out, " Unmerged\n"); prefix, name, padding, "",
number_width, "Unmerged");
emit_diff_symbol(options, DIFF_SYMBOL_STATS_LINE, emit_diff_symbol(options, DIFF_SYMBOL_STATS_LINE,
out.buf, out.len, 0); out.buf, out.len, 0);
strbuf_reset(&out); strbuf_reset(&out);
@ -2807,10 +2827,10 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
add = total - del; add = total - del;
} }
} }
strbuf_addf(&out, " %s%-*s |", prefix, len, name); strbuf_addf(&out, " %s%s%*s | %*"PRIuMAX"%s",
strbuf_addf(&out, " %*"PRIuMAX"%s", prefix, name, padding, "",
number_width, added + deleted, number_width, added + deleted,
added + deleted ? " " : ""); added + deleted ? " " : "");
show_graph(&out, '+', add, add_c, reset); show_graph(&out, '+', add, add_c, reset);
show_graph(&out, '-', del, del_c, reset); show_graph(&out, '-', del, del_c, reset);
strbuf_addch(&out, '\n'); strbuf_addch(&out, '\n');

View File

@ -113,20 +113,20 @@ test_expect_success 'diff --no-index with binary creation' '
' '


cat >expect <<EOF cat >expect <<EOF
binfile | Bin 0 -> 1026 bytes binfilë | Bin 0 -> 1026 bytes
textfile | 10000 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ tëxtfilë | 10000 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
EOF EOF


test_expect_success 'diff --stat with binary files and big change count' ' test_expect_success 'diff --stat with binary files and big change count' '
printf "\01\00%1024d" 1 >binfile && printf "\01\00%1024d" 1 >binfilë &&
git add binfile && git add binfilë &&
i=0 && i=0 &&
while test $i -lt 10000; do while test $i -lt 10000; do
echo $i && echo $i &&
i=$(($i + 1)) || return 1 i=$(($i + 1)) || return 1
done >textfile && done >tëxtfilë &&
git add textfile && git add tëxtfilë &&
git diff --cached --stat binfile textfile >output && git -c core.quotepath=false diff --cached --stat binfilë tëxtfilë >output &&
grep " | " output >actual && grep " | " output >actual &&
test_cmp expect actual test_cmp expect actual
' '