Browse Source

diff: do not chomp hunk-header in the middle of a character

We truncate the hunk-header line at 80 bytes, but that 80th byte
could fall in the middle of a multi-byte character, leaving a broken
character at the end of the line.  This uses the utf8_width() function
to make sure we do not cut a character in the middle.

This assumes that the internal representation of the text is
UTF-8.  This needs to be extended in the future but the optimal
direction has not been decided yet.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Junio C Hamano 17 years ago committed by Junio C Hamano
parent
commit
23707811c5
  1. 25
      diff.c
  2. 44
      t/t4025-hunk-header.sh

25
diff.c

@ -10,6 +10,7 @@ @@ -10,6 +10,7 @@
#include "color.h"
#include "attr.h"
#include "run-command.h"
#include "utf8.h"

#ifdef NO_FAST_WORKING_DIRECTORY
#define FAST_WORKING_DIRECTORY 0
@ -469,10 +470,13 @@ static void diff_words_show(struct diff_words_data *diff_words) @@ -469,10 +470,13 @@ static void diff_words_show(struct diff_words_data *diff_words)
}
}

/*
 * Optional hook for choosing where a line may be cut.  It is given the
 * line buffer and its length in bytes and returns the length to keep.
 * Stored in emit_callback.truncate; when set, sane_truncate_line() calls
 * it and returns its result instead of doing its own scan.
 */
typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);

struct emit_callback {
struct xdiff_emit_state xm;
int nparents, color_diff;
unsigned ws_rule;
sane_truncate_fn truncate;
const char **label_path;
struct diff_words_data *diff_words;
int *found_changesp;
@ -525,6 +529,24 @@ static void emit_add_line(const char *reset, struct emit_callback *ecbdata, cons @@ -525,6 +529,24 @@ static void emit_add_line(const char *reset, struct emit_callback *ecbdata, cons
}
}

/*
 * Decide how many leading bytes of line[0..len) should be kept.
 *
 * If the callback data carries a truncate hook, the decision is
 * delegated to it and its answer is returned as-is.  Otherwise the
 * buffer is walked with utf8_width() and the number of bytes consumed
 * by the steps that succeeded is returned, so a trailing sequence that
 * could not be consumed is left out of the count.
 */
static unsigned long sane_truncate_line(struct emit_callback *ecb, char *line, unsigned long len)
{
	size_t remaining = len;
	unsigned long total = remaining;
	const char *pos;

	/* A caller-supplied hook overrides the built-in scan. */
	if (ecb->truncate)
		return ecb->truncate(line, len);

	/*
	 * NOTE(review): utf8_width() is defined elsewhere; this loop relies
	 * on it advancing pos and shrinking remaining on each successful
	 * step, and on it leaving pos NULL when it cannot continue
	 * (presumably an invalid or cut-off sequence) -- confirm in utf8.h.
	 */
	for (pos = line; remaining; ) {
		(void) utf8_width(&pos, &remaining);
		if (!pos)
			break; /* stopped in the middle of a character? */
	}

	/* Bytes consumed before the scan ended. */
	return total - remaining;
}

static void fn_out_consume(void *priv, char *line, unsigned long len)
{
int i;
@ -555,8 +577,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len) @@ -555,8 +577,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
;
if (2 <= i && i < len && line[i] == ' ') {
ecbdata->nparents = i - 1;
len = sane_truncate_line(ecbdata, line, len);
emit_line(diff_get_color(ecbdata->color_diff, DIFF_FRAGINFO),
reset, line, len);
if (line[len-1] != '\n')
putchar('\n');
return;
}


44
t/t4025-hunk-header.sh

@ -0,0 +1,44 @@ @@ -0,0 +1,44 @@
#!/bin/sh

# Checks that the text shown after "@@" in a hunk header, when it is cut
# short, ends on a whole character rather than in the middle of one.
test_description='diff hunk header truncation'

. ./test-lib.sh

# Multi-character fixtures: N is the full three-character word, N1 and N2
# are its first one and first two characters.  NS repeats N many times to
# build an overly long line.
N='日本語'
N1='日'
N2='日本'
NS="$N$N$N$N$N$N$N$N$N$N$N$N$N"

# Build a file with two long lines ("A ..." and "L ..."), each followed
# by ten short lines, and stage it.  Then append " modified" to the
# lines matching /^ [EP]/ in the working tree copy so that "git diff"
# has changes to report.
test_expect_success setup '

(
echo "A $NS"
for c in B C D E F G H I J K
do
echo " $c"
done
echo "L $NS"
for c in M N O P Q R S T U V
do
echo " $c"
done
) >file &&
git add file &&

sed -e "/^ [EP]/s/$/ modified/" <file >file+ &&
mv file+ file

'

# Keep only what follows the last "@@" on each hunk-header line of the
# diff output and compare it with the expected text, which ends with the
# complete characters N2 and N1 respectively.
test_expect_success 'hunk header truncation with an overly long line' '

git diff | sed -n -e "s/^.*@@//p" >actual &&
(
echo " A $N$N$N$N$N$N$N$N$N2"
echo " L $N$N$N$N$N$N$N$N$N1"
) >expected &&
diff -u actual expected

'

test_done
Loading…
Cancel
Save