Browse Source

Merge branch 'nd/pretty-formats'

Pretty-printing the body of a commit that is stored in a non-UTF-8
encoding did not work well.  The early part of this series fixes
that.  The series then adds the %C(auto) specifier, which turns
coloring on when we are emitting to the terminal, and adds
column-aligning format directives.

* nd/pretty-formats:
  pretty: support %>> that steal trailing spaces
  pretty: support truncating in %>, %< and %><
  pretty: support padding placeholders, %< %> and %><
  pretty: add %C(auto) for auto-coloring
  pretty: split color parsing into a separate function
  pretty: two phase conversion for non utf-8 commits
  utf8.c: add reencode_string_len() that can handle NULs in string
  utf8.c: add utf8_strnwidth() with the ability to skip ansi sequences
  utf8.c: move display_mode_esc_sequence_len() for use by other functions
  pretty: share code between format_decoration and show_decorations
  pretty-formats.txt: wrap long lines
  pretty: get the correct encoding for --pretty:format=%e
  pretty: save commit encoding from logmsg_reencode if the caller needs it
maint
Junio C Hamano 12 years ago
parent
commit
e52e6f79cc
  1. 35
      Documentation/pretty-formats.txt
  2. 2
      builtin/blame.c
  3. 2
      builtin/commit.c
  4. 1
      commit.h
  5. 2
      compat/precompose_utf8.c
  6. 48
      log-tree.c
  7. 1
      log-tree.h
  8. 358
      pretty.c
  9. 2
      revision.c
  10. 175
      t/t4205-log-pretty-formats.sh
  11. 8
      t/t4207-log-decoration-colors.sh
  12. 12
      t/t6006-rev-list-format.sh
  13. 104
      utf8.c
  14. 23
      utf8.h

35
Documentation/pretty-formats.txt

@ -106,18 +106,22 @@ The placeholders are: @@ -106,18 +106,22 @@ The placeholders are:
- '%P': parent hashes
- '%p': abbreviated parent hashes
- '%an': author name
- '%aN': author name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%aN': author name (respecting .mailmap, see linkgit:git-shortlog[1]
or linkgit:git-blame[1])
- '%ae': author email
- '%aE': author email (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%aE': author email (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%ad': author date (format respects --date= option)
- '%aD': author date, RFC2822 style
- '%ar': author date, relative
- '%at': author date, UNIX timestamp
- '%ai': author date, ISO 8601 format
- '%cn': committer name
- '%cN': committer name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%cN': committer name (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%ce': committer email
- '%cE': committer email (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%cE': committer email (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%cd': committer date
- '%cD': committer date, RFC2822 style
- '%cr': committer date, relative
@ -138,9 +142,11 @@ The placeholders are: @@ -138,9 +142,11 @@ The placeholders are:
- '%gD': reflog selector, e.g., `refs/stash@{1}`
- '%gd': shortened reflog selector, e.g., `stash@{1}`
- '%gn': reflog identity name
- '%gN': reflog identity name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%gN': reflog identity name (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%ge': reflog identity email
- '%gE': reflog identity email (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%gE': reflog identity email (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%gs': reflog subject
- '%Cred': switch color to red
- '%Cgreen': switch color to green
@ -150,13 +156,28 @@ The placeholders are: @@ -150,13 +156,28 @@ The placeholders are:
adding `auto,` at the beginning will emit color only when colors are
enabled for log output (by `color.diff`, `color.ui`, or `--color`, and
respecting the `auto` settings of the former if we are going to a
terminal)
terminal). `auto` alone (i.e. `%C(auto)`) will turn on auto coloring
on the next placeholders until the color is switched again.
- '%m': left, right or boundary mark
- '%n': newline
- '%%': a raw '%'
- '%x00': print a byte from a hex code
- '%w([<w>[,<i1>[,<i2>]]])': switch line wrapping, like the -w option of
linkgit:git-shortlog[1].
- '%<(<N>[,trunc|ltrunc|mtrunc])': make the next placeholder take at
least N columns, padding spaces on the right if necessary.
Optionally truncate at the beginning (ltrunc), the middle (mtrunc)
or the end (trunc) if the output is longer than N columns.
Note that truncating only works correctly with N >= 2.
- '%<|(<N>)': make the next placeholder take at least until the Nth
column, padding spaces on the right if necessary
- '%>(<N>)', '%>|(<N>)': similar to '%<(<N>)', '%<|(<N>)'
respectively, but padding spaces on the left
- '%>>(<N>)', '%>>|(<N>)': similar to '%>(<N>)', '%>|(<N>)'
respectively, except that if the next placeholder takes more spaces
than given and there are spaces on its left, use those spaces
- '%><(<N>)', '%><|(<N>)': similar to '%<(<N>)', '%<|(<N>)'
respectively, but padding both sides (i.e. the text is centered)

NOTE: Some placeholders may depend on other options given to the
revision traversal engine. For example, the `%g*` reflog options will

2
builtin/blame.c

@ -1430,7 +1430,7 @@ static void get_commit_info(struct commit *commit, @@ -1430,7 +1430,7 @@ static void get_commit_info(struct commit *commit,
commit_info_init(ret);

encoding = get_log_output_encoding();
message = logmsg_reencode(commit, encoding);
message = logmsg_reencode(commit, NULL, encoding);
get_ac_line(message, "\nauthor ",
&ret->author, &ret->author_mail,
&ret->author_time, &ret->author_tz);

2
builtin/commit.c

@ -955,7 +955,7 @@ static const char *read_commit_message(const char *name) @@ -955,7 +955,7 @@ static const char *read_commit_message(const char *name)
if (!commit)
die(_("could not lookup commit %s"), name);
out_enc = get_commit_output_encoding();
return logmsg_reencode(commit, out_enc);
return logmsg_reencode(commit, NULL, out_enc);
}

static int parse_and_validate_options(int argc, const char *argv[],

1
commit.h

@ -101,6 +101,7 @@ struct userformat_want { @@ -101,6 +101,7 @@ struct userformat_want {
extern int has_non_ascii(const char *text);
struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */
extern char *logmsg_reencode(const struct commit *commit,
char **commit_encoding,
const char *output_encoding);
extern void logmsg_free(char *msg, const struct commit *commit);
extern void get_commit_format(const char *arg, struct rev_info *);

2
compat/precompose_utf8.c

@ -78,7 +78,7 @@ void precompose_argv(int argc, const char **argv) @@ -78,7 +78,7 @@ void precompose_argv(int argc, const char **argv)
size_t namelen;
oldarg = argv[i];
if (has_non_ascii(oldarg, (size_t)-1, &namelen)) {
newarg = reencode_string_iconv(oldarg, namelen, ic_precompose);
newarg = reencode_string_iconv(oldarg, namelen, ic_precompose, NULL);
if (newarg)
argv[i] = newarg;
}

48
log-tree.c

@ -175,36 +175,52 @@ static void show_children(struct rev_info *opt, struct commit *commit, int abbre @@ -175,36 +175,52 @@ static void show_children(struct rev_info *opt, struct commit *commit, int abbre
}
}

void show_decorations(struct rev_info *opt, struct commit *commit)
/*
* The caller makes sure there is no funny color before
* calling. format_decorations makes sure the same after return.
*/
void format_decorations(struct strbuf *sb,
const struct commit *commit,
int use_color)
{
const char *prefix;
struct name_decoration *decoration;
const char *color_commit =
diff_get_color_opt(&opt->diffopt, DIFF_COMMIT);
diff_get_color(use_color, DIFF_COMMIT);
const char *color_reset =
decorate_get_color_opt(&opt->diffopt, DECORATION_NONE);
decorate_get_color(use_color, DECORATION_NONE);

if (opt->show_source && commit->util)
printf("\t%s", (char *) commit->util);
if (!opt->show_decorations)
return;
decoration = lookup_decoration(&name_decoration, &commit->object);
if (!decoration)
return;
prefix = " (";
while (decoration) {
printf("%s", prefix);
fputs(decorate_get_color_opt(&opt->diffopt, decoration->type),
stdout);
strbuf_addstr(sb, color_commit);
strbuf_addstr(sb, prefix);
strbuf_addstr(sb, decorate_get_color(use_color, decoration->type));
if (decoration->type == DECORATION_REF_TAG)
fputs("tag: ", stdout);
printf("%s", decoration->name);
fputs(color_reset, stdout);
fputs(color_commit, stdout);
strbuf_addstr(sb, "tag: ");
strbuf_addstr(sb, decoration->name);
strbuf_addstr(sb, color_reset);
prefix = ", ";
decoration = decoration->next;
}
putchar(')');
strbuf_addstr(sb, color_commit);
strbuf_addch(sb, ')');
strbuf_addstr(sb, color_reset);
}

void show_decorations(struct rev_info *opt, struct commit *commit)
{
struct strbuf sb = STRBUF_INIT;

if (opt->show_source && commit->util)
printf("\t%s", (char *) commit->util);
if (!opt->show_decorations)
return;
format_decorations(&sb, commit, opt->diffopt.use_color);
fputs(sb.buf, stdout);
strbuf_release(&sb);
}

static unsigned int digits_in_number(unsigned int number)
@ -540,8 +556,8 @@ void show_log(struct rev_info *opt) @@ -540,8 +556,8 @@ void show_log(struct rev_info *opt)
printf(" (from %s)",
find_unique_abbrev(parent->object.sha1,
abbrev_commit));
fputs(diff_get_color_opt(&opt->diffopt, DIFF_RESET), stdout);
show_decorations(opt, commit);
printf("%s", diff_get_color_opt(&opt->diffopt, DIFF_RESET));
if (opt->commit_format == CMIT_FMT_ONELINE) {
putchar(' ');
} else {

1
log-tree.h

@ -13,6 +13,7 @@ int log_tree_diff_flush(struct rev_info *); @@ -13,6 +13,7 @@ int log_tree_diff_flush(struct rev_info *);
int log_tree_commit(struct rev_info *, struct commit *);
int log_tree_opt_parse(struct rev_info *, const char **, int);
void show_log(struct rev_info *opt);
void format_decorations(struct strbuf *sb, const struct commit *commit, int use_color);
void show_decorations(struct rev_info *opt, struct commit *commit);
void log_write_email_headers(struct rev_info *opt, struct commit *commit,
const char **subject_p,

358
pretty.c

@ -606,6 +606,7 @@ static char *replace_encoding_header(char *buf, const char *encoding) @@ -606,6 +606,7 @@ static char *replace_encoding_header(char *buf, const char *encoding)
}

char *logmsg_reencode(const struct commit *commit,
char **commit_encoding,
const char *output_encoding)
{
static const char *utf8 = "UTF-8";
@ -627,9 +628,15 @@ char *logmsg_reencode(const struct commit *commit, @@ -627,9 +628,15 @@ char *logmsg_reencode(const struct commit *commit,
sha1_to_hex(commit->object.sha1), typename(type));
}

if (!output_encoding || !*output_encoding)
if (!output_encoding || !*output_encoding) {
if (commit_encoding)
*commit_encoding =
get_header(commit, msg, "encoding");
return msg;
}
encoding = get_header(commit, msg, "encoding");
if (commit_encoding)
*commit_encoding = encoding;
use_encoding = encoding ? encoding : utf8;
if (same_encoding(use_encoding, output_encoding)) {
/*
@ -670,7 +677,8 @@ char *logmsg_reencode(const struct commit *commit, @@ -670,7 +677,8 @@ char *logmsg_reencode(const struct commit *commit,
if (out)
out = replace_encoding_header(out, output_encoding);

free(encoding);
if (!commit_encoding)
free(encoding);
/*
* If the re-encoding failed, out might be NULL here; in that
* case we just return the commit message verbatim.
@ -764,19 +772,38 @@ struct chunk { @@ -764,19 +772,38 @@ struct chunk {
size_t len;
};

/*
 * Pending alignment request recorded by a padding placeholder and
 * consumed when the next placeholder is expanded.
 */
enum flush_type {
	/* zero value: no padding request is pending */
	no_flush = 0,
	/* "%<": text first, padding spaces after it */
	flush_right = 1,
	/* "%>": padding spaces first, text after them */
	flush_left = 2,
	/*
	 * "%>>": like flush_left, but when the text is wider than the
	 * field, spaces already at the end of the output are given up
	 */
	flush_left_and_steal = 3,
	/* "%><": padding split between both sides of the text */
	flush_both = 4
};

/*
 * Where to cut a placeholder's output when it is wider than the
 * requested field.
 */
enum trunc_type {
	/* zero value: never truncate, emit the text as is */
	trunc_none = 0,
	/* "ltrunc": drop text at the beginning */
	trunc_left = 1,
	/* "mtrunc": drop text in the middle */
	trunc_middle = 2,
	/* "trunc": drop text at the end */
	trunc_right = 3
};

struct format_commit_context {
const struct commit *commit;
const struct pretty_print_context *pretty_ctx;
unsigned commit_header_parsed:1;
unsigned commit_message_parsed:1;
struct signature_check signature_check;
enum flush_type flush_type;
enum trunc_type truncate;
char *message;
char *commit_encoding;
size_t width, indent1, indent2;
int auto_color;
int padding;

/* These offsets are relative to the start of the commit message. */
struct chunk author;
struct chunk committer;
struct chunk encoding;
size_t message_off;
size_t subject_off;
size_t body_off;
@ -823,9 +850,6 @@ static void parse_commit_header(struct format_commit_context *context) @@ -823,9 +850,6 @@ static void parse_commit_header(struct format_commit_context *context)
} else if (!prefixcmp(msg + i, "committer ")) {
context->committer.off = i + 10;
context->committer.len = eol - i - 10;
} else if (!prefixcmp(msg + i, "encoding ")) {
context->encoding.off = i + 9;
context->encoding.len = eol - i - 9;
}
i = eol;
}
@ -906,23 +930,6 @@ static void parse_commit_message(struct format_commit_context *c) @@ -906,23 +930,6 @@ static void parse_commit_message(struct format_commit_context *c)
c->commit_message_parsed = 1;
}

static void format_decoration(struct strbuf *sb, const struct commit *commit)
{
struct name_decoration *d;
const char *prefix = " (";

load_ref_decorations(DECORATE_SHORT_REFS);
d = lookup_decoration(&name_decoration, &commit->object);
while (d) {
strbuf_addstr(sb, prefix);
prefix = ", ";
strbuf_addstr(sb, d->name);
d = d->next;
}
if (prefix[0] == ',')
strbuf_addch(sb, ')');
}

static void strbuf_wrap(struct strbuf *sb, size_t pos,
size_t width, size_t indent1, size_t indent2)
{
@ -969,7 +976,112 @@ static int format_reflog_person(struct strbuf *sb, @@ -969,7 +976,112 @@ static int format_reflog_person(struct strbuf *sb,
return format_person_part(sb, part, ident, strlen(ident), dmode);
}

static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
/*
 * Expand a color placeholder; "placeholder" points at the 'C'.
 *
 * Two spellings are understood:
 *   C(<spec>)  - <spec> is handed to color_parse_mem(); a leading
 *                "auto," makes the color conditional on
 *                want_color(c->pretty_ctx->color).
 *   Cred, Cgreen, Cblue, Creset - fixed colors.
 *
 * The escape sequence (if any) is appended to sb.  Returns the number
 * of bytes of "placeholder" that were consumed, or 0 when the text is
 * not a recognized color placeholder.
 */
static size_t parse_color(struct strbuf *sb, /* in UTF-8 */
			  const char *placeholder,
			  struct format_commit_context *c)
{
	static const struct {
		const char *name;
		const char *code;
		size_t consumed;	/* 'C' plus the length of name */
	} named[] = {
		{ "red", GIT_COLOR_RED, 4 },
		{ "green", GIT_COLOR_GREEN, 6 },
		{ "blue", GIT_COLOR_BLUE, 5 },
		{ "reset", GIT_COLOR_RESET, 6 },
	};
	size_t i;

	if (placeholder[1] == '(') {
		const char *spec = placeholder + 2;
		const char *close = strchr(spec, ')');
		char color[COLOR_MAXLEN];
		size_t consumed;

		if (!close)
			return 0;
		consumed = close - placeholder + 1;

		if (!prefixcmp(spec, "auto,")) {
			/* swallow the placeholder silently when color is off */
			if (!want_color(c->pretty_ctx->color))
				return consumed;
			spec += 5;
		}
		color_parse_mem(spec, close - spec, "--pretty format", color);
		strbuf_addstr(sb, color);
		return consumed;
	}

	for (i = 0; i < sizeof(named) / sizeof(named[0]); i++) {
		if (prefixcmp(placeholder + 1, named[i].name))
			continue;
		strbuf_addstr(sb, named[i].code);
		return named[i].consumed;
	}
	return 0;
}

static size_t parse_padding_placeholder(struct strbuf *sb,
const char *placeholder,
struct format_commit_context *c)
{
const char *ch = placeholder;
enum flush_type flush_type;
int to_column = 0;

switch (*ch++) {
case '<':
flush_type = flush_right;
break;
case '>':
if (*ch == '<') {
flush_type = flush_both;
ch++;
} else if (*ch == '>') {
flush_type = flush_left_and_steal;
ch++;
} else
flush_type = flush_left;
break;
default:
return 0;
}

/* the next value means "wide enough to that column" */
if (*ch == '|') {
to_column = 1;
ch++;
}

if (*ch == '(') {
const char *start = ch + 1;
const char *end = start + strcspn(start, ",)");
char *next;
int width;
if (!end || end == start)
return 0;
width = strtoul(start, &next, 10);
if (next == start || width == 0)
return 0;
c->padding = to_column ? -width : width;
c->flush_type = flush_type;

if (*end == ',') {
start = end + 1;
end = strchr(start, ')');
if (!end || end == start)
return 0;
if (!prefixcmp(start, "trunc)"))
c->truncate = trunc_right;
else if (!prefixcmp(start, "ltrunc)"))
c->truncate = trunc_left;
else if (!prefixcmp(start, "mtrunc)"))
c->truncate = trunc_middle;
else
return 0;
} else
c->truncate = trunc_none;

return end - placeholder + 1;
}
return 0;
}

static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
const char *placeholder,
void *context)
{
struct format_commit_context *c = context;
@ -981,38 +1093,20 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder, @@ -981,38 +1093,20 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
/* these are independent of the commit */
switch (placeholder[0]) {
case 'C':
if (placeholder[1] == '(') {
const char *begin = placeholder + 2;
const char *end = strchr(begin, ')');
char color[COLOR_MAXLEN];

if (!end)
return 0;
if (!prefixcmp(begin, "auto,")) {
if (!want_color(c->pretty_ctx->color))
return end - placeholder + 1;
begin += 5;
}
color_parse_mem(begin,
end - begin,
"--pretty format", color);
strbuf_addstr(sb, color);
return end - placeholder + 1;
if (!prefixcmp(placeholder + 1, "(auto)")) {
c->auto_color = 1;
return 7; /* consumed 7 bytes, "C(auto)" */
} else {
int ret = parse_color(sb, placeholder, c);
if (ret)
c->auto_color = 0;
/*
* Otherwise, we decided to treat %C<unknown>
* as a literal string, and the previous
* %C(auto) is still valid.
*/
return ret;
}
if (!prefixcmp(placeholder + 1, "red")) {
strbuf_addstr(sb, GIT_COLOR_RED);
return 4;
} else if (!prefixcmp(placeholder + 1, "green")) {
strbuf_addstr(sb, GIT_COLOR_GREEN);
return 6;
} else if (!prefixcmp(placeholder + 1, "blue")) {
strbuf_addstr(sb, GIT_COLOR_BLUE);
return 5;
} else if (!prefixcmp(placeholder + 1, "reset")) {
strbuf_addstr(sb, GIT_COLOR_RESET);
return 6;
} else
return 0;
case 'n': /* newline */
strbuf_addch(sb, '\n');
return 1;
@ -1050,6 +1144,10 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder, @@ -1050,6 +1144,10 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
return end - placeholder + 1;
} else
return 0;

case '<':
case '>':
return parse_padding_placeholder(sb, placeholder, c);
}

/* these depend on the commit */
@ -1058,13 +1156,19 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder, @@ -1058,13 +1156,19 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,

switch (placeholder[0]) {
case 'H': /* commit hash */
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_COMMIT));
strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
return 1;
case 'h': /* abbreviated commit hash */
if (add_again(sb, &c->abbrev_commit_hash))
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_COMMIT));
if (add_again(sb, &c->abbrev_commit_hash)) {
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
return 1;
}
strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
c->pretty_ctx->abbrev));
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
return 1;
case 'T': /* tree hash */
@ -1101,7 +1205,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder, @@ -1101,7 +1205,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
strbuf_addstr(sb, get_revision_mark(NULL, commit));
return 1;
case 'd':
format_decoration(sb, commit);
load_ref_decorations(DECORATE_SHORT_REFS);
format_decorations(sb, commit, c->auto_color);
return 1;
case 'g': /* reflog info */
switch(placeholder[1]) {
@ -1180,7 +1285,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder, @@ -1180,7 +1285,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
msg + c->committer.off, c->committer.len,
c->pretty_ctx->date_mode);
case 'e': /* encoding */
strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
if (c->commit_encoding)
strbuf_addstr(sb, c->commit_encoding);
return 1;
case 'B': /* raw body */
/* message_off is always left at the initial newline */
@ -1206,7 +1312,111 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder, @@ -1206,7 +1312,111 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
return 0; /* unknown placeholder */
}

static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
/*
 * Expand the placeholder that follows a padding request and append it
 * to sb, padded or truncated as recorded in c->padding, c->flush_type
 * and c->truncate.
 *
 * The placeholder is first expanded into a scratch buffer.  Any run of
 * leading "%C..." placeholders is expanded together with the
 * placeholder that follows them.  Widths are measured with
 * utf8_strnwidth() with its skip_ansi argument set.
 *
 * Returns the number of bytes of the format string consumed, and
 * resets c->flush_type to no_flush so the request applies only once.
 */
static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
				    const char *placeholder,
				    struct format_commit_context *c)
{
	struct strbuf local_sb = STRBUF_INIT;
	int total_consumed = 0, len, padding = c->padding;

	if (padding < 0) {
		/*
		 * A negative value means "pad up to that column": turn
		 * it into a field width by subtracting what the current
		 * output line already occupies.
		 */
		const char *start = strrchr(sb->buf, '\n');
		int occupied;
		if (!start)
			start = sb->buf;
		occupied = utf8_strnwidth(start, -1, 1);
		padding = (-padding) - occupied;
	}
	while (1) {
		int modifier = *placeholder == 'C';
		int consumed = format_commit_one(&local_sb, placeholder, c);
		total_consumed += consumed;

		if (!modifier)
			break;

		/* a color placeholder: keep going with the next "%..." */
		placeholder += consumed;
		if (*placeholder != '%')
			break;
		placeholder++;
		total_consumed++;
	}
	len = utf8_strnwidth(local_sb.buf, -1, 1);

	if (c->flush_type == flush_left_and_steal) {
		const char *ch = sb->buf + sb->len - 1;
		while (len > padding && ch > sb->buf) {
			const char *p;
			if (*ch == ' ') {
				ch--;
				padding++;
				continue;
			}
			/* check for trailing ansi sequences */
			if (*ch != 'm')
				break;
			p = ch - 1;
			/*
			 * Look back a few bytes for the ESC that starts
			 * the sequence, but never before the start of
			 * the buffer.
			 */
			while (p > sb->buf && ch - p < 10 && *p != '\033')
				p--;
			if (*p != '\033' ||
			    ch + 1 - p != display_mode_esc_sequence_len(p))
				break;
			/*
			 * got a good ansi sequence, put it back to
			 * local_sb as we're cutting sb
			 */
			strbuf_insert(&local_sb, 0, p, ch + 1 - p);
			ch = p - 1;
		}
		strbuf_setlen(sb, ch + 1 - sb->buf);
		c->flush_type = flush_left;
	}

	if (len > padding) {
		/* too wide: cut it down, marking the cut with ".." */
		switch (c->truncate) {
		case trunc_left:
			strbuf_utf8_replace(&local_sb,
					    0, len - (padding - 2),
					    "..");
			break;
		case trunc_middle:
			strbuf_utf8_replace(&local_sb,
					    padding / 2 - 1,
					    len - (padding - 2),
					    "..");
			break;
		case trunc_right:
			strbuf_utf8_replace(&local_sb,
					    padding - 2, len - (padding - 2),
					    "..");
			break;
		case trunc_none:
			break;
		}
		strbuf_addstr(sb, local_sb.buf);
	} else {
		int sb_len = sb->len, offset = 0;
		if (c->flush_type == flush_left)
			offset = padding - len;
		else if (c->flush_type == flush_both)
			offset = (padding - len) / 2;
		/*
		 * we calculate padding in columns, now
		 * convert it back to chars
		 */
		padding = padding - len + local_sb.len;
		strbuf_grow(sb, padding);
		strbuf_setlen(sb, sb_len + padding);
		/* fill the field with spaces, then drop the text in place */
		memset(sb->buf + sb_len, ' ', sb->len - sb_len);
		memcpy(sb->buf + sb_len + offset, local_sb.buf,
		       local_sb.len);
	}
	strbuf_release(&local_sb);
	c->flush_type = no_flush;
	return total_consumed;
}

static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
const char *placeholder,
void *context)
{
int consumed;
@ -1235,7 +1445,10 @@ static size_t format_commit_item(struct strbuf *sb, const char *placeholder, @@ -1235,7 +1445,10 @@ static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
placeholder++;

orig_len = sb->len;
consumed = format_commit_one(sb, placeholder, context);
if (((struct format_commit_context *)context)->flush_type != no_flush)
consumed = format_and_pad_commit(sb, placeholder, context);
else
consumed = format_commit_one(sb, placeholder, context);
if (magic == NO_MAGIC)
return consumed;

@ -1286,16 +1499,37 @@ void format_commit_message(const struct commit *commit, @@ -1286,16 +1499,37 @@ void format_commit_message(const struct commit *commit,
{
struct format_commit_context context;
const char *output_enc = pretty_ctx->output_encoding;
const char *utf8 = "UTF-8";

memset(&context, 0, sizeof(context));
context.commit = commit;
context.pretty_ctx = pretty_ctx;
context.wrap_start = sb->len;
context.message = logmsg_reencode(commit, output_enc);
context.message = logmsg_reencode(commit,
&context.commit_encoding,
output_enc);

strbuf_expand(sb, format, format_commit_item, &context);
rewrap_message_tail(sb, &context, 0, 0, 0);

if (output_enc) {
if (same_encoding(utf8, output_enc))
output_enc = NULL;
} else {
if (context.commit_encoding &&
!same_encoding(context.commit_encoding, utf8))
output_enc = context.commit_encoding;
}

if (output_enc) {
int outsz;
char *out = reencode_string_len(sb->buf, sb->len,
output_enc, utf8, &outsz);
if (out)
strbuf_attach(sb, out, outsz, outsz + 1);
}

free(context.commit_encoding);
logmsg_free(context.message, commit);
free(context.signature_check.gpg_output);
free(context.signature_check.signer);
@ -1454,7 +1688,7 @@ void pretty_print_commit(const struct pretty_print_context *pp, @@ -1454,7 +1688,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
}

encoding = get_log_output_encoding();
msg = reencoded = logmsg_reencode(commit, encoding);
msg = reencoded = logmsg_reencode(commit, NULL, encoding);

if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL)
indent = 0;

2
revision.c

@ -2292,7 +2292,7 @@ static int commit_match(struct commit *commit, struct rev_info *opt) @@ -2292,7 +2292,7 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
* in it.
*/
encoding = get_log_output_encoding();
message = logmsg_reencode(commit, encoding);
message = logmsg_reencode(commit, NULL, encoding);

/* Copy the commit to temporary if we are using "fake" headers */
if (buf.len)

175
t/t4205-log-pretty-formats.sh

@ -99,4 +99,179 @@ test_expect_failure 'NUL termination with --stat' ' @@ -99,4 +99,179 @@ test_expect_failure 'NUL termination with --stat' '
test_i18ncmp expected actual
'

test_expect_success 'setup more commits' '
test_commit "message one" one one message-one &&
test_commit "message two" two two message-two
'

test_expect_success 'left alignment formatting' '
git log --pretty="format:%<(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
message two Z
message one Z
add bar Z
initial Z
EOF
test_cmp expected actual
'

test_expect_success 'left alignment formatting at the nth column' '
git log --pretty="format:%h %<|(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
fa33ab1 message two Z
7cd6c63 message one Z
1711bf9 add bar Z
af20c06 initial Z
EOF
test_cmp expected actual
'

test_expect_success 'left alignment formatting with no padding' '
git log --pretty="format:%<(1)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
cat <<\EOF >expected &&
message two
message one
add bar
initial
EOF
test_cmp expected actual
'

test_expect_success 'left alignment formatting with trunc' '
git log --pretty="format:%<(10,trunc)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
message ..
message ..
add bar Z
initial Z
EOF
test_cmp expected actual
'

test_expect_success 'left alignment formatting with ltrunc' '
git log --pretty="format:%<(10,ltrunc)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
..sage two
..sage one
add bar Z
initial Z
EOF
test_cmp expected actual
'

test_expect_success 'left alignment formatting with mtrunc' '
git log --pretty="format:%<(10,mtrunc)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
mess.. two
mess.. one
add bar Z
initial Z
EOF
test_cmp expected actual
'

test_expect_success 'right alignment formatting' '
git log --pretty="format:%>(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
Z message two
Z message one
Z add bar
Z initial
EOF
test_cmp expected actual
'

test_expect_success 'right alignment formatting at the nth column' '
git log --pretty="format:%h %>|(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
fa33ab1 message two
7cd6c63 message one
1711bf9 add bar
af20c06 initial
EOF
test_cmp expected actual
'

test_expect_success 'right alignment formatting with no padding' '
git log --pretty="format:%>(1)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
cat <<\EOF >expected &&
message two
message one
add bar
initial
EOF
test_cmp expected actual
'

test_expect_success 'center alignment formatting' '
git log --pretty="format:%><(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
Z message two Z
Z message one Z
Z add bar Z
Z initial Z
EOF
test_cmp expected actual
'

test_expect_success 'center alignment formatting at the nth column' '
git log --pretty="format:%h %><|(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
fa33ab1 message two Z
7cd6c63 message one Z
1711bf9 add bar Z
af20c06 initial Z
EOF
test_cmp expected actual
'

test_expect_success 'center alignment formatting with no padding' '
git log --pretty="format:%><(1)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
cat <<\EOF >expected &&
message two
message one
add bar
initial
EOF
test_cmp expected actual
'

test_expect_success 'left/right alignment formatting with stealing' '
git commit --amend -m short --author "long long long <long@me.com>" &&
git log --pretty="format:%<(10,trunc)%s%>>(10,ltrunc)% an" >actual &&
# complete the incomplete line at the end
echo >>actual &&
cat <<\EOF >expected &&
short long long long
message .. A U Thor
add bar A U Thor
initial A U Thor
EOF
test_cmp expected actual
'

test_done

8
t/t4207-log-decoration-colors.sh

@ -44,15 +44,15 @@ test_expect_success setup ' @@ -44,15 +44,15 @@ test_expect_success setup '
'

cat >expected <<EOF
${c_commit}COMMIT_ID (${c_HEAD}HEAD${c_reset}${c_commit},\
${c_commit}COMMIT_ID${c_reset}${c_commit} (${c_HEAD}HEAD${c_reset}${c_commit},\
${c_tag}tag: v1.0${c_reset}${c_commit},\
${c_tag}tag: B${c_reset}${c_commit},\
${c_branch}master${c_reset}${c_commit})${c_reset} B
${c_commit}COMMIT_ID (${c_tag}tag: A1${c_reset}${c_commit},\
${c_commit}COMMIT_ID${c_reset}${c_commit} (${c_tag}tag: A1${c_reset}${c_commit},\
${c_remoteBranch}other/master${c_reset}${c_commit})${c_reset} A1
${c_commit}COMMIT_ID (${c_stash}refs/stash${c_reset}${c_commit})${c_reset}\
${c_commit}COMMIT_ID${c_reset}${c_commit} (${c_stash}refs/stash${c_reset}${c_commit})${c_reset}\
On master: Changes to A.t
${c_commit}COMMIT_ID (${c_tag}tag: A${c_reset}${c_commit})${c_reset} A
${c_commit}COMMIT_ID${c_reset}${c_commit} (${c_tag}tag: A${c_reset}${c_commit})${c_reset} A
EOF

# We want log to show all, but the second parent to refs/stash is irrelevant

12
t/t6006-rev-list-format.sh

@ -184,7 +184,7 @@ Test printing of complex bodies @@ -184,7 +184,7 @@ Test printing of complex bodies

This commit message is much longer than the others,
and it will be encoded in iso8859-1. We should therefore
include an iso8859 character: ¡bueno!
include an iso8859 character: ¡bueno!
EOF
test_expect_success 'setup complex body' '
git config i18n.commitencoding iso8859-1 &&
@ -192,14 +192,14 @@ git config i18n.commitencoding iso8859-1 && @@ -192,14 +192,14 @@ git config i18n.commitencoding iso8859-1 &&
'

test_format complex-encoding %e <<'EOF'
commit f58db70b055c5718631e5c61528b28b12090cdea
commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
iso8859-1
commit 131a310eb913d107dd3c09a65d1651175898735d
commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
EOF

test_format complex-subject %s <<'EOF'
commit f58db70b055c5718631e5c61528b28b12090cdea
commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
Test printing of complex bodies
commit 131a310eb913d107dd3c09a65d1651175898735d
changed foo
@ -208,17 +208,17 @@ added foo @@ -208,17 +208,17 @@ added foo
EOF

test_format complex-body %b <<'EOF'
commit f58db70b055c5718631e5c61528b28b12090cdea
commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
This commit message is much longer than the others,
and it will be encoded in iso8859-1. We should therefore
include an iso8859 character: ¡bueno!
include an iso8859 character: ¡bueno!

commit 131a310eb913d107dd3c09a65d1651175898735d
commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
EOF

test_expect_success '%x00 shows NUL' '
echo >expect commit f58db70b055c5718631e5c61528b28b12090cdea &&
echo >expect commit 1ed88da4a5b5ed8c449114ac131efc62178734c3 &&
echo >>expect fooQbar &&
git rev-list -1 --format=foo%x00bar HEAD >actual.nul &&
nul_to_q <actual.nul >actual &&

104
utf8.c

@ -9,6 +9,20 @@ struct interval { @@ -9,6 +9,20 @@ struct interval {
int last;
};

/*
 * If "s" begins with an escape sequence of the form
 * ESC '[' <digits and ';'>* 'm', return the length of that sequence
 * in bytes (including the final 'm'); otherwise return 0.
 */
size_t display_mode_esc_sequence_len(const char *s)
{
	size_t i;

	if (s[0] != '\033' || s[1] != '[')
		return 0;

	/* skip the parameter bytes between "ESC [" and the final byte */
	for (i = 2; isdigit(s[i]) || s[i] == ';'; i++)
		;

	return s[i] == 'm' ? i + 1 : 0;
}

/* auxiliary function for binary search in interval table */
static int bisearch(ucs_char_t ucs, const struct interval *table, int max)
{
@ -252,18 +266,26 @@ int utf8_width(const char **start, size_t *remainder_p) @@ -252,18 +266,26 @@ int utf8_width(const char **start, size_t *remainder_p)
* string, assuming that the string is utf8. Returns strlen() instead
* if the string does not look like a valid utf8 string.
*/
int utf8_strwidth(const char *string)
int utf8_strnwidth(const char *string, int len, int skip_ansi)
{
int width = 0;
const char *orig = string;

while (1) {
if (!string)
return strlen(orig);
if (!*string)
return width;
if (len == -1)
len = strlen(string);
while (string && string < orig + len) {
int skip;
while (skip_ansi &&
(skip = display_mode_esc_sequence_len(string)) != 0)
string += skip;
width += utf8_width(&string, NULL);
}
return string ? width : len;
}

/*
 * Width of a whole NUL-terminated string: delegates to utf8_strnwidth()
 * with len == -1 (which makes it take strlen() of the string) and with
 * ANSI escape sequence skipping turned off.
 */
int utf8_strwidth(const char *string)
{
	const int whole_string = -1;
	const int skip_ansi = 0;

	return utf8_strnwidth(string, whole_string, skip_ansi);
}

int is_utf8(const char *text)
@ -303,20 +325,6 @@ static void strbuf_add_indented_text(struct strbuf *buf, const char *text, @@ -303,20 +325,6 @@ static void strbuf_add_indented_text(struct strbuf *buf, const char *text,
}
}

/*
 * Measure the escape sequence "ESC [ <digits and semicolons> m" at the
 * start of s.  Returns its length in bytes, or 0 if s does not start
 * with one.
 */
static size_t display_mode_esc_sequence_len(const char *s)
{
	size_t pos = 0;

	/* Introducer: ESC, then '['. */
	if (s[pos++] != '\033')
		return 0;
	if (s[pos++] != '[')
		return 0;

	/* Parameters: digits separated by ';'. */
	while (s[pos] == ';' || isdigit(s[pos]))
		pos++;

	/* Terminator: 'm' is counted as part of the sequence. */
	return s[pos] == 'm' ? pos + 1 : 0;
}

/*
* Wrap the text, if necessary. The variable indent is the indent for the
* first line, indent2 is the indent for all other lines.
@ -413,6 +421,52 @@ void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len, @@ -413,6 +421,52 @@ void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
free(tmp);
}

/*
 * Replace the display columns [pos, pos + width) of the text in sb_src
 * with subst, or simply drop them when subst is NULL.
 *
 * Columns are counted with utf8_width().  Escape sequences recognized by
 * display_mode_esc_sequence_len() are copied through unchanged and do not
 * advance the column counter.  Characters of width zero are always kept.
 * subst is inserted exactly once, at the place of the first character
 * that gets replaced.
 *
 * If utf8_width() reports broken UTF-8 (by setting the cursor to NULL),
 * sb_src is left untouched.
 */
void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width,
			 const char *subst)
{
	struct strbuf sb_dst = STRBUF_INIT;
	char *src = sb_src->buf;
	char *end = src + sb_src->len;
	char *dst;
	char *new_buf;
	size_t new_len, new_alloc;
	int w = 0, subst_len = 0;

	if (subst)
		subst_len = strlen(subst);
	/* Worst case: nothing is removed and subst is inserted once. */
	strbuf_grow(&sb_dst, sb_src->len + subst_len);
	dst = sb_dst.buf;

	while (src < end) {
		char *old;
		size_t esc_len;
		int glyph_width;

		while ((esc_len = display_mode_esc_sequence_len(src))) {
			memcpy(dst, src, esc_len);
			src += esc_len;
			dst += esc_len;
		}

		/*
		 * The text may end in an escape sequence.  Stop here so
		 * that utf8_width() is never asked to decode a character
		 * at `end`, which is outside the text being rewritten.
		 */
		if (src >= end)
			break;

		old = src;
		/*
		 * Keep the result signed: utf8_width() returns int, and
		 * accumulating it through an unsigned type would depend on
		 * implementation-defined conversion for negative values.
		 */
		glyph_width = utf8_width((const char **)&src, NULL);
		if (!src) {
			/* broken utf-8: free the scratch buffer, do nothing */
			strbuf_release(&sb_dst);
			return;
		}
		if (glyph_width && w >= pos && w < pos + width) {
			if (subst) {
				memcpy(dst, subst, subst_len);
				dst += subst_len;
				subst = NULL; /* insert only once */
			}
			w += glyph_width;
			continue;
		}
		memcpy(dst, old, src - old);
		dst += src - old;
		w += glyph_width;
	}
	strbuf_setlen(&sb_dst, dst - sb_dst.buf);

	/*
	 * Read len/alloc before detaching: the order in which function
	 * arguments are evaluated is unspecified, so they must not be read
	 * from sb_dst in the same argument list that detaches it.
	 */
	new_len = sb_dst.len;
	new_alloc = sb_dst.alloc;
	new_buf = strbuf_detach(&sb_dst, NULL);
	strbuf_attach(sb_src, new_buf, new_len, new_alloc);
}

int is_encoding_utf8(const char *name)
{
if (!name)
@ -460,7 +514,7 @@ int utf8_fprintf(FILE *stream, const char *format, ...) @@ -460,7 +514,7 @@ int utf8_fprintf(FILE *stream, const char *format, ...)
#else
typedef char * iconv_ibp;
#endif
char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv)
char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, int *outsz_p)
{
size_t outsz, outalloc;
char *out, *outpos;
@ -494,13 +548,17 @@ char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv) @@ -494,13 +548,17 @@ char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv)
}
else {
*outpos = '\0';
if (outsz_p)
*outsz_p = outpos - out;
break;
}
}
return out;
}

char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding)
char *reencode_string_len(const char *in, int insz,
const char *out_encoding, const char *in_encoding,
int *outsz)
{
iconv_t conv;
char *out;
@ -526,7 +584,7 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e @@ -526,7 +584,7 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e
return NULL;
}

out = reencode_string_iconv(in, strlen(in), conv);
out = reencode_string_iconv(in, insz, conv, outsz);
iconv_close(conv);
return out;
}

23
utf8.h

@ -3,7 +3,9 @@ @@ -3,7 +3,9 @@

typedef unsigned int ucs_char_t; /* assuming 32bit int */

size_t display_mode_esc_sequence_len(const char *s);
int utf8_width(const char **start, size_t *remainder_p);
int utf8_strnwidth(const char *string, int len, int skip_ansi);
int utf8_strwidth(const char *string);
int is_utf8(const char *text);
int is_encoding_utf8(const char *name);
@ -14,14 +16,29 @@ void strbuf_add_wrapped_text(struct strbuf *buf, @@ -14,14 +16,29 @@ void strbuf_add_wrapped_text(struct strbuf *buf,
const char *text, int indent, int indent2, int width);
void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
int indent, int indent2, int width);
void strbuf_utf8_replace(struct strbuf *sb, int pos, int width,
const char *subst);

#ifndef NO_ICONV
char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv);
char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding);
char *reencode_string_iconv(const char *in, size_t insz,
iconv_t conv, int *outsz);
char *reencode_string_len(const char *in, int insz,
const char *out_encoding,
const char *in_encoding,
int *outsz);
#else
#define reencode_string(a,b,c) NULL
#define reencode_string_len(a,b,c,d,e) NULL
#endif

/*
 * Convenience wrapper around reencode_string_len() for NUL-terminated
 * input: the input size is taken with strlen(in) and no output size is
 * requested (the outsz argument is NULL).
 *
 * When NO_ICONV is defined, reencode_string_len() is a macro that
 * expands to NULL without using its arguments, so this function then
 * simply returns NULL.
 */
static inline char *reencode_string(const char *in,
const char *out_encoding,
const char *in_encoding)
{
return reencode_string_len(in, strlen(in),
out_encoding, in_encoding,
NULL);
}

int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding);

#endif

Loading…
Cancel
Save