Browse Source

Merge branch 'nd/pretty-formats'

pretty-printing body of the commit that is stored in non UTF-8
encoding did not work well.  The early part of this series fixes
it.  And then it adds %C(auto) specifier that turns the coloring on
when we are emitting to the terminal, and adds column-aligning
format directives.

* nd/pretty-formats:
  pretty: support %>> that steal trailing spaces
  pretty: support truncating in %>, %< and %><
  pretty: support padding placeholders, %< %> and %><
  pretty: add %C(auto) for auto-coloring
  pretty: split color parsing into a separate function
  pretty: two phase conversion for non utf-8 commits
  utf8.c: add reencode_string_len() that can handle NULs in string
  utf8.c: add utf8_strnwidth() with the ability to skip ansi sequences
  utf8.c: move display_mode_esc_sequence_len() for use by other functions
  pretty: share code between format_decoration and show_decorations
  pretty-formats.txt: wrap long lines
  pretty: get the correct encoding for --pretty:format=%e
  pretty: save commit encoding from logmsg_reencode if the caller needs it
maint
Junio C Hamano 12 years ago
parent
commit
e52e6f79cc
  1. 35
      Documentation/pretty-formats.txt
  2. 2
      builtin/blame.c
  3. 2
      builtin/commit.c
  4. 1
      commit.h
  5. 2
      compat/precompose_utf8.c
  6. 48
      log-tree.c
  7. 1
      log-tree.h
  8. 358
      pretty.c
  9. 2
      revision.c
  10. 175
      t/t4205-log-pretty-formats.sh
  11. 8
      t/t4207-log-decoration-colors.sh
  12. 12
      t/t6006-rev-list-format.sh
  13. 104
      utf8.c
  14. 23
      utf8.h

35
Documentation/pretty-formats.txt

@ -106,18 +106,22 @@ The placeholders are:
- '%P': parent hashes - '%P': parent hashes
- '%p': abbreviated parent hashes - '%p': abbreviated parent hashes
- '%an': author name - '%an': author name
- '%aN': author name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) - '%aN': author name (respecting .mailmap, see linkgit:git-shortlog[1]
or linkgit:git-blame[1])
- '%ae': author email - '%ae': author email
- '%aE': author email (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) - '%aE': author email (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%ad': author date (format respects --date= option) - '%ad': author date (format respects --date= option)
- '%aD': author date, RFC2822 style - '%aD': author date, RFC2822 style
- '%ar': author date, relative - '%ar': author date, relative
- '%at': author date, UNIX timestamp - '%at': author date, UNIX timestamp
- '%ai': author date, ISO 8601 format - '%ai': author date, ISO 8601 format
- '%cn': committer name - '%cn': committer name
- '%cN': committer name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) - '%cN': committer name (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%ce': committer email - '%ce': committer email
- '%cE': committer email (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) - '%cE': committer email (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%cd': committer date - '%cd': committer date
- '%cD': committer date, RFC2822 style - '%cD': committer date, RFC2822 style
- '%cr': committer date, relative - '%cr': committer date, relative
@ -138,9 +142,11 @@ The placeholders are:
- '%gD': reflog selector, e.g., `refs/stash@{1}` - '%gD': reflog selector, e.g., `refs/stash@{1}`
- '%gd': shortened reflog selector, e.g., `stash@{1}` - '%gd': shortened reflog selector, e.g., `stash@{1}`
- '%gn': reflog identity name - '%gn': reflog identity name
- '%gN': reflog identity name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) - '%gN': reflog identity name (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%ge': reflog identity email - '%ge': reflog identity email
- '%gE': reflog identity email (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) - '%gE': reflog identity email (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
- '%gs': reflog subject - '%gs': reflog subject
- '%Cred': switch color to red - '%Cred': switch color to red
- '%Cgreen': switch color to green - '%Cgreen': switch color to green
@ -150,13 +156,28 @@ The placeholders are:
adding `auto,` at the beginning will emit color only when colors are adding `auto,` at the beginning will emit color only when colors are
enabled for log output (by `color.diff`, `color.ui`, or `--color`, and enabled for log output (by `color.diff`, `color.ui`, or `--color`, and
respecting the `auto` settings of the former if we are going to a respecting the `auto` settings of the former if we are going to a
terminal) terminal). `auto` alone (i.e. `%C(auto)`) will turn on auto coloring
on the next placeholders until the color is switched again.
- '%m': left, right or boundary mark - '%m': left, right or boundary mark
- '%n': newline - '%n': newline
- '%%': a raw '%' - '%%': a raw '%'
- '%x00': print a byte from a hex code - '%x00': print a byte from a hex code
- '%w([<w>[,<i1>[,<i2>]]])': switch line wrapping, like the -w option of - '%w([<w>[,<i1>[,<i2>]]])': switch line wrapping, like the -w option of
linkgit:git-shortlog[1]. linkgit:git-shortlog[1].
- '%<(<N>[,trunc|ltrunc|mtrunc])': make the next placeholder take at
least N columns, padding spaces on the right if necessary.
Optionally truncate at the beginning (ltrunc), the middle (mtrunc)
or the end (trunc) if the output is longer than N columns.
Note that truncating only works correctly with N >= 2.
- '%<|(<N>)': make the next placeholder take at least until the Nth
column, padding spaces on the right if necessary
- '%>(<N>)', '%>|(<N>)': similar to '%<(<N>)', '%<|(<N>)'
respectively, but padding spaces on the left
- '%>>(<N>)', '%>>|(<N>)': similar to '%>(<N>)', '%>|(<N>)'
respectively, except that if the next placeholder takes more spaces
than given and there are spaces on its left, use those spaces
- '%><(<N>)', '%><|(<N>)': similar to '%<(<N>)', '%<|(<N>)'
respectively, but padding both sides (i.e. the text is centered)


NOTE: Some placeholders may depend on other options given to the NOTE: Some placeholders may depend on other options given to the
revision traversal engine. For example, the `%g*` reflog options will revision traversal engine. For example, the `%g*` reflog options will

2
builtin/blame.c

@ -1430,7 +1430,7 @@ static void get_commit_info(struct commit *commit,
commit_info_init(ret); commit_info_init(ret);


encoding = get_log_output_encoding(); encoding = get_log_output_encoding();
message = logmsg_reencode(commit, encoding); message = logmsg_reencode(commit, NULL, encoding);
get_ac_line(message, "\nauthor ", get_ac_line(message, "\nauthor ",
&ret->author, &ret->author_mail, &ret->author, &ret->author_mail,
&ret->author_time, &ret->author_tz); &ret->author_time, &ret->author_tz);

2
builtin/commit.c

@ -955,7 +955,7 @@ static const char *read_commit_message(const char *name)
if (!commit) if (!commit)
die(_("could not lookup commit %s"), name); die(_("could not lookup commit %s"), name);
out_enc = get_commit_output_encoding(); out_enc = get_commit_output_encoding();
return logmsg_reencode(commit, out_enc); return logmsg_reencode(commit, NULL, out_enc);
} }


static int parse_and_validate_options(int argc, const char *argv[], static int parse_and_validate_options(int argc, const char *argv[],

1
commit.h

@ -101,6 +101,7 @@ struct userformat_want {
extern int has_non_ascii(const char *text); extern int has_non_ascii(const char *text);
struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */ struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */
extern char *logmsg_reencode(const struct commit *commit, extern char *logmsg_reencode(const struct commit *commit,
char **commit_encoding,
const char *output_encoding); const char *output_encoding);
extern void logmsg_free(char *msg, const struct commit *commit); extern void logmsg_free(char *msg, const struct commit *commit);
extern void get_commit_format(const char *arg, struct rev_info *); extern void get_commit_format(const char *arg, struct rev_info *);

2
compat/precompose_utf8.c

@ -78,7 +78,7 @@ void precompose_argv(int argc, const char **argv)
size_t namelen; size_t namelen;
oldarg = argv[i]; oldarg = argv[i];
if (has_non_ascii(oldarg, (size_t)-1, &namelen)) { if (has_non_ascii(oldarg, (size_t)-1, &namelen)) {
newarg = reencode_string_iconv(oldarg, namelen, ic_precompose); newarg = reencode_string_iconv(oldarg, namelen, ic_precompose, NULL);
if (newarg) if (newarg)
argv[i] = newarg; argv[i] = newarg;
} }

48
log-tree.c

@ -175,36 +175,52 @@ static void show_children(struct rev_info *opt, struct commit *commit, int abbre
} }
} }


void show_decorations(struct rev_info *opt, struct commit *commit) /*
* The caller makes sure there is no funny color before
* calling. format_decorations makes sure the same after return.
*/
void format_decorations(struct strbuf *sb,
const struct commit *commit,
int use_color)
{ {
const char *prefix; const char *prefix;
struct name_decoration *decoration; struct name_decoration *decoration;
const char *color_commit = const char *color_commit =
diff_get_color_opt(&opt->diffopt, DIFF_COMMIT); diff_get_color(use_color, DIFF_COMMIT);
const char *color_reset = const char *color_reset =
decorate_get_color_opt(&opt->diffopt, DECORATION_NONE); decorate_get_color(use_color, DECORATION_NONE);


if (opt->show_source && commit->util)
printf("\t%s", (char *) commit->util);
if (!opt->show_decorations)
return;
decoration = lookup_decoration(&name_decoration, &commit->object); decoration = lookup_decoration(&name_decoration, &commit->object);
if (!decoration) if (!decoration)
return; return;
prefix = " ("; prefix = " (";
while (decoration) { while (decoration) {
printf("%s", prefix); strbuf_addstr(sb, color_commit);
fputs(decorate_get_color_opt(&opt->diffopt, decoration->type), strbuf_addstr(sb, prefix);
stdout); strbuf_addstr(sb, decorate_get_color(use_color, decoration->type));
if (decoration->type == DECORATION_REF_TAG) if (decoration->type == DECORATION_REF_TAG)
fputs("tag: ", stdout); strbuf_addstr(sb, "tag: ");
printf("%s", decoration->name); strbuf_addstr(sb, decoration->name);
fputs(color_reset, stdout); strbuf_addstr(sb, color_reset);
fputs(color_commit, stdout);
prefix = ", "; prefix = ", ";
decoration = decoration->next; decoration = decoration->next;
} }
putchar(')'); strbuf_addstr(sb, color_commit);
strbuf_addch(sb, ')');
strbuf_addstr(sb, color_reset);
}

void show_decorations(struct rev_info *opt, struct commit *commit)
{
struct strbuf sb = STRBUF_INIT;

if (opt->show_source && commit->util)
printf("\t%s", (char *) commit->util);
if (!opt->show_decorations)
return;
format_decorations(&sb, commit, opt->diffopt.use_color);
fputs(sb.buf, stdout);
strbuf_release(&sb);
} }


static unsigned int digits_in_number(unsigned int number) static unsigned int digits_in_number(unsigned int number)
@ -540,8 +556,8 @@ void show_log(struct rev_info *opt)
printf(" (from %s)", printf(" (from %s)",
find_unique_abbrev(parent->object.sha1, find_unique_abbrev(parent->object.sha1,
abbrev_commit)); abbrev_commit));
fputs(diff_get_color_opt(&opt->diffopt, DIFF_RESET), stdout);
show_decorations(opt, commit); show_decorations(opt, commit);
printf("%s", diff_get_color_opt(&opt->diffopt, DIFF_RESET));
if (opt->commit_format == CMIT_FMT_ONELINE) { if (opt->commit_format == CMIT_FMT_ONELINE) {
putchar(' '); putchar(' ');
} else { } else {

1
log-tree.h

@ -13,6 +13,7 @@ int log_tree_diff_flush(struct rev_info *);
int log_tree_commit(struct rev_info *, struct commit *); int log_tree_commit(struct rev_info *, struct commit *);
int log_tree_opt_parse(struct rev_info *, const char **, int); int log_tree_opt_parse(struct rev_info *, const char **, int);
void show_log(struct rev_info *opt); void show_log(struct rev_info *opt);
void format_decorations(struct strbuf *sb, const struct commit *commit, int use_color);
void show_decorations(struct rev_info *opt, struct commit *commit); void show_decorations(struct rev_info *opt, struct commit *commit);
void log_write_email_headers(struct rev_info *opt, struct commit *commit, void log_write_email_headers(struct rev_info *opt, struct commit *commit,
const char **subject_p, const char **subject_p,

358
pretty.c

@ -606,6 +606,7 @@ static char *replace_encoding_header(char *buf, const char *encoding)
} }


char *logmsg_reencode(const struct commit *commit, char *logmsg_reencode(const struct commit *commit,
char **commit_encoding,
const char *output_encoding) const char *output_encoding)
{ {
static const char *utf8 = "UTF-8"; static const char *utf8 = "UTF-8";
@ -627,9 +628,15 @@ char *logmsg_reencode(const struct commit *commit,
sha1_to_hex(commit->object.sha1), typename(type)); sha1_to_hex(commit->object.sha1), typename(type));
} }


if (!output_encoding || !*output_encoding) if (!output_encoding || !*output_encoding) {
if (commit_encoding)
*commit_encoding =
get_header(commit, msg, "encoding");
return msg; return msg;
}
encoding = get_header(commit, msg, "encoding"); encoding = get_header(commit, msg, "encoding");
if (commit_encoding)
*commit_encoding = encoding;
use_encoding = encoding ? encoding : utf8; use_encoding = encoding ? encoding : utf8;
if (same_encoding(use_encoding, output_encoding)) { if (same_encoding(use_encoding, output_encoding)) {
/* /*
@ -670,7 +677,8 @@ char *logmsg_reencode(const struct commit *commit,
if (out) if (out)
out = replace_encoding_header(out, output_encoding); out = replace_encoding_header(out, output_encoding);


free(encoding); if (!commit_encoding)
free(encoding);
/* /*
* If the re-encoding failed, out might be NULL here; in that * If the re-encoding failed, out might be NULL here; in that
* case we just return the commit message verbatim. * case we just return the commit message verbatim.
@ -764,19 +772,38 @@ struct chunk {
size_t len; size_t len;
}; };


/*
 * Alignment requested for the next placeholder by a padding directive.
 *
 * no_flush is kept at zero on purpose: the formatting context is
 * cleared with memset() before use, and a zero flush type means "no
 * padding pending".  The remaining values are chosen by the padding
 * parser: '<' selects flush_right, '>' selects flush_left, '>>'
 * selects flush_left_and_steal and '><' selects flush_both.
 */
enum flush_type {
	no_flush = 0,
	flush_right = 1,
	flush_left = 2,
	flush_left_and_steal = 3,
	flush_both = 4
};

/*
 * Truncation mode attached to a padding directive.
 *
 * trunc_none is kept at zero so that a context cleared with memset()
 * starts without truncation.  The padding parser maps the "ltrunc",
 * "mtrunc" and "trunc" suffixes to trunc_left, trunc_middle and
 * trunc_right respectively.
 */
enum trunc_type {
	trunc_none = 0,
	trunc_left = 1,
	trunc_middle = 2,
	trunc_right = 3
};

struct format_commit_context { struct format_commit_context {
const struct commit *commit; const struct commit *commit;
const struct pretty_print_context *pretty_ctx; const struct pretty_print_context *pretty_ctx;
unsigned commit_header_parsed:1; unsigned commit_header_parsed:1;
unsigned commit_message_parsed:1; unsigned commit_message_parsed:1;
struct signature_check signature_check; struct signature_check signature_check;
enum flush_type flush_type;
enum trunc_type truncate;
char *message; char *message;
char *commit_encoding;
size_t width, indent1, indent2; size_t width, indent1, indent2;
int auto_color;
int padding;


/* These offsets are relative to the start of the commit message. */ /* These offsets are relative to the start of the commit message. */
struct chunk author; struct chunk author;
struct chunk committer; struct chunk committer;
struct chunk encoding;
size_t message_off; size_t message_off;
size_t subject_off; size_t subject_off;
size_t body_off; size_t body_off;
@ -823,9 +850,6 @@ static void parse_commit_header(struct format_commit_context *context)
} else if (!prefixcmp(msg + i, "committer ")) { } else if (!prefixcmp(msg + i, "committer ")) {
context->committer.off = i + 10; context->committer.off = i + 10;
context->committer.len = eol - i - 10; context->committer.len = eol - i - 10;
} else if (!prefixcmp(msg + i, "encoding ")) {
context->encoding.off = i + 9;
context->encoding.len = eol - i - 9;
} }
i = eol; i = eol;
} }
@ -906,23 +930,6 @@ static void parse_commit_message(struct format_commit_context *c)
c->commit_message_parsed = 1; c->commit_message_parsed = 1;
} }


static void format_decoration(struct strbuf *sb, const struct commit *commit)
{
struct name_decoration *d;
const char *prefix = " (";

load_ref_decorations(DECORATE_SHORT_REFS);
d = lookup_decoration(&name_decoration, &commit->object);
while (d) {
strbuf_addstr(sb, prefix);
prefix = ", ";
strbuf_addstr(sb, d->name);
d = d->next;
}
if (prefix[0] == ',')
strbuf_addch(sb, ')');
}

static void strbuf_wrap(struct strbuf *sb, size_t pos, static void strbuf_wrap(struct strbuf *sb, size_t pos,
size_t width, size_t indent1, size_t indent2) size_t width, size_t indent1, size_t indent2)
{ {
@ -969,7 +976,112 @@ static int format_reflog_person(struct strbuf *sb,
return format_person_part(sb, part, ident, strlen(ident), dmode); return format_person_part(sb, part, ident, strlen(ident), dmode);
} }


static size_t format_commit_one(struct strbuf *sb, const char *placeholder, static size_t parse_color(struct strbuf *sb, /* in UTF-8 */
const char *placeholder,
struct format_commit_context *c)
{
if (placeholder[1] == '(') {
const char *begin = placeholder + 2;
const char *end = strchr(begin, ')');
char color[COLOR_MAXLEN];

if (!end)
return 0;
if (!prefixcmp(begin, "auto,")) {
if (!want_color(c->pretty_ctx->color))
return end - placeholder + 1;
begin += 5;
}
color_parse_mem(begin,
end - begin,
"--pretty format", color);
strbuf_addstr(sb, color);
return end - placeholder + 1;
}
if (!prefixcmp(placeholder + 1, "red")) {
strbuf_addstr(sb, GIT_COLOR_RED);
return 4;
} else if (!prefixcmp(placeholder + 1, "green")) {
strbuf_addstr(sb, GIT_COLOR_GREEN);
return 6;
} else if (!prefixcmp(placeholder + 1, "blue")) {
strbuf_addstr(sb, GIT_COLOR_BLUE);
return 5;
} else if (!prefixcmp(placeholder + 1, "reset")) {
strbuf_addstr(sb, GIT_COLOR_RESET);
return 6;
} else
return 0;
}

static size_t parse_padding_placeholder(struct strbuf *sb,
const char *placeholder,
struct format_commit_context *c)
{
const char *ch = placeholder;
enum flush_type flush_type;
int to_column = 0;

switch (*ch++) {
case '<':
flush_type = flush_right;
break;
case '>':
if (*ch == '<') {
flush_type = flush_both;
ch++;
} else if (*ch == '>') {
flush_type = flush_left_and_steal;
ch++;
} else
flush_type = flush_left;
break;
default:
return 0;
}

/* the next value means "wide enough to that column" */
if (*ch == '|') {
to_column = 1;
ch++;
}

if (*ch == '(') {
const char *start = ch + 1;
const char *end = start + strcspn(start, ",)");
char *next;
int width;
if (!end || end == start)
return 0;
width = strtoul(start, &next, 10);
if (next == start || width == 0)
return 0;
c->padding = to_column ? -width : width;
c->flush_type = flush_type;

if (*end == ',') {
start = end + 1;
end = strchr(start, ')');
if (!end || end == start)
return 0;
if (!prefixcmp(start, "trunc)"))
c->truncate = trunc_right;
else if (!prefixcmp(start, "ltrunc)"))
c->truncate = trunc_left;
else if (!prefixcmp(start, "mtrunc)"))
c->truncate = trunc_middle;
else
return 0;
} else
c->truncate = trunc_none;

return end - placeholder + 1;
}
return 0;
}

static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
const char *placeholder,
void *context) void *context)
{ {
struct format_commit_context *c = context; struct format_commit_context *c = context;
@ -981,38 +1093,20 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
/* these are independent of the commit */ /* these are independent of the commit */
switch (placeholder[0]) { switch (placeholder[0]) {
case 'C': case 'C':
if (placeholder[1] == '(') { if (!prefixcmp(placeholder + 1, "(auto)")) {
const char *begin = placeholder + 2; c->auto_color = 1;
const char *end = strchr(begin, ')'); return 7; /* consumed 7 bytes, "C(auto)" */
char color[COLOR_MAXLEN]; } else {

int ret = parse_color(sb, placeholder, c);
if (!end) if (ret)
return 0; c->auto_color = 0;
if (!prefixcmp(begin, "auto,")) { /*
if (!want_color(c->pretty_ctx->color)) * Otherwise, we decided to treat %C<unknown>
return end - placeholder + 1; * as a literal string, and the previous
begin += 5; * %C(auto) is still valid.
} */
color_parse_mem(begin, return ret;
end - begin,
"--pretty format", color);
strbuf_addstr(sb, color);
return end - placeholder + 1;
} }
if (!prefixcmp(placeholder + 1, "red")) {
strbuf_addstr(sb, GIT_COLOR_RED);
return 4;
} else if (!prefixcmp(placeholder + 1, "green")) {
strbuf_addstr(sb, GIT_COLOR_GREEN);
return 6;
} else if (!prefixcmp(placeholder + 1, "blue")) {
strbuf_addstr(sb, GIT_COLOR_BLUE);
return 5;
} else if (!prefixcmp(placeholder + 1, "reset")) {
strbuf_addstr(sb, GIT_COLOR_RESET);
return 6;
} else
return 0;
case 'n': /* newline */ case 'n': /* newline */
strbuf_addch(sb, '\n'); strbuf_addch(sb, '\n');
return 1; return 1;
@ -1050,6 +1144,10 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
return end - placeholder + 1; return end - placeholder + 1;
} else } else
return 0; return 0;

case '<':
case '>':
return parse_padding_placeholder(sb, placeholder, c);
} }


/* these depend on the commit */ /* these depend on the commit */
@ -1058,13 +1156,19 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,


switch (placeholder[0]) { switch (placeholder[0]) {
case 'H': /* commit hash */ case 'H': /* commit hash */
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_COMMIT));
strbuf_addstr(sb, sha1_to_hex(commit->object.sha1)); strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
return 1; return 1;
case 'h': /* abbreviated commit hash */ case 'h': /* abbreviated commit hash */
if (add_again(sb, &c->abbrev_commit_hash)) strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_COMMIT));
if (add_again(sb, &c->abbrev_commit_hash)) {
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
return 1; return 1;
}
strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1, strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
c->pretty_ctx->abbrev)); c->pretty_ctx->abbrev));
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off; c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
return 1; return 1;
case 'T': /* tree hash */ case 'T': /* tree hash */
@ -1101,7 +1205,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
strbuf_addstr(sb, get_revision_mark(NULL, commit)); strbuf_addstr(sb, get_revision_mark(NULL, commit));
return 1; return 1;
case 'd': case 'd':
format_decoration(sb, commit); load_ref_decorations(DECORATE_SHORT_REFS);
format_decorations(sb, commit, c->auto_color);
return 1; return 1;
case 'g': /* reflog info */ case 'g': /* reflog info */
switch(placeholder[1]) { switch(placeholder[1]) {
@ -1180,7 +1285,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
msg + c->committer.off, c->committer.len, msg + c->committer.off, c->committer.len,
c->pretty_ctx->date_mode); c->pretty_ctx->date_mode);
case 'e': /* encoding */ case 'e': /* encoding */
strbuf_add(sb, msg + c->encoding.off, c->encoding.len); if (c->commit_encoding)
strbuf_addstr(sb, c->commit_encoding);
return 1; return 1;
case 'B': /* raw body */ case 'B': /* raw body */
/* message_off is always left at the initial newline */ /* message_off is always left at the initial newline */
@ -1206,7 +1312,111 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
return 0; /* unknown placeholder */ return 0; /* unknown placeholder */
} }


static size_t format_commit_item(struct strbuf *sb, const char *placeholder, static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
const char *placeholder,
struct format_commit_context *c)
{
struct strbuf local_sb = STRBUF_INIT;
int total_consumed = 0, len, padding = c->padding;
if (padding < 0) {
const char *start = strrchr(sb->buf, '\n');
int occupied;
if (!start)
start = sb->buf;
occupied = utf8_strnwidth(start, -1, 1);
padding = (-padding) - occupied;
}
while (1) {
int modifier = *placeholder == 'C';
int consumed = format_commit_one(&local_sb, placeholder, c);
total_consumed += consumed;

if (!modifier)
break;

placeholder += consumed;
if (*placeholder != '%')
break;
placeholder++;
total_consumed++;
}
len = utf8_strnwidth(local_sb.buf, -1, 1);

if (c->flush_type == flush_left_and_steal) {
const char *ch = sb->buf + sb->len - 1;
while (len > padding && ch > sb->buf) {
const char *p;
if (*ch == ' ') {
ch--;
padding++;
continue;
}
/* check for trailing ansi sequences */
if (*ch != 'm')
break;
p = ch - 1;
while (ch - p < 10 && *p != '\033')
p--;
if (*p != '\033' ||
ch + 1 - p != display_mode_esc_sequence_len(p))
break;
/*
* got a good ansi sequence, put it back to
* local_sb as we're cutting sb
*/
strbuf_insert(&local_sb, 0, p, ch + 1 - p);
ch = p - 1;
}
strbuf_setlen(sb, ch + 1 - sb->buf);
c->flush_type = flush_left;
}

if (len > padding) {
switch (c->truncate) {
case trunc_left:
strbuf_utf8_replace(&local_sb,
0, len - (padding - 2),
"..");
break;
case trunc_middle:
strbuf_utf8_replace(&local_sb,
padding / 2 - 1,
len - (padding - 2),
"..");
break;
case trunc_right:
strbuf_utf8_replace(&local_sb,
padding - 2, len - (padding - 2),
"..");
break;
case trunc_none:
break;
}
strbuf_addstr(sb, local_sb.buf);
} else {
int sb_len = sb->len, offset = 0;
if (c->flush_type == flush_left)
offset = padding - len;
else if (c->flush_type == flush_both)
offset = (padding - len) / 2;
/*
* we calculate padding in columns, now
* convert it back to chars
*/
padding = padding - len + local_sb.len;
strbuf_grow(sb, padding);
strbuf_setlen(sb, sb_len + padding);
memset(sb->buf + sb_len, ' ', sb->len - sb_len);
memcpy(sb->buf + sb_len + offset, local_sb.buf,
local_sb.len);
}
strbuf_release(&local_sb);
c->flush_type = no_flush;
return total_consumed;
}

static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
const char *placeholder,
void *context) void *context)
{ {
int consumed; int consumed;
@ -1235,7 +1445,10 @@ static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
placeholder++; placeholder++;


orig_len = sb->len; orig_len = sb->len;
consumed = format_commit_one(sb, placeholder, context); if (((struct format_commit_context *)context)->flush_type != no_flush)
consumed = format_and_pad_commit(sb, placeholder, context);
else
consumed = format_commit_one(sb, placeholder, context);
if (magic == NO_MAGIC) if (magic == NO_MAGIC)
return consumed; return consumed;


@ -1286,16 +1499,37 @@ void format_commit_message(const struct commit *commit,
{ {
struct format_commit_context context; struct format_commit_context context;
const char *output_enc = pretty_ctx->output_encoding; const char *output_enc = pretty_ctx->output_encoding;
const char *utf8 = "UTF-8";


memset(&context, 0, sizeof(context)); memset(&context, 0, sizeof(context));
context.commit = commit; context.commit = commit;
context.pretty_ctx = pretty_ctx; context.pretty_ctx = pretty_ctx;
context.wrap_start = sb->len; context.wrap_start = sb->len;
context.message = logmsg_reencode(commit, output_enc); context.message = logmsg_reencode(commit,
&context.commit_encoding,
output_enc);


strbuf_expand(sb, format, format_commit_item, &context); strbuf_expand(sb, format, format_commit_item, &context);
rewrap_message_tail(sb, &context, 0, 0, 0); rewrap_message_tail(sb, &context, 0, 0, 0);


if (output_enc) {
if (same_encoding(utf8, output_enc))
output_enc = NULL;
} else {
if (context.commit_encoding &&
!same_encoding(context.commit_encoding, utf8))
output_enc = context.commit_encoding;
}

if (output_enc) {
int outsz;
char *out = reencode_string_len(sb->buf, sb->len,
output_enc, utf8, &outsz);
if (out)
strbuf_attach(sb, out, outsz, outsz + 1);
}

free(context.commit_encoding);
logmsg_free(context.message, commit); logmsg_free(context.message, commit);
free(context.signature_check.gpg_output); free(context.signature_check.gpg_output);
free(context.signature_check.signer); free(context.signature_check.signer);
@ -1454,7 +1688,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
} }


encoding = get_log_output_encoding(); encoding = get_log_output_encoding();
msg = reencoded = logmsg_reencode(commit, encoding); msg = reencoded = logmsg_reencode(commit, NULL, encoding);


if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL) if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL)
indent = 0; indent = 0;

2
revision.c

@ -2292,7 +2292,7 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
* in it. * in it.
*/ */
encoding = get_log_output_encoding(); encoding = get_log_output_encoding();
message = logmsg_reencode(commit, encoding); message = logmsg_reencode(commit, NULL, encoding);


/* Copy the commit to temporary if we are using "fake" headers */ /* Copy the commit to temporary if we are using "fake" headers */
if (buf.len) if (buf.len)

175
t/t4205-log-pretty-formats.sh

@ -99,4 +99,179 @@ test_expect_failure 'NUL termination with --stat' '
test_i18ncmp expected actual test_i18ncmp expected actual
' '


test_expect_success 'setup more commits' '
test_commit "message one" one one message-one &&
test_commit "message two" two two message-two
'

test_expect_success 'left alignment formatting' '
git log --pretty="format:%<(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
message two Z
message one Z
add bar Z
initial Z
EOF
test_cmp expected actual
'

test_expect_success 'left alignment formatting at the nth column' '
git log --pretty="format:%h %<|(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
fa33ab1 message two Z
7cd6c63 message one Z
1711bf9 add bar Z
af20c06 initial Z
EOF
test_cmp expected actual
'

test_expect_success 'left alignment formatting with no padding' '
git log --pretty="format:%<(1)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
cat <<\EOF >expected &&
message two
message one
add bar
initial
EOF
test_cmp expected actual
'

test_expect_success 'left alignment formatting with trunc' '
git log --pretty="format:%<(10,trunc)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
message ..
message ..
add bar Z
initial Z
EOF
test_cmp expected actual
'

test_expect_success 'left alignment formatting with ltrunc' '
git log --pretty="format:%<(10,ltrunc)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
..sage two
..sage one
add bar Z
initial Z
EOF
test_cmp expected actual
'

test_expect_success 'left alignment formatting with mtrunc' '
git log --pretty="format:%<(10,mtrunc)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
mess.. two
mess.. one
add bar Z
initial Z
EOF
test_cmp expected actual
'

test_expect_success 'right alignment formatting' '
git log --pretty="format:%>(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
Z message two
Z message one
Z add bar
Z initial
EOF
test_cmp expected actual
'

test_expect_success 'right alignment formatting at the nth column' '
git log --pretty="format:%h %>|(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
fa33ab1 message two
7cd6c63 message one
1711bf9 add bar
af20c06 initial
EOF
test_cmp expected actual
'

test_expect_success 'right alignment formatting with no padding' '
git log --pretty="format:%>(1)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
cat <<\EOF >expected &&
message two
message one
add bar
initial
EOF
test_cmp expected actual
'

test_expect_success 'center alignment formatting' '
git log --pretty="format:%><(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
Z message two Z
Z message one Z
Z add bar Z
Z initial Z
EOF
test_cmp expected actual
'

test_expect_success 'center alignment formatting at the nth column' '
git log --pretty="format:%h %><|(40)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
qz_to_tab_space <<\EOF >expected &&
fa33ab1 message two Z
7cd6c63 message one Z
1711bf9 add bar Z
af20c06 initial Z
EOF
test_cmp expected actual
'

test_expect_success 'center alignment formatting with no padding' '
git log --pretty="format:%><(1)%s" >actual &&
# complete the incomplete line at the end
echo >>actual &&
cat <<\EOF >expected &&
message two
message one
add bar
initial
EOF
test_cmp expected actual
'

test_expect_success 'left/right alignment formatting with stealing' '
git commit --amend -m short --author "long long long <long@me.com>" &&
git log --pretty="format:%<(10,trunc)%s%>>(10,ltrunc)% an" >actual &&
# complete the incomplete line at the end
echo >>actual &&
cat <<\EOF >expected &&
short long long long
message .. A U Thor
add bar A U Thor
initial A U Thor
EOF
test_cmp expected actual
'

test_done test_done

8
t/t4207-log-decoration-colors.sh

@ -44,15 +44,15 @@ test_expect_success setup '
' '


cat >expected <<EOF cat >expected <<EOF
${c_commit}COMMIT_ID (${c_HEAD}HEAD${c_reset}${c_commit},\ ${c_commit}COMMIT_ID${c_reset}${c_commit} (${c_HEAD}HEAD${c_reset}${c_commit},\
${c_tag}tag: v1.0${c_reset}${c_commit},\ ${c_tag}tag: v1.0${c_reset}${c_commit},\
${c_tag}tag: B${c_reset}${c_commit},\ ${c_tag}tag: B${c_reset}${c_commit},\
${c_branch}master${c_reset}${c_commit})${c_reset} B ${c_branch}master${c_reset}${c_commit})${c_reset} B
${c_commit}COMMIT_ID (${c_tag}tag: A1${c_reset}${c_commit},\ ${c_commit}COMMIT_ID${c_reset}${c_commit} (${c_tag}tag: A1${c_reset}${c_commit},\
${c_remoteBranch}other/master${c_reset}${c_commit})${c_reset} A1 ${c_remoteBranch}other/master${c_reset}${c_commit})${c_reset} A1
${c_commit}COMMIT_ID (${c_stash}refs/stash${c_reset}${c_commit})${c_reset}\ ${c_commit}COMMIT_ID${c_reset}${c_commit} (${c_stash}refs/stash${c_reset}${c_commit})${c_reset}\
On master: Changes to A.t On master: Changes to A.t
${c_commit}COMMIT_ID (${c_tag}tag: A${c_reset}${c_commit})${c_reset} A ${c_commit}COMMIT_ID${c_reset}${c_commit} (${c_tag}tag: A${c_reset}${c_commit})${c_reset} A
EOF EOF


# We want log to show all, but the second parent to refs/stash is irrelevant # We want log to show all, but the second parent to refs/stash is irrelevant

12
t/t6006-rev-list-format.sh

@ -184,7 +184,7 @@ Test printing of complex bodies


This commit message is much longer than the others, This commit message is much longer than the others,
and it will be encoded in iso8859-1. We should therefore and it will be encoded in iso8859-1. We should therefore
include an iso8859 character: ¡bueno! include an iso8859 character: ¡bueno!
EOF EOF
test_expect_success 'setup complex body' ' test_expect_success 'setup complex body' '
git config i18n.commitencoding iso8859-1 && git config i18n.commitencoding iso8859-1 &&
@ -192,14 +192,14 @@ git config i18n.commitencoding iso8859-1 &&
' '


test_format complex-encoding %e <<'EOF' test_format complex-encoding %e <<'EOF'
commit f58db70b055c5718631e5c61528b28b12090cdea commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
iso8859-1 iso8859-1
commit 131a310eb913d107dd3c09a65d1651175898735d commit 131a310eb913d107dd3c09a65d1651175898735d
commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873 commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
EOF EOF


test_format complex-subject %s <<'EOF' test_format complex-subject %s <<'EOF'
commit f58db70b055c5718631e5c61528b28b12090cdea commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
Test printing of complex bodies Test printing of complex bodies
commit 131a310eb913d107dd3c09a65d1651175898735d commit 131a310eb913d107dd3c09a65d1651175898735d
changed foo changed foo
@ -208,17 +208,17 @@ added foo
EOF EOF


test_format complex-body %b <<'EOF' test_format complex-body %b <<'EOF'
commit f58db70b055c5718631e5c61528b28b12090cdea commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
This commit message is much longer than the others, This commit message is much longer than the others,
and it will be encoded in iso8859-1. We should therefore and it will be encoded in iso8859-1. We should therefore
include an iso8859 character: ¡bueno! include an iso8859 character: ¡bueno!


commit 131a310eb913d107dd3c09a65d1651175898735d commit 131a310eb913d107dd3c09a65d1651175898735d
commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873 commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
EOF EOF


test_expect_success '%x00 shows NUL' ' test_expect_success '%x00 shows NUL' '
echo >expect commit f58db70b055c5718631e5c61528b28b12090cdea && echo >expect commit 1ed88da4a5b5ed8c449114ac131efc62178734c3 &&
echo >>expect fooQbar && echo >>expect fooQbar &&
git rev-list -1 --format=foo%x00bar HEAD >actual.nul && git rev-list -1 --format=foo%x00bar HEAD >actual.nul &&
nul_to_q <actual.nul >actual && nul_to_q <actual.nul >actual &&

104
utf8.c

@ -9,6 +9,20 @@ struct interval {
int last; int last;
}; };


/*
 * Return the length in bytes of the display-mode escape sequence that
 * starts at s: ESC, '[', any mix of digits and ';', and a closing 'm'.
 * Return 0 when s does not begin with a complete sequence of that shape.
 *
 * The scan has no length limit of its own; it stops at the first byte that
 * does not fit the pattern, so s must be NUL-terminated.
 */
size_t display_mode_esc_sequence_len(const char *s)
{
	size_t i = 2;

	if (s[0] != '\033' || s[1] != '[')
		return 0;
	/* numeric parameters, separated by ';' */
	while (isdigit(s[i]) || s[i] == ';')
		i++;
	return s[i] == 'm' ? i + 1 : 0;
}

/* auxiliary function for binary search in interval table */ /* auxiliary function for binary search in interval table */
static int bisearch(ucs_char_t ucs, const struct interval *table, int max) static int bisearch(ucs_char_t ucs, const struct interval *table, int max)
{ {
@ -252,18 +266,26 @@ int utf8_width(const char **start, size_t *remainder_p)
* string, assuming that the string is utf8. Returns strlen() instead * string, assuming that the string is utf8. Returns strlen() instead
* if the string does not look like a valid utf8 string. * if the string does not look like a valid utf8 string.
*/ */
int utf8_strwidth(const char *string) int utf8_strnwidth(const char *string, int len, int skip_ansi)
{ {
int width = 0; int width = 0;
const char *orig = string; const char *orig = string;


while (1) { if (len == -1)
if (!string) len = strlen(string);
return strlen(orig); while (string && string < orig + len) {
if (!*string) int skip;
return width; while (skip_ansi &&
(skip = display_mode_esc_sequence_len(string)) != 0)
string += skip;
width += utf8_width(&string, NULL); width += utf8_width(&string, NULL);
} }
return string ? width : len;
}

int utf8_strwidth(const char *string)
{
return utf8_strnwidth(string, -1, 0);
} }


int is_utf8(const char *text) int is_utf8(const char *text)
@ -303,20 +325,6 @@ static void strbuf_add_indented_text(struct strbuf *buf, const char *text,
} }
} }


/*
 * Length in bytes of the display-mode escape sequence at the start of s
 * (ESC '[' <digits and ';'> 'm'), or 0 if s does not start with one.
 * s must be NUL-terminated; the scan stops at the first non-matching byte.
 */
static size_t display_mode_esc_sequence_len(const char *s)
{
	const char *cur;

	if (s[0] != '\033' || s[1] != '[')
		return 0;
	for (cur = s + 2; isdigit(*cur) || *cur == ';'; cur++)
		; /* skip the numeric parameters */
	if (*cur != 'm')
		return 0;
	return cur + 1 - s;
}

/* /*
* Wrap the text, if necessary. The variable indent is the indent for the * Wrap the text, if necessary. The variable indent is the indent for the
* first line, indent2 is the indent for all other lines. * first line, indent2 is the indent for all other lines.
@ -413,6 +421,52 @@ void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
free(tmp); free(tmp);
} }


/*
 * Rewrite sb_src so that the characters occupying display columns
 * [pos, pos + width) are replaced by subst.
 *
 *  - Columns are counted with utf8_width(); display-mode escape sequences
 *    (see display_mode_esc_sequence_len()) take no columns and are always
 *    copied through, including those inside the replaced range.
 *  - Characters for which utf8_width() reports zero width are never dropped.
 *  - subst is inserted once, at the first replaced character.  A NULL subst
 *    simply removes the range.
 *  - If the buffer is not valid UTF-8 (utf8_width() sets the cursor to
 *    NULL), sb_src is left untouched.
 *
 * The result is built in a scratch strbuf whose buffer is then handed over
 * to sb_src with strbuf_attach().
 */
void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width,
			 const char *subst)
{
	struct strbuf sb_dst = STRBUF_INIT;
	char *src = sb_src->buf;
	char *end = src + sb_src->len;
	char *dst;
	size_t dst_len, dst_alloc;
	int w = 0, subst_len = 0;

	if (subst)
		subst_len = strlen(subst);
	/* the output can never be longer than the input plus one subst */
	strbuf_grow(&sb_dst, sb_src->len + subst_len);
	dst = sb_dst.buf;

	while (src < end) {
		char *old;
		size_t n;
		int glyph_width;

		while ((n = display_mode_esc_sequence_len(src))) {
			memcpy(dst, src, n);
			src += n;
			dst += n;
		}
		/*
		 * The buffer may end in an escape sequence; do not decode
		 * (and copy) a "character" that starts at end.
		 */
		if (src >= end)
			break;

		old = src;
		/* utf8_width() returns int; keep it signed */
		glyph_width = utf8_width((const char**)&src, NULL);
		if (!src) {
			/*
			 * Broken utf-8: leave sb_src alone, but release the
			 * scratch buffer instead of leaking it.  The detached
			 * buffer is owned by the caller, as the strbuf_attach()
			 * call at the end of this function also relies on.
			 */
			free(strbuf_detach(&sb_dst, NULL));
			return;
		}
		if (glyph_width && w >= pos && w < pos + width) {
			if (subst) {
				memcpy(dst, subst, subst_len);
				dst += subst_len;
				subst = NULL;
			}
			w += glyph_width;
			continue;
		}
		memcpy(dst, old, src - old);
		dst += src - old;
		w += glyph_width;
	}
	strbuf_setlen(&sb_dst, dst - sb_dst.buf);

	/*
	 * Read len/alloc before detaching: function arguments are evaluated
	 * in unspecified order, and strbuf_detach() modifies sb_dst, so they
	 * must not be read in the same argument list.
	 */
	dst_len = sb_dst.len;
	dst_alloc = sb_dst.alloc;
	strbuf_attach(sb_src, strbuf_detach(&sb_dst, NULL), dst_len, dst_alloc);
}

int is_encoding_utf8(const char *name) int is_encoding_utf8(const char *name)
{ {
if (!name) if (!name)
@ -460,7 +514,7 @@ int utf8_fprintf(FILE *stream, const char *format, ...)
#else #else
typedef char * iconv_ibp; typedef char * iconv_ibp;
#endif #endif
char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv) char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, int *outsz_p)
{ {
size_t outsz, outalloc; size_t outsz, outalloc;
char *out, *outpos; char *out, *outpos;
@ -494,13 +548,17 @@ char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv)
} }
else { else {
*outpos = '\0'; *outpos = '\0';
if (outsz_p)
*outsz_p = outpos - out;
break; break;
} }
} }
return out; return out;
} }


char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding) char *reencode_string_len(const char *in, int insz,
const char *out_encoding, const char *in_encoding,
int *outsz)
{ {
iconv_t conv; iconv_t conv;
char *out; char *out;
@ -526,7 +584,7 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e
return NULL; return NULL;
} }


out = reencode_string_iconv(in, strlen(in), conv); out = reencode_string_iconv(in, insz, conv, outsz);
iconv_close(conv); iconv_close(conv);
return out; return out;
} }

23
utf8.h

@ -3,7 +3,9 @@


typedef unsigned int ucs_char_t; /* assuming 32bit int */ typedef unsigned int ucs_char_t; /* assuming 32bit int */


size_t display_mode_esc_sequence_len(const char *s);
int utf8_width(const char **start, size_t *remainder_p); int utf8_width(const char **start, size_t *remainder_p);
int utf8_strnwidth(const char *string, int len, int skip_ansi);
int utf8_strwidth(const char *string); int utf8_strwidth(const char *string);
int is_utf8(const char *text); int is_utf8(const char *text);
int is_encoding_utf8(const char *name); int is_encoding_utf8(const char *name);
@ -14,14 +16,29 @@ void strbuf_add_wrapped_text(struct strbuf *buf,
const char *text, int indent, int indent2, int width); const char *text, int indent, int indent2, int width);
void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len, void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
int indent, int indent2, int width); int indent, int indent2, int width);
void strbuf_utf8_replace(struct strbuf *sb, int pos, int width,
const char *subst);


#ifndef NO_ICONV #ifndef NO_ICONV
char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv); char *reencode_string_iconv(const char *in, size_t insz,
char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding); iconv_t conv, int *outsz);
char *reencode_string_len(const char *in, int insz,
const char *out_encoding,
const char *in_encoding,
int *outsz);
#else #else
#define reencode_string(a,b,c) NULL #define reencode_string_len(a,b,c,d,e) NULL
#endif #endif


/*
 * Convenience wrapper around reencode_string_len() for NUL-terminated
 * input: the input length is taken with strlen(in), and no output length
 * is requested (the last argument is NULL).
 *
 * Returns whatever reencode_string_len() yields.  When NO_ICONV is defined,
 * reencode_string_len is a macro that expands to NULL, so this wrapper
 * then always returns NULL without evaluating its arguments.
 *
 * NOTE(review): otherwise `in` is passed to strlen() and so must not be
 * NULL; who owns the returned buffer is not visible here -- confirm
 * against reencode_string_len().
 */
static inline char *reencode_string(const char *in,
const char *out_encoding,
const char *in_encoding)
{
return reencode_string_len(in, strlen(in),
out_encoding, in_encoding,
NULL);
}

int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding); int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding);


#endif #endif

Loading…
Cancel
Save