Merge branch 'jc/utf8'
* jc/utf8:
  t3900: test conversion to non UTF-8 as well
  Rename t3900 test vector file
  UTF-8: introduce i18n.logoutputencoding.
  Teach log family --encoding
  i18n.logToUTF8: convert commit log message to UTF-8
  Move encoding conversion routine out of mailinfo to utf8.c

Conflicts:
  commit.c
maint
commit
eff73751bb
|
@ -267,6 +267,10 @@ i18n.commitEncoding::
|
||||||
browser (and possibly at other places in the future or in other
|
browser (and possibly at other places in the future or in other
|
||||||
porcelains). See e.g. gitlink:git-mailinfo[1]. Defaults to 'utf-8'.
|
porcelains). See e.g. gitlink:git-mailinfo[1]. Defaults to 'utf-8'.
|
||||||
|
|
||||||
|
i18n.logOutputEncoding::
|
||||||
|
Character encoding the commit messages are converted to when
|
||||||
|
running `git-log` and friends.
|
||||||
|
|
||||||
log.showroot::
|
log.showroot::
|
||||||
If true, the initial commit will be shown as a big creation event.
|
If true, the initial commit will be shown as a big creation event.
|
||||||
This is equivalent to a diff against an empty tree.
|
This is equivalent to a diff against an empty tree.
|
||||||
|
|
|
@ -92,6 +92,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
|
||||||
char comment[1000];
|
char comment[1000];
|
||||||
char *buffer;
|
char *buffer;
|
||||||
unsigned int size;
|
unsigned int size;
|
||||||
|
int encoding_is_utf8;
|
||||||
|
|
||||||
setup_ident();
|
setup_ident();
|
||||||
git_config(git_default_config);
|
git_config(git_default_config);
|
||||||
|
@ -117,6 +118,10 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
|
||||||
parents++;
|
parents++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Not having i18n.commitencoding is the same as having utf-8 */
|
||||||
|
encoding_is_utf8 = (!git_commit_encoding ||
|
||||||
|
!strcmp(git_commit_encoding, "utf-8"));
|
||||||
|
|
||||||
init_buffer(&buffer, &size);
|
init_buffer(&buffer, &size);
|
||||||
add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1));
|
add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1));
|
||||||
|
|
||||||
|
@ -130,7 +135,11 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
|
||||||
|
|
||||||
/* Person/date information */
|
/* Person/date information */
|
||||||
add_buffer(&buffer, &size, "author %s\n", git_author_info(1));
|
add_buffer(&buffer, &size, "author %s\n", git_author_info(1));
|
||||||
add_buffer(&buffer, &size, "committer %s\n\n", git_committer_info(1));
|
add_buffer(&buffer, &size, "committer %s\n", git_committer_info(1));
|
||||||
|
if (!encoding_is_utf8)
|
||||||
|
add_buffer(&buffer, &size,
|
||||||
|
"encoding %s\n", git_commit_encoding);
|
||||||
|
add_buffer(&buffer, &size, "\n");
|
||||||
|
|
||||||
/* And add the comment */
|
/* And add the comment */
|
||||||
while (fgets(comment, sizeof(comment), stdin) != NULL)
|
while (fgets(comment, sizeof(comment), stdin) != NULL)
|
||||||
|
@ -138,7 +147,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
|
||||||
|
|
||||||
/* And check the encoding */
|
/* And check the encoding */
|
||||||
buffer[size] = '\0';
|
buffer[size] = '\0';
|
||||||
if (!strcmp(git_commit_encoding, "utf-8") && !is_utf8(buffer))
|
if (encoding_is_utf8 && !is_utf8(buffer))
|
||||||
fprintf(stderr, commit_utf8_warn);
|
fprintf(stderr, commit_utf8_warn);
|
||||||
|
|
||||||
if (!write_sha1_file(buffer, size, commit_type, commit_sha1)) {
|
if (!write_sha1_file(buffer, size, commit_type, commit_sha1)) {
|
||||||
|
|
|
@ -20,6 +20,8 @@ void add_head(struct rev_info *revs);
|
||||||
static void cmd_log_init(int argc, const char **argv, const char *prefix,
|
static void cmd_log_init(int argc, const char **argv, const char *prefix,
|
||||||
struct rev_info *rev)
|
struct rev_info *rev)
|
||||||
{
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
rev->abbrev = DEFAULT_ABBREV;
|
rev->abbrev = DEFAULT_ABBREV;
|
||||||
rev->commit_format = CMIT_FMT_DEFAULT;
|
rev->commit_format = CMIT_FMT_DEFAULT;
|
||||||
rev->verbose_header = 1;
|
rev->verbose_header = 1;
|
||||||
|
@ -27,8 +29,18 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix,
|
||||||
argc = setup_revisions(argc, argv, rev, "HEAD");
|
argc = setup_revisions(argc, argv, rev, "HEAD");
|
||||||
if (rev->diffopt.pickaxe || rev->diffopt.filter)
|
if (rev->diffopt.pickaxe || rev->diffopt.filter)
|
||||||
rev->always_show_header = 0;
|
rev->always_show_header = 0;
|
||||||
if (argc > 1)
|
for (i = 1; i < argc; i++) {
|
||||||
die("unrecognized argument: %s", argv[1]);
|
const char *arg = argv[i];
|
||||||
|
if (!strncmp(arg, "--encoding=", 11)) {
|
||||||
|
arg += 11;
|
||||||
|
if (strcmp(arg, "none"))
|
||||||
|
git_log_output_encoding = strdup(arg);
|
||||||
|
else
|
||||||
|
git_log_output_encoding = "";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
die("unrecognized argument: %s", arg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int cmd_log_walk(struct rev_info *rev)
|
static int cmd_log_walk(struct rev_info *rev)
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
*/
|
*/
|
||||||
#include "cache.h"
|
#include "cache.h"
|
||||||
#include "builtin.h"
|
#include "builtin.h"
|
||||||
|
#include "utf8.h"
|
||||||
|
|
||||||
static FILE *cmitmsg, *patchfile, *fin, *fout;
|
static FILE *cmitmsg, *patchfile, *fin, *fout;
|
||||||
|
|
||||||
|
@ -510,40 +511,18 @@ static int decode_b_segment(char *in, char *ot, char *ep)
|
||||||
|
|
||||||
static void convert_to_utf8(char *line, char *charset)
|
static void convert_to_utf8(char *line, char *charset)
|
||||||
{
|
{
|
||||||
#ifndef NO_ICONV
|
|
||||||
char *in, *out;
|
|
||||||
size_t insize, outsize, nrc;
|
|
||||||
char outbuf[4096]; /* cheat */
|
|
||||||
static char latin_one[] = "latin1";
|
static char latin_one[] = "latin1";
|
||||||
char *input_charset = *charset ? charset : latin_one;
|
char *input_charset = *charset ? charset : latin_one;
|
||||||
iconv_t conv = iconv_open(metainfo_charset, input_charset);
|
char *out = reencode_string(line, metainfo_charset, input_charset);
|
||||||
|
|
||||||
if (conv == (iconv_t) -1) {
|
if (!out) {
|
||||||
static int warned_latin1_once = 0;
|
|
||||||
if (input_charset != latin_one) {
|
|
||||||
fprintf(stderr, "cannot convert from %s to %s\n",
|
fprintf(stderr, "cannot convert from %s to %s\n",
|
||||||
input_charset, metainfo_charset);
|
input_charset, metainfo_charset);
|
||||||
*charset = 0;
|
*charset = 0;
|
||||||
}
|
|
||||||
else if (!warned_latin1_once) {
|
|
||||||
warned_latin1_once = 1;
|
|
||||||
fprintf(stderr, "tried to convert from %s to %s, "
|
|
||||||
"but your iconv does not work with it.\n",
|
|
||||||
input_charset, metainfo_charset);
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
in = line;
|
strcpy(line, out);
|
||||||
insize = strlen(in);
|
free(out);
|
||||||
out = outbuf;
|
|
||||||
outsize = sizeof(outbuf);
|
|
||||||
nrc = iconv(conv, &in, &insize, &out, &outsize);
|
|
||||||
iconv_close(conv);
|
|
||||||
if (nrc == (size_t) -1)
|
|
||||||
return;
|
|
||||||
*out = 0;
|
|
||||||
strcpy(line, outbuf);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int decode_header_bq(char *it)
|
static int decode_header_bq(char *it)
|
||||||
|
@ -827,7 +806,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
|
||||||
if (!strcmp(argv[1], "-k"))
|
if (!strcmp(argv[1], "-k"))
|
||||||
keep_subject = 1;
|
keep_subject = 1;
|
||||||
else if (!strcmp(argv[1], "-u"))
|
else if (!strcmp(argv[1], "-u"))
|
||||||
metainfo_charset = git_commit_encoding;
|
metainfo_charset = (git_commit_encoding
|
||||||
|
? git_commit_encoding : "utf-8");
|
||||||
else if (!strncmp(argv[1], "--encoding=", 11))
|
else if (!strncmp(argv[1], "--encoding=", 11))
|
||||||
metainfo_charset = argv[1] + 11;
|
metainfo_charset = argv[1] + 11;
|
||||||
else
|
else
|
||||||
|
|
4
cache.h
4
cache.h
|
@ -416,8 +416,8 @@ extern int check_repository_format_version(const char *var, const char *value);
|
||||||
extern char git_default_email[MAX_GITNAME];
|
extern char git_default_email[MAX_GITNAME];
|
||||||
extern char git_default_name[MAX_GITNAME];
|
extern char git_default_name[MAX_GITNAME];
|
||||||
|
|
||||||
#define MAX_ENCODING_LENGTH 64
|
extern char *git_commit_encoding;
|
||||||
extern char git_commit_encoding[MAX_ENCODING_LENGTH];
|
extern char *git_log_output_encoding;
|
||||||
|
|
||||||
extern int copy_fd(int ifd, int ofd);
|
extern int copy_fd(int ifd, int ofd);
|
||||||
extern void write_or_die(int fd, const void *buf, size_t count);
|
extern void write_or_die(int fd, const void *buf, size_t count);
|
||||||
|
|
66
commit.c
66
commit.c
|
@ -2,6 +2,7 @@
|
||||||
#include "tag.h"
|
#include "tag.h"
|
||||||
#include "commit.h"
|
#include "commit.h"
|
||||||
#include "pkt-line.h"
|
#include "pkt-line.h"
|
||||||
|
#include "utf8.h"
|
||||||
|
|
||||||
int save_commit_buffer = 1;
|
int save_commit_buffer = 1;
|
||||||
|
|
||||||
|
@ -597,10 +598,61 @@ static int add_merge_info(enum cmit_fmt fmt, char *buf, const struct commit *com
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
|
static char *get_header(const struct commit *commit, const char *key)
|
||||||
unsigned long len, char *buf, unsigned long space,
|
{
|
||||||
|
int key_len = strlen(key);
|
||||||
|
const char *line = commit->buffer;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
const char *eol = strchr(line, '\n'), *next;
|
||||||
|
|
||||||
|
if (line == eol)
|
||||||
|
return NULL;
|
||||||
|
if (!eol) {
|
||||||
|
eol = line + strlen(line);
|
||||||
|
next = NULL;
|
||||||
|
} else
|
||||||
|
next = eol + 1;
|
||||||
|
if (!strncmp(line, key, key_len) && line[key_len] == ' ') {
|
||||||
|
int len = eol - line - key_len;
|
||||||
|
char *ret = xmalloc(len);
|
||||||
|
memcpy(ret, line + key_len + 1, len - 1);
|
||||||
|
ret[len - 1] = '\0';
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
line = next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static char *logmsg_reencode(const struct commit *commit)
|
||||||
|
{
|
||||||
|
char *encoding;
|
||||||
|
char *out;
|
||||||
|
char *output_encoding = (git_log_output_encoding
|
||||||
|
? git_log_output_encoding
|
||||||
|
: git_commit_encoding);
|
||||||
|
|
||||||
|
if (!output_encoding)
|
||||||
|
return NULL;
|
||||||
|
encoding = get_header(commit, "encoding");
|
||||||
|
if (!encoding || !strcmp(encoding, output_encoding)) {
|
||||||
|
free(encoding);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
out = reencode_string(commit->buffer, output_encoding, encoding);
|
||||||
|
free(encoding);
|
||||||
|
if (!out)
|
||||||
|
return NULL;
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long pretty_print_commit(enum cmit_fmt fmt,
|
||||||
|
const struct commit *commit,
|
||||||
|
unsigned long len,
|
||||||
|
char *buf, unsigned long space,
|
||||||
int abbrev, const char *subject,
|
int abbrev, const char *subject,
|
||||||
const char *after_subject, int relative_date)
|
const char *after_subject,
|
||||||
|
int relative_date)
|
||||||
{
|
{
|
||||||
int hdr = 1, body = 0;
|
int hdr = 1, body = 0;
|
||||||
unsigned long offset = 0;
|
unsigned long offset = 0;
|
||||||
|
@ -608,6 +660,10 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
|
||||||
int parents_shown = 0;
|
int parents_shown = 0;
|
||||||
const char *msg = commit->buffer;
|
const char *msg = commit->buffer;
|
||||||
int plain_non_ascii = 0;
|
int plain_non_ascii = 0;
|
||||||
|
char *reencoded = logmsg_reencode(commit);
|
||||||
|
|
||||||
|
if (reencoded)
|
||||||
|
msg = reencoded;
|
||||||
|
|
||||||
if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
|
if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
|
||||||
indent = 0;
|
indent = 0;
|
||||||
|
@ -624,7 +680,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
|
||||||
for (in_body = i = 0; (ch = msg[i]) && i < len; i++) {
|
for (in_body = i = 0; (ch = msg[i]) && i < len; i++) {
|
||||||
if (!in_body) {
|
if (!in_body) {
|
||||||
/* author could be non 7-bit ASCII but
|
/* author could be non 7-bit ASCII but
|
||||||
* the log may so; skip over the
|
* the log may be so; skip over the
|
||||||
* header part first.
|
* header part first.
|
||||||
*/
|
*/
|
||||||
if (ch == '\n' &&
|
if (ch == '\n' &&
|
||||||
|
@ -755,6 +811,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
|
||||||
if (fmt == CMIT_FMT_EMAIL && !body)
|
if (fmt == CMIT_FMT_EMAIL && !body)
|
||||||
buf[offset++] = '\n';
|
buf[offset++] = '\n';
|
||||||
buf[offset] = '\0';
|
buf[offset] = '\0';
|
||||||
|
|
||||||
|
free(reencoded);
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
8
config.c
8
config.c
|
@ -309,10 +309,16 @@ int git_default_config(const char *var, const char *value)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!strcmp(var, "i18n.commitencoding")) {
|
if (!strcmp(var, "i18n.commitencoding")) {
|
||||||
strlcpy(git_commit_encoding, value, sizeof(git_commit_encoding));
|
git_commit_encoding = strdup(value);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!strcmp(var, "i18n.logoutputencoding")) {
|
||||||
|
git_log_output_encoding = strdup(value);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (!strcmp(var, "pager.color") || !strcmp(var, "color.pager")) {
|
if (!strcmp(var, "pager.color") || !strcmp(var, "color.pager")) {
|
||||||
pager_use_color = git_config_bool(var,value);
|
pager_use_color = git_config_bool(var,value);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -711,6 +711,7 @@ _git_repo_config ()
|
||||||
core.compression
|
core.compression
|
||||||
core.legacyHeaders
|
core.legacyHeaders
|
||||||
i18n.commitEncoding
|
i18n.commitEncoding
|
||||||
|
i18n.logOutputEncoding
|
||||||
diff.color
|
diff.color
|
||||||
color.diff
|
color.diff
|
||||||
diff.renameLimit
|
diff.renameLimit
|
||||||
|
|
|
@ -18,7 +18,8 @@ int prefer_symlink_refs;
|
||||||
int log_all_ref_updates;
|
int log_all_ref_updates;
|
||||||
int warn_ambiguous_refs = 1;
|
int warn_ambiguous_refs = 1;
|
||||||
int repository_format_version;
|
int repository_format_version;
|
||||||
char git_commit_encoding[MAX_ENCODING_LENGTH] = "utf-8";
|
char *git_commit_encoding;
|
||||||
|
char *git_log_output_encoding;
|
||||||
int shared_repository = PERM_UMASK;
|
int shared_repository = PERM_UMASK;
|
||||||
const char *apply_default_whitespace;
|
const char *apply_default_whitespace;
|
||||||
int zlib_compression_level = Z_DEFAULT_COMPRESSION;
|
int zlib_compression_level = Z_DEFAULT_COMPRESSION;
|
||||||
|
|
|
@ -72,6 +72,7 @@ struct rev_info {
|
||||||
const char *ref_message_id;
|
const char *ref_message_id;
|
||||||
const char *add_signoff;
|
const char *add_signoff;
|
||||||
const char *extra_headers;
|
const char *extra_headers;
|
||||||
|
const char *log_reencode;
|
||||||
|
|
||||||
/* Filter by commit log message */
|
/* Filter by commit log message */
|
||||||
struct grep_opt *grep_filter;
|
struct grep_opt *grep_filter;
|
||||||
|
|
|
@ -0,0 +1,115 @@
|
||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# Copyright (c) 2006 Junio C Hamano
|
||||||
|
#
|
||||||
|
|
||||||
|
test_description='commit and log output encodings'
|
||||||
|
|
||||||
|
. ./test-lib.sh
|
||||||
|
|
||||||
|
compare_with () {
|
||||||
|
git-show -s "$1" | sed -e '1,/^$/d' -e 's/^ //' -e '$d' >current &&
|
||||||
|
diff -u current "$2"
|
||||||
|
}
|
||||||
|
|
||||||
|
test_expect_success setup '
|
||||||
|
: >F &&
|
||||||
|
git-add F &&
|
||||||
|
T=$(git-write-tree) &&
|
||||||
|
C=$(git-commit-tree $T <../t3900/1-UTF-8.txt) &&
|
||||||
|
git-update-ref HEAD $C &&
|
||||||
|
git-tag C0
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'no encoding header for base case' '
|
||||||
|
E=$(git-cat-file commit C0 | sed -ne "s/^encoding //p") &&
|
||||||
|
test z = "z$E"
|
||||||
|
'
|
||||||
|
|
||||||
|
for H in ISO-8859-1 EUCJP ISO-2022-JP
|
||||||
|
do
|
||||||
|
test_expect_success "$H setup" '
|
||||||
|
git-repo-config i18n.commitencoding $H &&
|
||||||
|
git-checkout -b $H C0 &&
|
||||||
|
echo $H >F &&
|
||||||
|
git-commit -a -F ../t3900/$H.txt
|
||||||
|
'
|
||||||
|
done
|
||||||
|
|
||||||
|
for H in ISO-8859-1 EUCJP ISO-2022-JP
|
||||||
|
do
|
||||||
|
test_expect_success "check encoding header for $H" '
|
||||||
|
E=$(git-cat-file commit '$H' | sed -ne "s/^encoding //p") &&
|
||||||
|
test "z$E" = "z'$H'"
|
||||||
|
'
|
||||||
|
done
|
||||||
|
|
||||||
|
test_expect_success 'repo-config to remove customization' '
|
||||||
|
git-repo-config --unset-all i18n.commitencoding &&
|
||||||
|
if Z=$(git-repo-config --get-all i18n.commitencoding)
|
||||||
|
then
|
||||||
|
echo Oops, should have failed.
|
||||||
|
false
|
||||||
|
else
|
||||||
|
test z = "z$Z"
|
||||||
|
fi &&
|
||||||
|
git-repo-config i18n.commitencoding utf-8
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'ISO-8859-1 should be shown in UTF-8 now' '
|
||||||
|
compare_with ISO-8859-1 ../t3900/1-UTF-8.txt
|
||||||
|
'
|
||||||
|
|
||||||
|
for H in EUCJP ISO-2022-JP
|
||||||
|
do
|
||||||
|
test_expect_success "$H should be shown in UTF-8 now" '
|
||||||
|
compare_with '$H' ../t3900/2-UTF-8.txt
|
||||||
|
'
|
||||||
|
done
|
||||||
|
|
||||||
|
test_expect_success 'repo-config to add customization' '
|
||||||
|
git-repo-config --unset-all i18n.commitencoding &&
|
||||||
|
if Z=$(git-repo-config --get-all i18n.commitencoding)
|
||||||
|
then
|
||||||
|
echo Oops, should have failed.
|
||||||
|
false
|
||||||
|
else
|
||||||
|
test z = "z$Z"
|
||||||
|
fi
|
||||||
|
'
|
||||||
|
|
||||||
|
for H in ISO-8859-1 EUCJP ISO-2022-JP
|
||||||
|
do
|
||||||
|
test_expect_success "$H should be shown in itself now" '
|
||||||
|
git-repo-config i18n.commitencoding '$H' &&
|
||||||
|
compare_with '$H' ../t3900/'$H'.txt
|
||||||
|
'
|
||||||
|
done
|
||||||
|
|
||||||
|
test_expect_success 'repo-config to tweak customization' '
|
||||||
|
git-repo-config i18n.logoutputencoding utf-8
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'ISO-8859-1 should be shown in UTF-8 now' '
|
||||||
|
compare_with ISO-8859-1 ../t3900/1-UTF-8.txt
|
||||||
|
'
|
||||||
|
|
||||||
|
for H in EUCJP ISO-2022-JP
|
||||||
|
do
|
||||||
|
test_expect_success "$H should be shown in UTF-8 now" '
|
||||||
|
compare_with '$H' ../t3900/2-UTF-8.txt
|
||||||
|
'
|
||||||
|
done
|
||||||
|
|
||||||
|
for J in EUCJP ISO-2022-JP
|
||||||
|
do
|
||||||
|
git-repo-config i18n.logoutputencoding $J
|
||||||
|
for H in EUCJP ISO-2022-JP
|
||||||
|
do
|
||||||
|
test_expect_success "$H should be shown in $J now" '
|
||||||
|
compare_with '$H' ../t3900/'$J'.txt
|
||||||
|
'
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
test_done
|
|
@ -0,0 +1,3 @@
|
||||||
|
ÄËÑÏÖ
|
||||||
|
|
||||||
|
Ábçdèfg
|
|
@ -0,0 +1,4 @@
|
||||||
|
はれひほふ
|
||||||
|
|
||||||
|
しているのが、いるので。
|
||||||
|
濱浜ほれぷりぽれまびぐりろへ。
|
|
@ -0,0 +1,4 @@
|
||||||
|
はれひほふ
|
||||||
|
|
||||||
|
しているのが、いるので。
|
||||||
|
濱浜ほれぷりぽれまびぐりろへ。
|
|
@ -0,0 +1,4 @@
|
||||||
|
$B$O$l$R$[$U(B
|
||||||
|
|
||||||
|
$B$7$F$$$k$N$,!"$$$k$N$G!#(B
|
||||||
|
$B_@IM$[$l$W$j$]$l$^$S$0$j$m$X!#(B
|
|
@ -0,0 +1,3 @@
|
||||||
|
トヒムマヨ
|
||||||
|
|
||||||
|
チb軼鑁g
|
54
utf8.c
54
utf8.c
|
@ -276,3 +276,57 @@ void print_wrapped_text(const char *text, int indent, int indent2, int width)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Given a buffer and its encoding, return it re-encoded
|
||||||
|
* with iconv. If the conversion fails, returns NULL.
|
||||||
|
*/
|
||||||
|
#ifndef NO_ICONV
|
||||||
|
char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding)
|
||||||
|
{
|
||||||
|
iconv_t conv;
|
||||||
|
size_t insz, outsz, outalloc;
|
||||||
|
char *out, *outpos, *cp;
|
||||||
|
|
||||||
|
if (!in_encoding)
|
||||||
|
return NULL;
|
||||||
|
conv = iconv_open(out_encoding, in_encoding);
|
||||||
|
if (conv == (iconv_t) -1)
|
||||||
|
return NULL;
|
||||||
|
insz = strlen(in);
|
||||||
|
outsz = insz;
|
||||||
|
outalloc = outsz + 1; /* for terminating NUL */
|
||||||
|
out = xmalloc(outalloc);
|
||||||
|
outpos = out;
|
||||||
|
cp = (char *)in;
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
size_t cnt = iconv(conv, &cp, &insz, &outpos, &outsz);
|
||||||
|
|
||||||
|
if (cnt == -1) {
|
||||||
|
size_t sofar;
|
||||||
|
if (errno != E2BIG) {
|
||||||
|
free(out);
|
||||||
|
iconv_close(conv);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
/* insz has remaining number of bytes.
|
||||||
|
* since we started outsz the same as insz,
|
||||||
|
* it is likely that insz is not enough for
|
||||||
|
* converting the rest.
|
||||||
|
*/
|
||||||
|
sofar = outpos - out;
|
||||||
|
outalloc = sofar + insz * 2 + 32;
|
||||||
|
out = xrealloc(out, outalloc);
|
||||||
|
outpos = out + sofar;
|
||||||
|
outsz = outalloc - sofar - 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*outpos = '\0';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
iconv_close(conv);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
6
utf8.h
6
utf8.h
|
@ -5,4 +5,10 @@ int utf8_width(const char **start);
|
||||||
int is_utf8(const char *text);
|
int is_utf8(const char *text);
|
||||||
void print_wrapped_text(const char *text, int indent, int indent2, int len);
|
void print_wrapped_text(const char *text, int indent, int indent2, int len);
|
||||||
|
|
||||||
|
#ifndef NO_ICONV
|
||||||
|
char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding);
|
||||||
|
#else
|
||||||
|
#define reencode_string(a,b,c) NULL
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue