You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
80 lines
1.7 KiB
80 lines
1.7 KiB
#include "test-tool.h" |
|
|
|
static const char *utf8_replace_character = "�"; |
|
|
|
/* |
|
* Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded |
|
* in an XML file. |
|
*/ |
|
int cmd__xml_encode(int argc, const char **argv) |
|
{ |
|
unsigned char buf[1024], tmp[4], *tmp2 = NULL; |
|
ssize_t cur = 0, len = 1, remaining = 0; |
|
unsigned char ch; |
|
|
|
for (;;) { |
|
if (++cur == len) { |
|
len = xread(0, buf, sizeof(buf)); |
|
if (!len) |
|
return 0; |
|
if (len < 0) |
|
die_errno("Could not read <stdin>"); |
|
cur = 0; |
|
} |
|
ch = buf[cur]; |
|
|
|
if (tmp2) { |
|
if ((ch & 0xc0) != 0x80) { |
|
fputs(utf8_replace_character, stdout); |
|
tmp2 = NULL; |
|
cur--; |
|
continue; |
|
} |
|
*tmp2 = ch; |
|
tmp2++; |
|
if (--remaining == 0) { |
|
fwrite(tmp, tmp2 - tmp, 1, stdout); |
|
tmp2 = NULL; |
|
} |
|
continue; |
|
} |
|
|
|
if (!(ch & 0x80)) { |
|
/* 0xxxxxxx */ |
|
if (ch == '&') |
|
fputs("&", stdout); |
|
else if (ch == '\'') |
|
fputs("'", stdout); |
|
else if (ch == '"') |
|
fputs(""", stdout); |
|
else if (ch == '<') |
|
fputs("<", stdout); |
|
else if (ch == '>') |
|
fputs(">", stdout); |
|
else if (ch >= 0x20) |
|
fputc(ch, stdout); |
|
else if (ch == 0x09 || ch == 0x0a || ch == 0x0d) |
|
fprintf(stdout, "&#x%02x;", ch); |
|
else |
|
fputs(utf8_replace_character, stdout); |
|
} else if ((ch & 0xe0) == 0xc0) { |
|
/* 110XXXXx 10xxxxxx */ |
|
tmp[0] = ch; |
|
remaining = 1; |
|
tmp2 = tmp + 1; |
|
} else if ((ch & 0xf0) == 0xe0) { |
|
/* 1110XXXX 10Xxxxxx 10xxxxxx */ |
|
tmp[0] = ch; |
|
remaining = 2; |
|
tmp2 = tmp + 1; |
|
} else if ((ch & 0xf8) == 0xf0) { |
|
/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ |
|
tmp[0] = ch; |
|
remaining = 3; |
|
tmp2 = tmp + 1; |
|
} else |
|
fputs(utf8_replace_character, stdout); |
|
} |
|
|
|
return 0; |
|
}
|
|
|