Browse Source
Teaches git to normalize pathnames read from readdir(3) and all arguments from the command line into precomposed UTF-8 (assuming that they come as decomposed UTF-8) to work around issues on Mac OS. I think there still are other places that need conversion (e.g. paths that are read from stdin for some commands), but this should be a good first step in the right direction. * tb/sanitize-decomposed-utf-8-pathname: git on Mac OS and precomposed unicode maint
Junio C Hamano
12 years ago
13 changed files with 446 additions and 10 deletions
@ -0,0 +1,190 @@
@@ -0,0 +1,190 @@
|
||||
/* |
||||
* Converts filenames from decomposed unicode into precomposed unicode. |
||||
* Used on MacOS X. |
||||
*/ |
||||
|
||||
|
||||
#define PRECOMPOSE_UNICODE_C |
||||
|
||||
#include "cache.h" |
||||
#include "utf8.h" |
||||
#include "precompose_utf8.h" |
||||
|
||||
/* Pointer type used for the input-buffer cursor handed to iconv(). */
typedef char *iconv_ibp;
/* Target encoding of every conversion done in this file (iconv "to" code). */
static const char *repo_encoding = "UTF-8";
/* Source encoding of every conversion done in this file (iconv "from" code). */
static const char *path_encoding = "UTF-8-MAC";
||||
|
||||
|
||||
/*
 * Count the bytes of `s` that have the high bit set (i.e. are not plain
 * ASCII), examining at most `maxlen` bytes and stopping at the
 * terminating NUL.
 *
 * s        - string to scan; may be NULL
 * maxlen   - upper bound on the number of bytes examined
 * strlen_c - if not NULL, receives the number of bytes examined; this is
 *            0 for a NULL or empty `s`
 *
 * Returns the number of high-bit bytes seen; 0 means the examined range
 * is pure ASCII.
 */
static size_t has_utf8(const char *s, size_t maxlen, size_t *strlen_c)
{
	const uint8_t *utf8p = (const uint8_t *)s;
	size_t strlen_chars = 0;
	size_t ret = 0;

	/*
	 * NULL and "" simply fail the loop guard, so the out-parameter is
	 * written on every path instead of being left untouched.
	 */
	while (utf8p && *utf8p && maxlen) {
		if (*utf8p & 0x80)
			ret++;
		strlen_chars++;
		utf8p++;
		maxlen--;
	}
	if (strlen_c)
		*strlen_c = strlen_chars;

	return ret;
}
||||
|
||||
|
||||
void probe_utf8_pathname_composition(char *path, int len) |
||||
{ |
||||
const static char *auml_nfc = "\xc3\xa4"; |
||||
const static char *auml_nfd = "\x61\xcc\x88"; |
||||
int output_fd; |
||||
if (precomposed_unicode != -1) |
||||
return; /* We found it defined in the global config, respect it */ |
||||
path[len] = 0; |
||||
strcpy(path + len, auml_nfc); |
||||
output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600); |
||||
if (output_fd >=0) { |
||||
close(output_fd); |
||||
path[len] = 0; |
||||
strcpy(path + len, auml_nfd); |
||||
/* Indicate to the user, that we can configure it to true */ |
||||
if (0 == access(path, R_OK)) |
||||
git_config_set("core.precomposeunicode", "false"); |
||||
/* To be backward compatible, set precomposed_unicode to 0 */ |
||||
precomposed_unicode = 0; |
||||
path[len] = 0; |
||||
strcpy(path + len, auml_nfc); |
||||
unlink(path); |
||||
} |
||||
} |
||||
|
||||
|
||||
void precompose_argv(int argc, const char **argv) |
||||
{ |
||||
int i = 0; |
||||
const char *oldarg; |
||||
char *newarg; |
||||
iconv_t ic_precompose; |
||||
|
||||
if (precomposed_unicode != 1) |
||||
return; |
||||
|
||||
ic_precompose = iconv_open(repo_encoding, path_encoding); |
||||
if (ic_precompose == (iconv_t) -1) |
||||
return; |
||||
|
||||
while (i < argc) { |
||||
size_t namelen; |
||||
oldarg = argv[i]; |
||||
if (has_utf8(oldarg, (size_t)-1, &namelen)) { |
||||
newarg = reencode_string_iconv(oldarg, namelen, ic_precompose); |
||||
if (newarg) |
||||
argv[i] = newarg; |
||||
} |
||||
i++; |
||||
} |
||||
iconv_close(ic_precompose); |
||||
} |
||||
|
||||
|
||||
PREC_DIR *precompose_utf8_opendir(const char *dirname) |
||||
{ |
||||
PREC_DIR *prec_dir = xmalloc(sizeof(PREC_DIR)); |
||||
prec_dir->dirent_nfc = xmalloc(sizeof(dirent_prec_psx)); |
||||
prec_dir->dirent_nfc->max_name_len = sizeof(prec_dir->dirent_nfc->d_name); |
||||
|
||||
prec_dir->dirp = opendir(dirname); |
||||
if (!prec_dir->dirp) { |
||||
free(prec_dir->dirent_nfc); |
||||
free(prec_dir); |
||||
return NULL; |
||||
} else { |
||||
int ret_errno = errno; |
||||
prec_dir->ic_precompose = iconv_open(repo_encoding, path_encoding); |
||||
/* if iconv_open() fails, die() in readdir() if needed */ |
||||
errno = ret_errno; |
||||
} |
||||
|
||||
return prec_dir; |
||||
} |
||||
|
||||
/*
 * readdir() wrapper: fetch the next entry of the wrapped directory
 * stream and hand it out through the entry buffer owned by `prec_dir`.
 *
 * d_ino and d_type are copied from the underlying entry.  d_name is
 * converted from path_encoding to repo_encoding when precomposed_unicode
 * is 1 and the name contains a high-bit byte; in every other case, and
 * when iconv() fails, the name is copied unchanged.
 *
 * The entry buffer is grown with xrealloc() when a name does not fit, so
 * the returned pointer may differ from one call to the next.
 *
 * Returns NULL when the underlying readdir() returns NULL.  When an
 * entry is returned, errno is restored to its value from right after
 * the underlying readdir() call.  Calls die() if a conversion is needed
 * but the iconv descriptor stored in `prec_dir` is invalid.
 */
struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir)
{
	struct dirent *res;
	size_t namelenz;
	int ret_errno;
	int converted = 0;

	res = readdir(prec_dir->dirp);
	if (!res)
		return NULL;

	namelenz = strlen(res->d_name) + 1; /* \0 */
	ret_errno = errno;

	if (namelenz > prec_dir->dirent_nfc->max_name_len) {
		size_t new_len = sizeof(dirent_prec_psx) + namelenz -
			sizeof(prec_dir->dirent_nfc->d_name);

		prec_dir->dirent_nfc = xrealloc(prec_dir->dirent_nfc, new_len);
		prec_dir->dirent_nfc->max_name_len = namelenz;
	}

	prec_dir->dirent_nfc->d_ino = res->d_ino;
	prec_dir->dirent_nfc->d_type = res->d_type;

	if ((precomposed_unicode == 1) && has_utf8(res->d_name, (size_t)-1, NULL)) {
		if (prec_dir->ic_precompose == (iconv_t)-1) {
			die("iconv_open(%s,%s) failed, but needed:\n"
			    " precomposed unicode is not supported.\n"
			    " If you want to use decomposed unicode, run\n"
			    " \"git config core.precomposeunicode false\"\n",
			    repo_encoding, path_encoding);
		} else {
			iconv_ibp cp = (iconv_ibp)res->d_name;
			size_t inleft = namelenz;
			char *outpos = &prec_dir->dirent_nfc->d_name[0];
			size_t outsz = prec_dir->dirent_nfc->max_name_len;

			errno = 0;
			iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz);
			/*
			 * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF
			 * MacOS X avoids illegal byte sequences.
			 * If they occur on a mounted drive (e.g. NFS) it is not worth to
			 * die() for that, but rather let the user see the original name
			 */
			converted = !(errno || inleft);
		}
	}

	/* no conversion wanted, or it failed: pass the name through as is */
	if (!converted)
		strlcpy(prec_dir->dirent_nfc->d_name, res->d_name,
			prec_dir->dirent_nfc->max_name_len);

	errno = ret_errno;
	return prec_dir->dirent_nfc;
}
||||
|
||||
|
||||
/*
 * closedir() wrapper: close the wrapped directory stream and release
 * everything precompose_utf8_opendir() set up, including `prec_dir`
 * itself.
 *
 * Returns whatever closedir() returned; errno is restored to its value
 * from right after that call, so the cleanup cannot disturb it.
 */
int precompose_utf8_closedir(PREC_DIR *prec_dir)
{
	int status = closedir(prec_dir->dirp);
	int saved_errno = errno;

	if (prec_dir->ic_precompose != (iconv_t)-1)
		iconv_close(prec_dir->ic_precompose);
	free(prec_dir->dirent_nfc);
	free(prec_dir);

	errno = saved_errno;
	return status;
}
@ -0,0 +1,45 @@
@@ -0,0 +1,45 @@
|
||||
#ifndef PRECOMPOSE_UNICODE_H |
||||
#include <sys/stat.h> |
||||
#include <sys/types.h> |
||||
#include <dirent.h> |
||||
#include <iconv.h> |
||||
|
||||
|
||||
typedef struct dirent_prec_psx { |
||||
ino_t d_ino; /* Posix */ |
||||
size_t max_name_len; /* See below */ |
||||
unsigned char d_type; /* available on all systems git runs on */ |
||||
|
||||
/* |
||||
* See http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html |
||||
* NAME_MAX + 1 should be enough, but some systems have |
||||
* NAME_MAX=255 and strlen(d_name) may return 508 or 510 |
||||
* Solution: allocate more when needed, see precompose_utf8_readdir() |
||||
*/ |
||||
char d_name[NAME_MAX+1]; |
||||
} dirent_prec_psx; |
||||
|
||||
|
||||
typedef struct { |
||||
iconv_t ic_precompose; |
||||
DIR *dirp; |
||||
struct dirent_prec_psx *dirent_nfc; |
||||
} PREC_DIR; |
||||
|
||||
void precompose_argv(int argc, const char **argv); |
||||
void probe_utf8_pathname_composition(char *, int); |
||||
|
||||
PREC_DIR *precompose_utf8_opendir(const char *dirname); |
||||
struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *dirp); |
||||
int precompose_utf8_closedir(PREC_DIR *dirp); |
||||
|
||||
#ifndef PRECOMPOSE_UNICODE_C |
||||
#define dirent dirent_prec_psx |
||||
#define opendir(n) precompose_utf8_opendir(n) |
||||
#define readdir(d) precompose_utf8_readdir(d) |
||||
#define closedir(d) precompose_utf8_closedir(d) |
||||
#define DIR PREC_DIR |
||||
#endif /* PRECOMPOSE_UNICODE_C */ |
||||
|
||||
#define PRECOMPOSE_UNICODE_H |
||||
#endif /* PRECOMPOSE_UNICODE_H */ |
@ -0,0 +1,164 @@
@@ -0,0 +1,164 @@
|
||||
#!/bin/sh |
||||
# |
||||
# Copyright (c) 2012 Torsten Bögershausen |
||||
# |
||||
|
||||
test_description='utf-8 decomposed (nfd) converted to precomposed (nfc)' |
||||
|
||||
. ./test-lib.sh |
||||
|
||||
Adiarnfc=$(printf '\303\204')
Adiarnfd=$(printf 'A\314\210')

# Check whether the tests below apply: create a file with a precomposed
# name and see whether the directory listing hands it back decomposed.
# Start from an empty flag so that a test_nfd inherited from the
# environment cannot switch the tests on.
test_nfd=
mkdir junk &&
>junk/"$Adiarnfc" &&
case "$(cd junk && echo *)" in
"$Adiarnfd")
	test_nfd=1
	;;
*)	;;
esac
rm -rf junk
||||
|
||||
|
||||
if test "$test_nfd"
then
	# create more utf-8 variables
	Odiarnfc=$(printf '\303\226')
	Odiarnfd=$(printf 'O\314\210')
	AEligatu=$(printf '\303\206')
	Invalidu=$(printf '\303\377')

	#Create a string with 255 bytes (decomposed)
	Alongd=$Adiarnfd$Adiarnfd$Adiarnfd$Adiarnfd$Adiarnfd$Adiarnfd$Adiarnfd #21 Byte
	Alongd=$Alongd$Alongd$Alongd                                           #63 Byte
	Alongd=$Alongd$Alongd$Alongd$Alongd$Adiarnfd                           #255 Byte

	#Create a string with 254 bytes (precomposed)
	Alongc=$AEligatu$AEligatu$AEligatu$AEligatu$AEligatu #10 Byte
	Alongc=$Alongc$Alongc$Alongc$Alongc$Alongc           #50 Byte
	Alongc=$Alongc$Alongc$Alongc$Alongc$Alongc           #250 Byte
	Alongc=$Alongc$AEligatu$AEligatu                     #254 Byte

	test_expect_success "detect if nfd needed" '
		precomposeunicode=$(git config core.precomposeunicode) &&
		test "$precomposeunicode" = false &&
		git config core.precomposeunicode true
	'
	test_expect_success "setup" '
		>x &&
		git add x &&
		git commit -m "1st commit" &&
		git rm x &&
		git commit -m "rm x"
	'
	test_expect_success "setup case mac" '
		git checkout -b mac_os
	'
	# This will test nfd2nfc in readdir()
	test_expect_success "add file Adiarnfc" '
		echo f.Adiarnfc >f.$Adiarnfc &&
		git add f.$Adiarnfc &&
		git commit -m "add f.$Adiarnfc"
	'
	# This will test nfd2nfc in git stage()
	test_expect_success "stage file d.Adiarnfd/f.Adiarnfd" '
		mkdir d.$Adiarnfd &&
		echo d.$Adiarnfd/f.$Adiarnfd >d.$Adiarnfd/f.$Adiarnfd &&
		git stage d.$Adiarnfd/f.$Adiarnfd &&
		git commit -m "add d.$Adiarnfd/f.$Adiarnfd"
	'
	test_expect_success "add link Adiarnfc" '
		ln -s d.$Adiarnfd/f.$Adiarnfd l.$Adiarnfc &&
		git add l.$Adiarnfc &&
		git commit -m "add l.Adiarnfc"
	'
	# This will test git log
	test_expect_success "git log f.Adiar" '
		git log f.$Adiarnfc >f.Adiarnfc.log &&
		git log f.$Adiarnfd >f.Adiarnfd.log &&
		test -s f.Adiarnfc.log &&
		test -s f.Adiarnfd.log &&
		test_cmp f.Adiarnfc.log f.Adiarnfd.log &&
		rm f.Adiarnfc.log f.Adiarnfd.log
	'
	# This will test git ls-files
	test_expect_success "git lsfiles f.Adiar" '
		git ls-files f.$Adiarnfc >f.Adiarnfc.log &&
		git ls-files f.$Adiarnfd >f.Adiarnfd.log &&
		test -s f.Adiarnfc.log &&
		test -s f.Adiarnfd.log &&
		test_cmp f.Adiarnfc.log f.Adiarnfd.log &&
		rm f.Adiarnfc.log f.Adiarnfd.log
	'
	# This will test git mv
	test_expect_success "git mv" '
		git mv f.$Adiarnfd f.$Odiarnfc &&
		git mv d.$Adiarnfd d.$Odiarnfc &&
		git mv l.$Adiarnfd l.$Odiarnfc &&
		git commit -m "mv Adiarnfd Odiarnfc"
	'
	# Files can be checked out as nfc
	# And the link has been corrected from nfd to nfc
	test_expect_success "git checkout nfc" '
		rm f.$Odiarnfc &&
		git checkout f.$Odiarnfc
	'
	# Make it possible to checkout files with their NFD names
	test_expect_success "git checkout file nfd" '
		rm -f f.* &&
		git checkout f.$Odiarnfd
	'
	# Make it possible to checkout links with their NFD names
	test_expect_success "git checkout link nfd" '
		rm l.* &&
		git checkout l.$Odiarnfd
	'
	test_expect_success "setup case mac2" '
		git checkout master &&
		git reset --hard &&
		git checkout -b mac_os_2
	'
	# This will test nfd2nfc in git commit
	test_expect_success "commit file d2.Adiarnfd/f.Adiarnfd" '
		mkdir d2.$Adiarnfd &&
		echo d2.$Adiarnfd/f.$Adiarnfd >d2.$Adiarnfd/f.$Adiarnfd &&
		git add d2.$Adiarnfd/f.$Adiarnfd &&
		git commit -m "add d2.$Adiarnfd/f.$Adiarnfd" -- d2.$Adiarnfd/f.$Adiarnfd
	'
	test_expect_success "setup for long decomposed filename" '
		git checkout master &&
		git reset --hard &&
		git checkout -b mac_os_long_nfd_fn
	'
	test_expect_success "Add long decomposed filename" '
		echo longd >$Alongd &&
		git add * &&
		git commit -m "Long filename"
	'
	test_expect_success "setup for long precomposed filename" '
		git checkout master &&
		git reset --hard &&
		git checkout -b mac_os_long_nfc_fn
	'
	test_expect_success "Add long precomposed filename" '
		echo longc >$Alongc &&
		git add * &&
		git commit -m "Long filename"
	'
	# Test if the global core.precomposeunicode stops autosensing
	# Must be the last test case
	test_expect_success "respect git config --global core.precomposeunicode" '
		git config --global core.precomposeunicode true &&
		rm -rf .git &&
		git init &&
		precomposeunicode=$(git config core.precomposeunicode) &&
		test "$precomposeunicode" = "true"
	'
else
	say "Skipping nfc/nfd tests"
fi

test_done
Loading…
Reference in new issue