|
|
|
#ifdef __MINGW64_VERSION_MAJOR
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <wchar.h>
|
|
|
|
typedef _sigset_t sigset_t;
|
|
|
|
#endif
|
|
|
|
#include <winsock2.h>
|
|
|
|
#include <ws2tcpip.h>
|
|
|
|
|
|
|
|
/* MinGW-w64 claims to have flockfile, but it does not actually provide it. */
|
|
|
|
#ifdef __MINGW64_VERSION_MAJOR
|
|
|
|
#undef _POSIX_THREAD_SAFE_FUNCTIONS
|
|
|
|
#endif
|
|
|
|
|
|
|
|
int mingw_core_config(const char *var, const char *value, void *cb);
|
|
|
|
#define platform_core_config mingw_core_config
|
|
|
|
|
|
|
|
/*
|
|
|
|
* things that are not available in header files
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef int uid_t;
|
|
|
|
typedef int socklen_t;
|
|
|
|
#ifndef __MINGW64_VERSION_MAJOR
|
|
|
|
typedef int pid_t;
|
|
|
|
#define hstrerror strerror
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define S_IFLNK 0120000 /* Symbolic link */
|
|
|
|
#define S_ISLNK(x) (((x) & S_IFMT) == S_IFLNK)
|
|
|
|
#define S_ISSOCK(x) 0
|
|
|
|
|
|
|
|
#ifndef S_IRWXG
|
|
|
|
#define S_IRGRP 0
|
|
|
|
#define S_IWGRP 0
|
|
|
|
#define S_IXGRP 0
|
|
|
|
#define S_IRWXG (S_IRGRP | S_IWGRP | S_IXGRP)
|
|
|
|
#endif
|
|
|
|
#ifndef S_IRWXO
|
|
|
|
#define S_IROTH 0
|
|
|
|
#define S_IWOTH 0
|
|
|
|
#define S_IXOTH 0
|
|
|
|
#define S_IRWXO (S_IROTH | S_IWOTH | S_IXOTH)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define S_ISUID 0004000
|
|
|
|
#define S_ISGID 0002000
|
|
|
|
#define S_ISVTX 0001000
|
|
|
|
|
|
|
|
#define WIFEXITED(x) 1
|
|
|
|
#define WIFSIGNALED(x) 0
|
|
|
|
#define WEXITSTATUS(x) ((x) & 0xff)
|
|
|
|
#define WTERMSIG(x) SIGTERM
|
|
|
|
|
|
|
|
#ifndef EWOULDBLOCK
|
|
|
|
#define EWOULDBLOCK EAGAIN
|
|
|
|
#endif
|
|
|
|
#ifndef ELOOP
|
|
|
|
#define ELOOP EMLINK
|
|
|
|
#endif
|
|
|
|
#define SHUT_WR SD_SEND
|
|
|
|
|
|
|
|
#define SIGHUP 1
|
|
|
|
#define SIGQUIT 3
|
|
|
|
#define SIGKILL 9
|
|
|
|
#define SIGPIPE 13
|
|
|
|
#define SIGALRM 14
|
|
|
|
#define SIGCHLD 17
|
|
|
|
|
|
|
|
#define F_GETFD 1
|
|
|
|
#define F_SETFD 2
|
|
|
|
#define FD_CLOEXEC 0x1
|
|
|
|
|
|
|
|
#if !defined O_CLOEXEC && defined O_NOINHERIT
|
|
|
|
#define O_CLOEXEC O_NOINHERIT
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef EAFNOSUPPORT
|
|
|
|
#define EAFNOSUPPORT WSAEAFNOSUPPORT
|
|
|
|
#endif
|
|
|
|
#ifndef ECONNABORTED
|
|
|
|
#define ECONNABORTED WSAECONNABORTED
|
|
|
|
#endif
|
|
|
|
#ifndef ENOTSOCK
|
|
|
|
#define ENOTSOCK WSAENOTSOCK
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * Reduced password-database record: only the three string fields
 * declared here are available.
 */
struct passwd {
	char *pw_name;	/* name field */
	char *pw_gecos;	/* gecos field */
	char *pw_dir;	/* directory field */
};
|
|
|
|
|
|
|
|
typedef void (__cdecl *sig_handler_t)(int);
|
|
|
|
struct sigaction {
|
|
|
|
sig_handler_t sa_handler;
|
|
|
|
unsigned sa_flags;
|
|
|
|
};
|
|
|
|
#define SA_RESTART 0
|
|
|
|
|
|
|
|
struct itimerval {
|
|
|
|
struct timeval it_value, it_interval;
|
|
|
|
};
|
|
|
|
#define ITIMER_REAL 0
|
|
|
|
|
|
|
|
/*
 * System identification record.  sysname, release and version hold up
 * to 16 bytes each; nodename and machine are single-byte arrays and can
 * therefore hold at most an empty string.
 */
struct utsname {
	char sysname[16];
	char nodename[1];
	char release[16];
	char version[16];
	char machine[1];
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sanitize preprocessor namespace polluted by Windows headers defining
|
|
|
|
* macros which collide with git local versions
|
|
|
|
*/
|
|
|
|
#undef HELP_COMMAND /* from winuser.h */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* trivial stubs
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Trivial stub: never reads anything.  Always returns -1 with errno set
 * to ENOSYS; `path`, `buf` and `bufsiz` are ignored.
 */
static inline int readlink(const char *path, char *buf, size_t bufsiz)
{
	errno = ENOSYS;
	return -1;
}
|
|
|
|
/*
 * Trivial stub: never creates anything.  Always returns -1 with errno
 * set to ENOSYS; both path arguments are ignored.
 */
static inline int symlink(const char *oldpath, const char *newpath)
{
	errno = ENOSYS;
	return -1;
}
|
|
|
|
/*
 * Trivial stub: never changes any mode.  Always returns -1 with errno
 * set to ENOSYS; `fildes` and `mode` are ignored.
 */
static inline int fchmod(int fildes, mode_t mode)
{
	errno = ENOSYS;
	return -1;
}
|
|
|
|
#ifndef __MINGW64_VERSION_MAJOR
|
|
|
|
/*
 * Trivial stub: never creates a process.  Always returns -1 with errno
 * set to ENOSYS.
 */
static inline pid_t fork(void)
{
	errno = ENOSYS;
	return -1;
}
|
|
|
|
#endif
|
|
|
|
/*
 * Trivial stub: `seconds` is ignored and nothing is scheduled.
 * Always returns 0.
 */
static inline unsigned int alarm(unsigned int seconds)
{
	return 0;
}
|
|
|
|
/*
 * Adaptor: hands `fd` to _commit() and returns whatever _commit()
 * returns.
 */
static inline int fsync(int fd)
{
	return _commit(fd);
}
|
|
|
|
/* Trivial stub: does nothing. */
static inline void sync(void)
{
}
|
|
|
|
/* Trivial stub: every caller is reported as user id 1. */
static inline uid_t getuid(void)
{
	return 1;
}
|
|
|
|
/*
 * Trivial stub: no lookup is performed.  Always returns NULL, whatever
 * `name` is.
 */
static inline struct passwd *getpwnam(const char *name)
{
	return NULL;
}
|
|
|
|
static inline int fcntl(int fd, int cmd, ...)
|
|
|
|
{
|
|
|
|
if (cmd == F_GETFD || cmd == F_SETFD)
|
|
|
|
return 0;
|
|
|
|
errno = EINVAL;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define sigemptyset(x) (void)0
|
|
|
|
/*
 * Trivial stub: `set` is left untouched and `signum` is ignored.
 * Always returns 0.
 */
static inline int sigaddset(sigset_t *set, int signum)
{
	return 0;
}
|
|
|
|
#define SIG_BLOCK 0
|
|
|
|
#define SIG_UNBLOCK 0
|
|
|
|
/*
 * Trivial stub: no mask is changed and `oldset` is not written.
 * Always returns 0.
 */
static inline int sigprocmask(int how, const sigset_t *set, sigset_t *oldset)
{
	return 0;
}
|
|
|
|
/* Trivial stub: the parent process id is always reported as 1. */
static inline pid_t getppid(void)
{
	return 1;
}
|
|
|
|
/*
 * Trivial stub: the group id of a process is reported as the process id
 * itself.  A `pid` of 0 stands for the calling process, in which case
 * getpid() is returned.
 */
static inline pid_t getpgid(pid_t pid)
{
	if (pid == 0)
		return getpid();
	return pid;
}
|
|
|
|
/*
 * Trivial stub: `fd` is ignored and the calling process's own id, as
 * returned by getpid(), is reported.
 */
static inline pid_t tcgetpgrp(int fd)
{
	return getpid();
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* simple adaptors
|
|
|
|
*/
|
|
|
|
|
|
|
|
int mingw_mkdir(const char *path, int mode);
|
|
|
|
#define mkdir mingw_mkdir
|
|
|
|
|
|
|
|
#define WNOHANG 1
|
|
|
|
pid_t waitpid(pid_t pid, int *status, int options);
|
|
|
|
|
|
|
|
#define kill mingw_kill
|
|
|
|
int mingw_kill(pid_t pid, int sig);
|
|
|
|
|
|
|
|
#ifndef NO_OPENSSL
|
|
|
|
#include <openssl/ssl.h>
|
|
|
|
/*
 * Wrapper around SSL_set_fd(): `fd` is first translated with
 * _get_osfhandle() and the resulting handle value is what gets passed
 * on.  Returns SSL_set_fd()'s result.
 */
static inline int mingw_SSL_set_fd(SSL *ssl, int fd)
{
	/* SSL_set_fd() takes an int; make the narrowing of the handle explicit. */
	return SSL_set_fd(ssl, (int)_get_osfhandle(fd));
}
|
|
|
|
#define SSL_set_fd mingw_SSL_set_fd
|
|
|
|
|
|
|
|
/*
 * Wrapper around SSL_set_rfd(): `fd` is first translated with
 * _get_osfhandle() and the resulting handle value is what gets passed
 * on.  Returns SSL_set_rfd()'s result.
 */
static inline int mingw_SSL_set_rfd(SSL *ssl, int fd)
{
	/* SSL_set_rfd() takes an int; make the narrowing of the handle explicit. */
	return SSL_set_rfd(ssl, (int)_get_osfhandle(fd));
}
|
|
|
|
#define SSL_set_rfd mingw_SSL_set_rfd
|
|
|
|
|
|
|
|
/*
 * Wrapper around SSL_set_wfd(): `fd` is first translated with
 * _get_osfhandle() and the resulting handle value is what gets passed
 * on.  Returns SSL_set_wfd()'s result.
 */
static inline int mingw_SSL_set_wfd(SSL *ssl, int fd)
{
	/* SSL_set_wfd() takes an int; make the narrowing of the handle explicit. */
	return SSL_set_wfd(ssl, (int)_get_osfhandle(fd));
}
|
|
|
|
#define SSL_set_wfd mingw_SSL_set_wfd
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* implementations of missing functions
|
|
|
|
*/
|
|
|
|
|
Windows: A pipe() replacement whose ends are not inherited to children.
On Unix the idiom to use a pipe is as follows:
pipe(fd);
pid = fork();
if (!pid) {
dup2(fd[1], 1);
close(fd[1]);
close(fd[0]);
...
}
close(fd[1]);
i.e. the child process closes both pipe ends after duplicating one
to the file descriptors where they are needed.
On Windows, which does not have fork(), we never have an opportunity to
(1) duplicate a pipe end in the child, (2) close unused pipe ends. Instead,
we must use this idiom:
save1 = dup(1);
pipe(fd);
dup2(fd[1], 1);
spawn(...);
dup2(save1, 1);
close(fd[1]);
i.e. save away the descriptor at the destination slot, replace by the pipe
end, spawn process, restore the saved file.
But there is a problem: Notice that the child did not only inherit the
dup2()ed descriptor, but also *both* original pipe ends. Although the one
end that was dup()ed could be closed before the spawn(), we cannot close
the other end - the child inherits it, no matter what.
The solution is to generate non-inheritable pipes. At first glance,
this looks strange: The purpose of pipes is usually to be inherited to
child processes. But notice that in the course of actions as outlined
above, the pipe descriptor that we want to inherit to the child is
dup2()ed, and as it so happens, Windows's dup2() creates inheritable
duplicates.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
17 years ago
|
|
|
int pipe(int filedes[2]);
|
|
|
|
unsigned int sleep (unsigned int seconds);
|
|
|
|
int mkstemp(char *template);
|
|
|
|
int gettimeofday(struct timeval *tv, void *tz);
|
|
|
|
#ifndef __MINGW64_VERSION_MAJOR
|
|
|
|
struct tm *gmtime_r(const time_t *timep, struct tm *result);
|
|
|
|
struct tm *localtime_r(const time_t *timep, struct tm *result);
|
|
|
|
#endif
|
|
|
|
int getpagesize(void); /* defined in MinGW's libgcc.a */
|
|
|
|
struct passwd *getpwuid(uid_t uid);
|
|
|
|
int setitimer(int type, struct itimerval *in, struct itimerval *out);
|
|
|
|
int sigaction(int sig, struct sigaction *in, struct sigaction *out);
|
|
|
|
int link(const char *oldpath, const char *newpath);
|
|
|
|
int uname(struct utsname *buf);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* replacements of existing functions
|
|
|
|
*/
|
|
|
|
|
|
|
|
int mingw_unlink(const char *pathname);
|
|
|
|
#define unlink mingw_unlink
|
|
|
|
|
|
|
|
int mingw_rmdir(const char *path);
|
|
|
|
#define rmdir mingw_rmdir
|
|
|
|
|
|
|
|
int mingw_open (const char *filename, int oflags, ...);
|
|
|
|
#define open mingw_open
|
|
|
|
|
|
|
|
int mingw_fgetc(FILE *stream);
|
|
|
|
#define fgetc mingw_fgetc
|
|
|
|
|
|
|
|
FILE *mingw_fopen (const char *filename, const char *otype);
|
|
|
|
#define fopen mingw_fopen
|
|
|
|
|
|
|
|
FILE *mingw_freopen (const char *filename, const char *otype, FILE *stream);
|
|
|
|
#define freopen mingw_freopen
|
|
|
|
|
|
|
|
int mingw_fflush(FILE *stream);
|
|
|
|
#define fflush mingw_fflush
|
|
|
|
|
|
|
|
ssize_t mingw_write(int fd, const void *buf, size_t len);
|
|
|
|
#define write mingw_write
|
|
|
|
|
|
|
|
int mingw_access(const char *filename, int mode);
|
|
|
|
#undef access
|
|
|
|
#define access mingw_access
|
|
|
|
|
|
|
|
int mingw_chdir(const char *dirname);
|
|
|
|
#define chdir mingw_chdir
|
|
|
|
|
|
|
|
int mingw_chmod(const char *filename, int mode);
|
|
|
|
#define chmod mingw_chmod
|
|
|
|
|
|
|
|
char *mingw_mktemp(char *template);
|
|
|
|
#define mktemp mingw_mktemp
|
|
|
|
|
|
|
|
char *mingw_getcwd(char *pointer, int len);
|
|
|
|
#define getcwd mingw_getcwd
|
|
|
|
|
mingw: reencode environment variables on the fly (UTF-16 <-> UTF-8)
On Windows, the authoritative environment is encoded in UTF-16. In Git
for Windows, we convert that to UTF-8 (because UTF-16 is *such* a
foreign idea to Git that its source code is unprepared for it).
Previously, out of performance concerns, we converted the entire
environment to UTF-8 in one fell swoop at the beginning, and upon
putenv() and run_command() converted it back.
Having a private copy of the environment comes with its own perils: when
a library used by Git's source code tries to modify the environment, it
does not really work (in Git for Windows' case, libcurl, see
https://github.com/git-for-windows/git/compare/bcad1e6d58^...bcad1e6d58^2
for a glimpse of the issues).
Hence, it makes our environment handling substantially more robust if we
switch to on-the-fly-conversion in `getenv()`/`putenv()` calls. Based
on an initial version in the MSVC context by Jeff Hostetler, this patch
makes it so.
Surprisingly, this has a *positive* effect on speed: at the time when
the current code was written, we tested the performance, and there were
*so many* `getenv()` calls that it seemed better to convert everything
in one go. In the meantime, though, Git has obviously been cleaned up a
bit with regards to `getenv()` calls so that the Git processes spawned
by the test suite use an average of only 40 `getenv()`/`putenv()` calls
over the process lifetime.
Speaking of the entire test suite: the total time spent in the
re-encoding in the current code takes about 32.4 seconds (out of 113
minutes runtime), whereas the code introduced in this patch takes only
about 8.2 seconds in total. Not much, but it proves that we need not be
concerned about the performance impact introduced by this patch.
Helped-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
#ifdef NO_UNSETENV
|
|
|
|
#error "NO_UNSETENV is incompatible with the Windows-specific startup code!"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We bind *env() routines (even the mingw_ ones) to private mingw_ versions.
|
|
|
|
* These talk to the CRT using UNICODE/wchar_t, but maintain the original
|
|
|
|
* narrow-char API.
|
|
|
|
*
|
|
|
|
* Note that the MSCRT maintains both ANSI (getenv()) and UNICODE (_wgetenv())
|
|
|
|
* routines and stores both versions of each environment variable in parallel
|
|
|
|
* (and secretly updates both when you set one or the other), but it uses CP_ACP
|
|
|
|
* to do the conversion rather than CP_UTF8.
|
|
|
|
*
|
|
|
|
* Since everything in the git code base is UTF8, we define the mingw_ routines
|
|
|
|
* to access the CRT using the UNICODE routines and manually convert them to
|
|
|
|
* UTF8. This also avoids round-trip problems.
|
|
|
|
*
|
|
|
|
* This also helps with our linkage, since "_wenviron" is publicly exported
|
|
|
|
* from the CRT. But to access "_environ" we would have to statically link
|
|
|
|
* to the CRT (/MT).
|
|
|
|
*
|
|
|
|
* We require NO_SETENV (and let gitsetenv() call our mingw_putenv).
|
|
|
|
*/
|
|
|
|
#define getenv mingw_getenv
|
|
|
|
#define putenv mingw_putenv
|
|
|
|
#define unsetenv mingw_putenv
|
|
|
|
char *mingw_getenv(const char *name);
|
mingw: reencode environment variables on the fly (UTF-16 <-> UTF-8)
On Windows, the authoritative environment is encoded in UTF-16. In Git
for Windows, we convert that to UTF-8 (because UTF-16 is *such* a
foreign idea to Git that its source code is unprepared for it).
Previously, out of performance concerns, we converted the entire
environment to UTF-8 in one fell swoop at the beginning, and upon
putenv() and run_command() converted it back.
Having a private copy of the environment comes with its own perils: when
a library used by Git's source code tries to modify the environment, it
does not really work (in Git for Windows' case, libcurl, see
https://github.com/git-for-windows/git/compare/bcad1e6d58^...bcad1e6d58^2
for a glimpse of the issues).
Hence, it makes our environment handling substantially more robust if we
switch to on-the-fly-conversion in `getenv()`/`putenv()` calls. Based
on an initial version in the MSVC context by Jeff Hostetler, this patch
makes it so.
Surprisingly, this has a *positive* effect on speed: at the time when
the current code was written, we tested the performance, and there were
*so many* `getenv()` calls that it seemed better to convert everything
in one go. In the meantime, though, Git has obviously been cleaned up a
bit with regards to `getenv()` calls so that the Git processes spawned
by the test suite use an average of only 40 `getenv()`/`putenv()` calls
over the process lifetime.
Speaking of the entire test suite: the total time spent in the
re-encoding in the current code takes about 32.4 seconds (out of 113
minutes runtime), whereas the code introduced in this patch takes only
about 8.2 seconds in total. Not much, but it proves that we need not be
concerned about the performance impact introduced by this patch.
Helped-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
int mingw_putenv(const char *name);
|
|
|
|
|
|
|
|
int mingw_gethostname(char *host, int namelen);
|
|
|
|
#define gethostname mingw_gethostname
|
|
|
|
|
|
|
|
struct hostent *mingw_gethostbyname(const char *host);
|
|
|
|
#define gethostbyname mingw_gethostbyname
|
|
|
|
|
|
|
|
int mingw_getaddrinfo(const char *node, const char *service,
|
|
|
|
const struct addrinfo *hints, struct addrinfo **res);
|
|
|
|
#define getaddrinfo mingw_getaddrinfo
|
|
|
|
|
|
|
|
int mingw_socket(int domain, int type, int protocol);
|
|
|
|
#define socket mingw_socket
|
|
|
|
|
|
|
|
int mingw_connect(int sockfd, struct sockaddr *sa, size_t sz);
|
|
|
|
#define connect mingw_connect
|
|
|
|
|
|
|
|
int mingw_bind(int sockfd, struct sockaddr *sa, size_t sz);
|
|
|
|
#define bind mingw_bind
|
|
|
|
|
|
|
|
int mingw_setsockopt(int sockfd, int lvl, int optname, void *optval, int optlen);
|
|
|
|
#define setsockopt mingw_setsockopt
|
|
|
|
|
|
|
|
int mingw_shutdown(int sockfd, int how);
|
|
|
|
#define shutdown mingw_shutdown
|
|
|
|
|
|
|
|
int mingw_listen(int sockfd, int backlog);
|
|
|
|
#define listen mingw_listen
|
|
|
|
|
|
|
|
int mingw_accept(int sockfd, struct sockaddr *sa, socklen_t *sz);
|
|
|
|
#define accept mingw_accept
|
|
|
|
|
|
|
|
int mingw_rename(const char*, const char*);
|
|
|
|
#define rename mingw_rename
|
|
|
|
|
|
|
|
#if defined(USE_WIN32_MMAP) || defined(_MSC_VER)
|
|
|
|
int mingw_getpagesize(void);
|
|
|
|
#define getpagesize mingw_getpagesize
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Reduced resource-limit record: only the current limit is kept. */
struct rlimit {
	unsigned int rlim_cur;
};
#define RLIMIT_NOFILE 0

/*
 * Report a resource limit.
 *
 * Only RLIMIT_NOFILE is recognised; for it, rlp->rlim_cur is set to the
 * fixed value 2048 and 0 is returned.  Any other `resource` fails with
 * -1 and errno set to EINVAL.  A NULL `rlp` fails with -1 and errno set
 * to EFAULT instead of being dereferenced.
 */
static inline int getrlimit(int resource, struct rlimit *rlp)
{
	if (resource != RLIMIT_NOFILE) {
		errno = EINVAL;
		return -1;
	}
	if (!rlp) {
		errno = EFAULT;
		return -1;
	}

	rlp->rlim_cur = 2048;
	return 0;
}
|
|
|
|
|
MSVC: fix stat definition hell
In msvc.h, there's a couple of stat related functions defined differently
from mingw.h. When we remove these definitions, the only problem we get is
"warning C4005: '_stati64' : macro redefinition" for this line in mingw.h:
#define _stati64(x,y) mingw_stat(x,y)
The reason is that as of MSVCR80.dll (distributed with MSVC 2005), the
original _stati64 family of functions was renamed to _stat32i64, and the
former function names became macros (pointing to the appropriate function
based on the definition of _USE_32BIT_TIME_T).
Defining _stati64 works on MinGW because MinGW by default compiles against
the MSVCRT.DLL that is part of Windows (i.e. _stati64 is a function rather
than a macro).
Note: MinGW *can* compile for newer MSVC runtime versions, and MSVC
apparently can also compile for the Windows MSVCRT.DLL via the DDK (see
http://www.syndicateofideas.com/posts/fighting-the-msvcrt-dll-hell ).
Remove the stat definitions from msvc.h, as they are not compiler related.
In mingw.h, determine the runtime version in use from the definitions of
_stati64 and _USE_32BIT_TIME_T, and define stat() accordingly.
This also fixes that stat() in MSVC builds still resolves to mingw_lstat()
instead of mingw_stat().
Signed-off-by: Karsten Blees <blees@dcon.de>
Acked-by: Sebastian Schuberth <sschuberth@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
/*
|
|
|
|
* Use mingw specific stat()/lstat()/fstat() implementations on Windows,
|
|
|
|
* including our own struct stat with 64 bit st_size and nanosecond-precision
|
|
|
|
* file times.
|
|
|
|
*/
|
|
|
|
#ifndef __MINGW64_VERSION_MAJOR
|
|
|
|
#define off_t off64_t
|
|
|
|
#define lseek _lseeki64
|
|
|
|
#ifndef _MSC_VER
|
|
|
|
/* Time value split into whole seconds and a nanosecond remainder. */
struct timespec {
	time_t tv_sec;	/* seconds */
	long tv_nsec;	/* nanoseconds */
};
|
|
|
|
#endif
|
|
|
|
#endif
|
MSVC: fix stat definition hell
In msvc.h, there's a couple of stat related functions defined differently
from mingw.h. When we remove these definitions, the only problem we get is
"warning C4005: '_stati64' : macro redefinition" for this line in mingw.h:
#define _stati64(x,y) mingw_stat(x,y)
The reason is that as of MSVCR80.dll (distributed with MSVC 2005), the
original _stati64 family of functions was renamed to _stat32i64, and the
former function names became macros (pointing to the appropriate function
based on the definition of _USE_32BIT_TIME_T).
Defining _stati64 works on MinGW because MinGW by default compiles against
the MSVCRT.DLL that is part of Windows (i.e. _stati64 is a function rather
than a macro).
Note: MinGW *can* compile for newer MSVC runtime versions, and MSVC
apparently can also compile for the Windows MSVCRT.DLL via the DDK (see
http://www.syndicateofideas.com/posts/fighting-the-msvcrt-dll-hell ).
Remove the stat definitions from msvc.h, as they are not compiler related.
In mingw.h, determine the runtime version in use from the definitions of
_stati64 and _USE_32BIT_TIME_T, and define stat() accordingly.
This also fixes that stat() in MSVC builds still resolves to mingw_lstat()
instead of mingw_stat().
Signed-off-by: Karsten Blees <blees@dcon.de>
Acked-by: Sebastian Schuberth <sschuberth@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
|
|
|
|
struct mingw_stat {
|
|
|
|
_dev_t st_dev;
|
|
|
|
_ino_t st_ino;
|
|
|
|
_mode_t st_mode;
|
|
|
|
short st_nlink;
|
|
|
|
short st_uid;
|
|
|
|
short st_gid;
|
|
|
|
_dev_t st_rdev;
|
|
|
|
off64_t st_size;
|
|
|
|
struct timespec st_atim;
|
|
|
|
struct timespec st_mtim;
|
|
|
|
struct timespec st_ctim;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define st_atime st_atim.tv_sec
|
|
|
|
#define st_mtime st_mtim.tv_sec
|
|
|
|
#define st_ctime st_ctim.tv_sec
|
|
|
|
|
|
|
|
#ifdef stat
|
|
|
|
#undef stat
|
|
|
|
#endif
|
|
|
|
#define stat mingw_stat
|
|
|
|
int mingw_lstat(const char *file_name, struct stat *buf);
|
|
|
|
int mingw_stat(const char *file_name, struct stat *buf);
|
|
|
|
int mingw_fstat(int fd, struct stat *buf);
|
|
|
|
#ifdef fstat
|
|
|
|
#undef fstat
|
|
|
|
#endif
|
|
|
|
#define fstat mingw_fstat
|
|
|
|
#ifdef lstat
|
|
|
|
#undef lstat
|
|
|
|
#endif
|
|
|
|
#define lstat mingw_lstat
|
MSVC: fix stat definition hell
In msvc.h, there's a couple of stat related functions defined differently
from mingw.h. When we remove these definitions, the only problem we get is
"warning C4005: '_stati64' : macro redefinition" for this line in mingw.h:
#define _stati64(x,y) mingw_stat(x,y)
The reason is that as of MSVCR80.dll (distributed with MSVC 2005), the
original _stati64 family of functions was renamed to _stat32i64, and the
former function names became macros (pointing to the appropriate function
based on the definition of _USE_32BIT_TIME_T).
Defining _stati64 works on MinGW because MinGW by default compiles against
the MSVCRT.DLL that is part of Windows (i.e. _stati64 is a function rather
than a macro).
Note: MinGW *can* compile for newer MSVC runtime versions, and MSVC
apparently can also compile for the Windows MSVCRT.DLL via the DDK (see
http://www.syndicateofideas.com/posts/fighting-the-msvcrt-dll-hell ).
Remove the stat definitions from msvc.h, as they are not compiler related.
In mingw.h, determine the runtime version in use from the definitions of
_stati64 and _USE_32BIT_TIME_T, and define stat() accordingly.
This also fixes that stat() in MSVC builds still resolves to mingw_lstat()
instead of mingw_stat().
Signed-off-by: Karsten Blees <blees@dcon.de>
Acked-by: Sebastian Schuberth <sschuberth@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
|
|
|
|
|
|
|
|
int mingw_utime(const char *file_name, const struct utimbuf *times);
|
|
|
|
#define utime mingw_utime
|
mingw: abort on invalid strftime formats
On Windows, strftime() does not silently ignore invalid formats, but
warns about them and then returns 0 and sets errno to EINVAL.
Unfortunately, Git does not expect such a behavior, as it disagrees
with strftime()'s semantics on Linux. As a consequence, Git
misinterprets the return value 0 as "I need more space" and grows the
buffer. As the larger buffer does not fix the format, the buffer grows
and grows and grows until we are out of memory and abort.
Ideally, we would switch off the parameter validation just for
strftime(), but we cannot even override the invalid parameter handler
via _set_thread_local_invalid_parameter_handler() using MINGW because
that function is not declared. Even _set_invalid_parameter_handler(),
which *is* declared, does not help, as it simply does... nothing.
So let's just bite the bullet and override strftime() for MINGW and
abort on an invalid format string. While this does not provide the
best user experience, it is the best we can do.
See https://msdn.microsoft.com/en-us/library/fe06s4ak.aspx for more
details.
This fixes https://github.com/git-for-windows/git/issues/863
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
size_t mingw_strftime(char *s, size_t max,
|
|
|
|
const char *format, const struct tm *tm);
|
|
|
|
#define strftime mingw_strftime
|
|
|
|
|
Windows: avoid the "dup dance" when spawning a child process
When stdin, stdout, or stderr must be redirected for a child process that
on Windows is spawned using one of the spawn() functions of Microsoft's
C runtime, then there is no choice other than to
1. make a backup copy of fd 0,1,2 with dup
2. dup2 the redirection source fd into 0,1,2
3. spawn
4. dup2 the backup back into 0,1,2
5. close the backup copy and the redirection source
We used this idiom as well -- but we are not using the spawn() functions
anymore!
Instead, we have our own implementation. We had hardcoded that stdin,
stdout, and stderr of the child process were inherited from the parent's
fds 0, 1, and 2. But we can actually specify any fd.
With this patch, the fds to inherit are passed from start_command()'s
WIN32 section to our spawn implementation. This way, we can avoid the
backup copies of the fds.
The backup copies were a bug waiting to surface: The OS handles underlying
the dup()ed fds were inherited by the child process (but were not
associated with a file descriptor in the child). Consequently, the file or
pipe represented by the OS handle remained open even after the backup copy
was closed in the parent process until the child exited.
Since our implementation of pipe() creates non-inheritable OS handles, we
still dup() file descriptors in start_command() because dup() happens to
create inheritable duplicates. (A nice side effect is that the fd cleanup
in start_command is the same for Windows and Unix and remains unchanged.)
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
|
|
|
pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env,
|
|
|
|
const char *dir,
|
Windows: avoid the "dup dance" when spawning a child process
When stdin, stdout, or stderr must be redirected for a child process that
on Windows is spawned using one of the spawn() functions of Microsoft's
C runtime, then there is no choice other than to
1. make a backup copy of fd 0,1,2 with dup
2. dup2 the redirection source fd into 0,1,2
3. spawn
4. dup2 the backup back into 0,1,2
5. close the backup copy and the redirection source
We used this idiom as well -- but we are not using the spawn() functions
anymore!
Instead, we have our own implementation. We had hardcoded that stdin,
stdout, and stderr of the child process were inherited from the parent's
fds 0, 1, and 2. But we can actually specify any fd.
With this patch, the fds to inherit are passed from start_command()'s
WIN32 section to our spawn implementation. This way, we can avoid the
backup copies of the fds.
The backup copies were a bug waiting to surface: The OS handles underlying
the dup()ed fds were inherited by the child process (but were not
associated with a file descriptor in the child). Consequently, the file or
pipe represented by the OS handle remained open even after the backup copy
was closed in the parent process until the child exited.
Since our implementation of pipe() creates non-inheritable OS handles, we
still dup() file descriptors in start_command() because dup() happens to
create inheritable duplicates. (A nice side effect is that the fd cleanup
in start_command is the same for Windows and Unix and remains unchanged.)
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
|
|
|
int fhin, int fhout, int fherr);
|
|
|
|
int mingw_execvp(const char *cmd, char *const *argv);
|
|
|
|
#define execvp mingw_execvp
|
|
|
|
int mingw_execv(const char *cmd, char *const *argv);
|
|
|
|
#define execv mingw_execv
|
|
|
|
|
|
|
|
/*
 * Wrapper around ntohl() that takes and returns plain `unsigned int`;
 * the result of ntohl() is cast to that type.
 */
static inline unsigned int git_ntohl(unsigned int x)
{
	return (unsigned int)ntohl(x);
}
|
|
|
|
#define ntohl git_ntohl
|
|
|
|
|
|
|
|
sig_handler_t mingw_signal(int sig, sig_handler_t handler);
|
|
|
|
#define signal mingw_signal
|
|
|
|
|
|
|
|
int mingw_raise(int sig);
|
|
|
|
#define raise mingw_raise
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ANSI emulation wrappers
|
|
|
|
*/
|
|
|
|
|
mingw: intercept isatty() to handle /dev/null as Git expects it
When Git's source code calls isatty(), it really asks whether the
respective file descriptor is connected to an interactive terminal.
Windows' _isatty() function, however, determines whether the file
descriptor is associated with a character device. And NUL, Windows'
equivalent of /dev/null, is a character device.
Which means that for years, Git mistakenly detected an associated
interactive terminal when being run through the test suite, which
almost always redirects stdin, stdout and stderr to /dev/null.
This bug only became obvious, and painfully so, when the new
bisect--helper entered the `pu` branch and made the automatic build & test
time out because t6030 was waiting for an answer.
For details, see
https://msdn.microsoft.com/en-us/library/f4s0ddew.aspx
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
int winansi_isatty(int fd);
|
|
|
|
#define isatty winansi_isatty
|
|
|
|
|
|
|
|
int winansi_dup2(int oldfd, int newfd);
|
|
|
|
#define dup2 winansi_dup2
|
|
|
|
|
Win32: Thread-safe windows console output
Winansi.c has many static variables that are accessed and modified from
the [v][f]printf / fputs functions overridden in the file. This may cause
multi threaded git commands that print to the console to produce corrupted
output or even crash.
Additionally, winansi.c doesn't override all functions that can be used to
print to the console (e.g. fwrite, write, fputc are missing), so that ANSI
escapes don't work properly for some git commands (e.g. git-grep).
Instead of doing ANSI emulation in just a few wrapped functions on top of
the IO API, let's plug into the IO system and take advantage of the thread
safety inherent to the IO system.
Redirect stdout and stderr to a pipe if they point to the console. A
background thread reads from the pipe, handles ANSI escape sequences and
UTF-8 to UTF-16 conversion, then writes to the console.
The pipe-based stdout and stderr replacements must be set to unbuffered, as
MSVCRT doesn't support line buffering and fully buffered streams are
inappropriate for console output.
Due to the byte-oriented pipe, ANSI escape sequences and multi-byte UTF-8
sequences can no longer be expected to arrive in one piece. Replace the
string-based ansi_emulate() with a simple stateful parser (this also fixes
colored diff hunk headers, which were broken as of commit 2efcc977).
Override isatty to return true for the pipes redirecting to the console.
Exec/spawn obtain the original console handle to pass to the next process
via winansi_get_osfhandle().
All other overrides are gone, the default stdio implementations work as
expected with the piped stdout/stderr descriptors.
Global variables are either initialized on startup (single threaded) or
exclusively modified by the background thread. Threads communicate through
the pipe, no further synchronization is necessary.
The background thread is terminated by disconnecting the pipe after flushing
the stdio and pipe buffers. This doesn't work for anonymous pipes (created
via CreatePipe), as DisconnectNamedPipe only works on the read end, which
discards remaining data. Thus we have to setup the pipe manually, with the
write end being the server (opened with CreateNamedPipe) and the read end
the client (opened with CreateFile).
Limitations: doesn't track reopened or duped file descriptors, i.e.:
- fdopen(1/2) returns fully buffered streams
- dup(1/2), dup2(1/2) returns normal pipe descriptors (i.e. isatty() =
false, winansi_get_osfhandle won't return the original console handle)
Currently, only the git-format-patch command uses xfdopen(xdup(1)) (see
"realstdout" in builtin/log.c), but works well with these limitations.
Many thanks to Atsushi Nakagawa <atnak@chejz.com> for suggesting and
reviewing the thread-exit-mechanism.
Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Stepan Kasal <kasal@ucw.cz>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
void winansi_init(void);
|
|
|
|
HANDLE winansi_get_osfhandle(int fd);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* git specific compatibility
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Rewrite `path` in place, turning every backslash into a forward
 * slash.  All other bytes are left alone; scanning stops at the
 * terminating NUL.  `path` must not be NULL.
 */
static inline void convert_slashes(char *path)
{
	for (; *path; path++) {
		if (*path == '\\')
			*path = '/';
	}
}
|
|
|
|
#define PATH_SEP ';'
|
|
|
|
char *mingw_query_user_email(void);
|
|
|
|
#define query_user_email mingw_query_user_email
|
|
|
|
#if !defined(__MINGW64_VERSION_MAJOR) && (!defined(_MSC_VER) || _MSC_VER < 1800)
|
|
|
|
#define PRIuMAX "I64u"
|
|
|
|
#define PRId64 "I64d"
|
|
|
|
#else
|
|
|
|
#include <inttypes.h>
|
|
|
|
#endif
|
|
|
|
|
mingw: refuse to access paths with trailing spaces or periods
When creating a directory on Windows whose path ends in a space or a
period (or chains thereof), the Win32 API "helpfully" trims those. For
example, `mkdir("abc ");` will return success, but actually create a
directory called `abc` instead.
This stems back to the DOS days, when all file names had exactly 8
characters plus exactly 3 characters for the file extension, and the
only way to have shorter names was by padding with spaces.
Sadly, this "helpful" behavior is a bit inconsistent: after a successful
`mkdir("abc ");`, a `mkdir("abc /def")` will actually _fail_ (because
the directory `abc ` does not actually exist).
Even if it would work, we now have a serious problem because a Git
repository could contain directories `abc` and `abc `, and on Windows,
they would be "merged" unintentionally.
As these paths are illegal on Windows, anyway, let's disallow any
accesses to such paths on that Operating System.
For practical reasons, this behavior is still guarded by the
config setting `core.protectNTFS`: it is possible (and at least two
regression tests make use of it) to create commits without involving the
worktree. In such a scenario, it is of course possible -- even on
Windows -- to create such file names.
Among other consequences, this patch disallows submodules' paths to end
in spaces on Windows (which would formerly have confused Git enough to
try to write into incorrect paths, anyway).
While this patch does not fix a vulnerability on its own, it prevents an
attack vector that was exploited in demonstrations of a number of
recently-fixed security bugs.
The regression test added to `t/t7417-submodule-path-url.sh` reflects
that attack vector.
Note that we have to adjust the test case "prevent git~1 squatting on
Windows" in `t/t7415-submodule-names.sh` because of a very subtle issue.
It tries to clone two submodules whose names differ only in a trailing
period character, and as a consequence their git directories differ in
the same way. Previously, when Git tried to clone the second submodule,
it thought that the git directory already existed (because on Windows,
when you create a directory with the name `b.` it actually creates `b`),
but with this patch, the first submodule's clone will fail because of
the illegal name of the git directory. Therefore, when cloning the
second submodule, Git will take a different code path: a fresh clone
(without an existing git directory). Both code paths fail to clone the
second submodule, both because the corresponding worktree directory
exists and is not empty, but the error messages are worded differently.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
5 years ago
|
|
|
/**
|
|
|
|
* Verifies that the given path is a valid one on Windows.
|
|
|
|
*
|
mingw: refuse to access paths with illegal characters
Certain characters are not admissible in file names on Windows, even if
Cygwin/MSYS2 (and therefore, Git for Windows' Bash) pretend that they
are, e.g. `:`, `<`, `>`, etc
Let's disallow those characters explicitly in Windows builds of Git.
Note: just like trailing spaces or periods, it _is_ possible on Windows
to create commits adding files with such illegal characters, as long as
the operation leaves the worktree untouched. To allow for that, we
continue to guard `is_valid_win32_path()` behind the config setting
`core.protectNTFS`, so that users _can_ continue to do that, as long as
they turn the protections off via that config setting.
Among other problems, this prevents Git from trying to write to an "NTFS
Alternate Data Stream" (which refers to metadata stored alongside a
file, under a special name: "<filename>:<stream-name>"). This fix
therefore also prevents an attack vector that was exploited in
demonstrations of a number of recently-fixed security bugs.
Further reading on illegal characters in Win32 filenames:
https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
5 years ago
|
|
|
* In particular, path segments are disallowed which
|
|
|
|
*
|
|
|
|
* - end in a period or a space (except the special directories `.` and `..`).
|
|
|
|
*
|
|
|
|
* - contain any of the reserved characters, e.g. `:`, `;`, `*`, etc
|
mingw: refuse to access paths with trailing spaces or periods
When creating a directory on Windows whose path ends in a space or a
period (or chains thereof), the Win32 API "helpfully" trims those. For
example, `mkdir("abc ");` will return success, but actually create a
directory called `abc` instead.
This stems back to the DOS days, when all file names had exactly 8
characters plus exactly 3 characters for the file extension, and the
only way to have shorter names was by padding with spaces.
Sadly, this "helpful" behavior is a bit inconsistent: after a successful
`mkdir("abc ");`, a `mkdir("abc /def")` will actually _fail_ (because
the directory `abc ` does not actually exist).
Even if it would work, we now have a serious problem because a Git
repository could contain directories `abc` and `abc `, and on Windows,
they would be "merged" unintentionally.
As these paths are illegal on Windows, anyway, let's disallow any
accesses to such paths on that Operating System.
For practical reasons, this behavior is still guarded by the
config setting `core.protectNTFS`: it is possible (and at least two
regression tests make use of it) to create commits without involving the
worktree. In such a scenario, it is of course possible -- even on
Windows -- to create such file names.
Among other consequences, this patch disallows submodules' paths to end
in spaces on Windows (which would formerly have confused Git enough to
try to write into incorrect paths, anyway).
While this patch does not fix a vulnerability on its own, it prevents an
attack vector that was exploited in demonstrations of a number of
recently-fixed security bugs.
The regression test added to `t/t7417-submodule-path-url.sh` reflects
that attack vector.
Note that we have to adjust the test case "prevent git~1 squatting on
Windows" in `t/t7415-submodule-names.sh` because of a very subtle issue.
It tries to clone two submodules whose names differ only in a trailing
period character, and as a consequence their git directories differ in
the same way. Previously, when Git tried to clone the second submodule,
it thought that the git directory already existed (because on Windows,
when you create a directory with the name `b.` it actually creates `b`),
but with this patch, the first submodule's clone will fail because of
the illegal name of the git directory. Therefore, when cloning the
second submodule, Git will take a different code path: a fresh clone
(without an existing git directory). Both code paths fail to clone the
second submodule, both because the corresponding worktree directory
exists and is not empty, but the error messages are worded differently.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
5 years ago
|
|
|
*
|
|
|
|
* - correspond to reserved names (such as `AUX`, `PRN`, etc)
|
|
|
|
*
|
|
|
|
* The `allow_literal_nul` parameter controls whether the path `NUL` should
|
|
|
|
* be considered valid (this makes sense e.g. before opening files, as it is
|
|
|
|
* perfectly legitimate to open `NUL` on Windows, just as it is to open
|
|
|
|
* `/dev/null` on Unix/Linux).
|
|
|
|
*
|
mingw: refuse to access paths with trailing spaces or periods
When creating a directory on Windows whose path ends in a space or a
period (or chains thereof), the Win32 API "helpfully" trims those. For
example, `mkdir("abc ");` will return success, but actually create a
directory called `abc` instead.
This stems back to the DOS days, when all file names had exactly 8
characters plus exactly 3 characters for the file extension, and the
only way to have shorter names was by padding with spaces.
Sadly, this "helpful" behavior is a bit inconsistent: after a successful
`mkdir("abc ");`, a `mkdir("abc /def")` will actually _fail_ (because
the directory `abc ` does not actually exist).
Even if it would work, we now have a serious problem because a Git
repository could contain directories `abc` and `abc `, and on Windows,
they would be "merged" unintentionally.
As these paths are illegal on Windows, anyway, let's disallow any
accesses to such paths on that Operating System.
For practical reasons, this behavior is still guarded by the
config setting `core.protectNTFS`: it is possible (and at least two
regression tests make use of it) to create commits without involving the
worktree. In such a scenario, it is of course possible -- even on
Windows -- to create such file names.
Among other consequences, this patch disallows submodules' paths to end
in spaces on Windows (which would formerly have confused Git enough to
try to write into incorrect paths, anyway).
While this patch does not fix a vulnerability on its own, it prevents an
attack vector that was exploited in demonstrations of a number of
recently-fixed security bugs.
The regression test added to `t/t7417-submodule-path-url.sh` reflects
that attack vector.
Note that we have to adjust the test case "prevent git~1 squatting on
Windows" in `t/t7415-submodule-names.sh` because of a very subtle issue.
It tries to clone two submodules whose names differ only in a trailing
period character, and as a consequence their git directories differ in
the same way. Previously, when Git tried to clone the second submodule,
it thought that the git directory already existed (because on Windows,
when you create a directory with the name `b.` it actually creates `b`),
but with this patch, the first submodule's clone will fail because of
the illegal name of the git directory. Therefore, when cloning the
second submodule, Git will take a different code path: a fresh clone
(without an existing git directory). Both code paths fail to clone the
second submodule, both because the corresponding worktree directory
exists and is not empty, but the error messages are worded differently.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
5 years ago
|
|
|
* Returns 1 upon success, otherwise 0.
|
|
|
|
*/
|
|
|
|
int is_valid_win32_path(const char *path, int allow_literal_nul);
|
|
|
|
#define is_valid_path(path) is_valid_win32_path(path, 0)
|
mingw: refuse to access paths with trailing spaces or periods
When creating a directory on Windows whose path ends in a space or a
period (or chains thereof), the Win32 API "helpfully" trims those. For
example, `mkdir("abc ");` will return success, but actually create a
directory called `abc` instead.
This stems back to the DOS days, when all file names had exactly 8
characters plus exactly 3 characters for the file extension, and the
only way to have shorter names was by padding with spaces.
Sadly, this "helpful" behavior is a bit inconsistent: after a successful
`mkdir("abc ");`, a `mkdir("abc /def")` will actually _fail_ (because
the directory `abc ` does not actually exist).
Even if it would work, we now have a serious problem because a Git
repository could contain directories `abc` and `abc `, and on Windows,
they would be "merged" unintentionally.
As these paths are illegal on Windows, anyway, let's disallow any
accesses to such paths on that Operating System.
For practical reasons, this behavior is still guarded by the
config setting `core.protectNTFS`: it is possible (and at least two
regression tests make use of it) to create commits without involving the
worktree. In such a scenario, it is of course possible -- even on
Windows -- to create such file names.
Among other consequences, this patch disallows submodules' paths to end
in spaces on Windows (which would formerly have confused Git enough to
try to write into incorrect paths, anyway).
While this patch does not fix a vulnerability on its own, it prevents an
attack vector that was exploited in demonstrations of a number of
recently-fixed security bugs.
The regression test added to `t/t7417-submodule-path-url.sh` reflects
that attack vector.
Note that we have to adjust the test case "prevent git~1 squatting on
Windows" in `t/t7415-submodule-names.sh` because of a very subtle issue.
It tries to clone two submodules whose names differ only in a trailing
period character, and as a consequence their git directories differ in
the same way. Previously, when Git tried to clone the second submodule,
it thought that the git directory already existed (because on Windows,
when you create a directory with the name `b.` it actually creates `b`),
but with this patch, the first submodule's clone will fail because of
the illegal name of the git directory. Therefore, when cloning the
second submodule, Git will take a different code path: a fresh clone
(without an existing git directory). Both code paths fail to clone the
second submodule, both because the corresponding worktree directory
exists and is not empty, but the error messages are worded differently.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
5 years ago
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts UTF-8 encoded string to UTF-16LE.
|
|
|
|
*
|
|
|
|
* To support repositories with legacy-encoded file names, invalid UTF-8 bytes
|
|
|
|
* 0xa0 - 0xff are converted to corresponding printable Unicode chars \u00a0 -
|
|
|
|
* \u00ff, and invalid UTF-8 bytes 0x80 - 0x9f (which would make non-printable
|
|
|
|
* Unicode) are converted to hex-code.
|
|
|
|
*
|
|
|
|
* Lead-bytes not followed by an appropriate number of trail-bytes, over-long
|
|
|
|
* encodings and 4-byte encodings > \u10ffff are detected as invalid UTF-8.
|
|
|
|
*
|
|
|
|
* Maximum space requirement for the target buffer is two wide chars per UTF-8
|
|
|
|
* char (((strlen(utf) * 2) + 1) [* sizeof(wchar_t)]).
|
|
|
|
*
|
|
|
|
* The maximum space is needed only if the entire input string consists of
|
|
|
|
* invalid UTF-8 bytes in range 0x80-0x9f, as per the following table:
|
|
|
|
*
|
|
|
|
* | | UTF-8 | UTF-16 |
|
|
|
|
* Code point | UTF-8 sequence | bytes | words | ratio
|
|
|
|
* --------------+-------------------+-------+--------+-------
|
|
|
|
* 000000-00007f | 0-7f | 1 | 1 | 1
|
|
|
|
* 000080-0007ff | c2-df + 80-bf | 2 | 1 | 0.5
|
|
|
|
* 000800-00ffff | e0-ef + 2 * 80-bf | 3 | 1 | 0.33
|
|
|
|
* 010000-10ffff | f0-f4 + 3 * 80-bf | 4 | 2 (a) | 0.5
|
|
|
|
* invalid | 80-9f | 1 | 2 (b) | 2
|
|
|
|
* invalid | a0-ff | 1 | 1 | 1
|
|
|
|
*
|
|
|
|
* (a) encoded as UTF-16 surrogate pair
|
|
|
|
* (b) encoded as two hex digits
|
|
|
|
*
|
|
|
|
* Note that, while the UTF-8 encoding scheme can be extended to 5-byte, 6-byte
|
|
|
|
* or even indefinite-byte sequences, the largest valid code point \u10ffff
|
|
|
|
* encodes as only 4 UTF-8 bytes.
|
|
|
|
*
|
|
|
|
* Parameters:
|
|
|
|
* wcs: wide char target buffer
|
|
|
|
* utf: string to convert
|
|
|
|
* wcslen: size of target buffer (in wchar_t's)
|
|
|
|
* utflen: size of string to convert, or -1 if 0-terminated
|
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* length of converted string (_wcslen(wcs)), or -1 on failure
|
|
|
|
*
|
|
|
|
* Errors:
|
|
|
|
* EINVAL: one of the input parameters is invalid (e.g. NULL)
|
|
|
|
* ERANGE: the output buffer is too small
|
|
|
|
*/
|
|
|
|
int xutftowcsn(wchar_t *wcs, const char *utf, size_t wcslen, int utflen);
|
|
|
|
|
|
|
|
/**
 * Simplified variant of xutftowcsn, assumes input string is \0-terminated.
 *
 * Forwards to xutftowcsn() with utflen fixed to -1.
 *
 * Parameters:
 * wcs: wide char target buffer
 * utf: \0-terminated string to convert
 * wcslen: size of target buffer (in wchar_t's)
 *
 * Returns:
 * whatever xutftowcsn() returns for these arguments, unchanged
 */
static inline int xutftowcs(wchar_t *wcs, const char *utf, size_t wcslen)
{
	return xutftowcsn(wcs, utf, wcslen, -1);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Simplified file system specific variant of xutftowcsn, assumes output
|
|
|
|
* buffer size is MAX_PATH wide chars and input string is \0-terminated,
|
|
|
|
* fails with ENAMETOOLONG if input string is too long.
|
|
|
|
*/
|
|
|
|
static inline int xutftowcs_path(wchar_t *wcs, const char *utf)
|
|
|
|
{
|
|
|
|
int result = xutftowcsn(wcs, utf, MAX_PATH, -1);
|
|
|
|
if (result < 0 && errno == ERANGE)
|
|
|
|
errno = ENAMETOOLONG;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts UTF-16LE encoded string to UTF-8.
|
|
|
|
*
|
|
|
|
* Maximum space requirement for the target buffer is three UTF-8 chars per
|
|
|
|
* wide char ((_wcslen(wcs) * 3) + 1).
|
|
|
|
*
|
|
|
|
* The maximum space is needed only if the entire input string consists of
|
|
|
|
* UTF-16 words in range 0x0800-0xd7ff or 0xe000-0xffff (i.e. \u0800-\uffff
|
|
|
|
* modulo surrogate pairs), as per the following table:
|
|
|
|
*
|
|
|
|
* | | UTF-16 | UTF-8 |
|
|
|
|
* Code point | UTF-16 sequence | words | bytes | ratio
|
|
|
|
* --------------+-----------------------+--------+-------+-------
|
|
|
|
* 000000-00007f | 0000-007f | 1 | 1 | 1
|
|
|
|
* 000080-0007ff | 0080-07ff | 1 | 2 | 2
|
|
|
|
* 000800-00ffff | 0800-d7ff / e000-ffff | 1 | 3 | 3
|
|
|
|
* 010000-10ffff | d800-dbff + dc00-dfff | 2 | 4 | 2
|
|
|
|
*
|
|
|
|
* Note that invalid code points > 10ffff cannot be represented in UTF-16.
|
|
|
|
*
|
|
|
|
* Parameters:
|
|
|
|
* utf: target buffer
|
|
|
|
* wcs: wide string to convert
|
|
|
|
* utflen: size of target buffer
|
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* length of converted string, or -1 on failure
|
|
|
|
*
|
|
|
|
* Errors:
|
|
|
|
* EINVAL: one of the input parameters is invalid (e.g. NULL)
|
|
|
|
* ERANGE: the output buffer is too small
|
|
|
|
*/
|
|
|
|
int xwcstoutf(char *utf, const wchar_t *wcs, size_t utflen);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A critical section used in the implementation of the spawn
|
|
|
|
* functions (mingw_spawnv[p]e()) and waitpid(). Initialised in
|
|
|
|
* the replacement main() macro below.
|
|
|
|
*/
|
|
|
|
extern CRITICAL_SECTION pinfo_cs;
|
|
|
|
|
|
|
|
/*
|
mingw: replace mingw_startup() hack
Git for Windows has special code to retrieve the command-line parameters
(and even the environment) in UTF-16 encoding, so that they can be
converted to UTF-8. This is necessary because Git for Windows wants to
use UTF-8 encoded strings throughout its code, and the main() function
does not get the parameters in that encoding.
To do that, we used the __wgetmainargs() function, which is not even a
Win32 API function, but provided by the MINGW "runtime" instead.
Obviously, this method would not work with any compiler other than GCC,
and in preparation for compiling with Visual C++, we would like to avoid
precisely that.
Lucky us, there is a much more elegant way: we can simply implement the
UTF-16 variant of `main()`: `wmain()`.
To make that work, we need to link with -municode. The command-line
parameters are passed to `wmain()` encoded in UTF-16, as desired, and
this method also works with GCC, and also with Visual C++ after
adjusting the MSVC linker flags to force it to use `wmain()`.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
* Git, like most portable C applications, implements a main() function. On
|
|
|
|
* Windows, this main() function would receive parameters encoded in the
|
|
|
|
* current locale, but Git for Windows would prefer UTF-8 encoded parameters.
|
|
|
|
*
|
|
|
|
* To make that happen, we still declare main() here, and then declare and
|
|
|
|
* implement wmain() (which is the Unicode variant of main()) and compile with
|
|
|
|
* -municode. This wmain() function reencodes the parameters from UTF-16 to
|
|
|
|
* UTF-8 format, sets up a couple of other things as required on Windows, and
|
|
|
|
* then hands off to the main() function.
|
|
|
|
*/
|
mingw: replace mingw_startup() hack
Git for Windows has special code to retrieve the command-line parameters
(and even the environment) in UTF-16 encoding, so that they can be
converted to UTF-8. This is necessary because Git for Windows wants to
use UTF-8 encoded strings throughout its code, and the main() function
does not get the parameters in that encoding.
To do that, we used the __wgetmainargs() function, which is not even a
Win32 API function, but provided by the MINGW "runtime" instead.
Obviously, this method would not work with any compiler other than GCC,
and in preparation for compiling with Visual C++, we would like to avoid
precisely that.
Lucky us, there is a much more elegant way: we can simply implement the
UTF-16 variant of `main()`: `wmain()`.
To make that work, we need to link with -municode. The command-line
parameters are passed to `wmain()` encoded in UTF-16, as desired, and
this method also works with GCC, and also with Visual C++ after
adjusting the MSVC linker flags to force it to use `wmain()`.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
int wmain(int argc, const wchar_t **w_argv);
|
|
|
|
int main(int argc, const char **argv);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For debugging: if a problem occurs, say, in a Git process that is spawned
|
|
|
|
* from another Git process which in turn is spawned from yet another Git
|
|
|
|
* process, it can be quite daunting to figure out what is going on.
|
|
|
|
*
|
|
|
|
* Call this function to open a new MinTTY (this assumes you are in Git for
|
|
|
|
* Windows' SDK) with a GDB that attaches to the current process right away.
|
|
|
|
*/
|
|
|
|
void open_in_gdb(void);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Used by Pthread API implementation for Windows
|
|
|
|
*/
|
|
|
|
int err_win_to_posix(DWORD winerr);
|