diff --git a/Documentation/technical/api-run-command.txt b/Documentation/technical/api-run-command.txt
index 44876fa703..f18b4f4817 100644
--- a/Documentation/technical/api-run-command.txt
+++ b/Documentation/technical/api-run-command.txt
@@ -231,8 +231,9 @@ The function pointer in .proc has the following signature:
 
 
 There are serious restrictions on what the asynchronous function can do
-because this facility is implemented by a pipe to a forked process on
-UNIX, but by a thread in the same address space on Windows:
+because this facility is implemented by a thread in the same address
+space on most platforms (when pthreads is available), but by a pipe to
+a forked process otherwise:
 
 . It cannot change the program's state (global variables, environment,
   etc.) in a way that the caller notices; in other words, .in and .out
diff --git a/compat/mingw.h b/compat/mingw.h
index f465566b7a..3b2477be5f 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -89,7 +89,7 @@ static inline int getuid()
 { return 1; }
 static inline struct passwd *getpwnam(const char *name)
 { return NULL; }
-static inline int fcntl(int fd, int cmd, long arg)
+static inline int fcntl(int fd, int cmd, ...)
 {
 	if (cmd == F_GETFD || cmd == F_SETFD)
 		return 0;
diff --git a/compat/win32/pthread.c b/compat/win32/pthread.c
index 0f949fc425..010e875ec4 100644
--- a/compat/win32/pthread.c
+++ b/compat/win32/pthread.c
@@ -16,6 +16,7 @@
 static unsigned __stdcall win32_start_routine(void *arg)
 {
 	pthread_t *thread = arg;
+	thread->tid = GetCurrentThreadId();
 	thread->arg = thread->start_routine(thread->arg);
 	return 0;
 }
@@ -49,6 +50,13 @@ int win32_pthread_join(pthread_t *thread, void **value_ptr)
 	}
 }
 
+pthread_t pthread_self(void)
+{
+	pthread_t t = { 0 };
+	t.tid = GetCurrentThreadId();
+	return t;
+}
+
 int pthread_cond_init(pthread_cond_t *cond, const void *unused)
 {
 	cond->waiters = 0;
diff --git a/compat/win32/pthread.h b/compat/win32/pthread.h
index a45f8d66df..2e20548557 100644
--- a/compat/win32/pthread.h
+++ b/compat/win32/pthread.h
@@ -58,6 +58,7 @@ typedef struct {
 	HANDLE handle;
 	void *(*start_routine)(void*);
 	void *arg;
+	DWORD tid;
 } pthread_t;
 
 extern int pthread_create(pthread_t *thread, const void *unused,
@@ -71,4 +72,28 @@ extern int pthread_create(pthread_t *thread, const void *unused,
 
 extern int win32_pthread_join(pthread_t *thread, void **value_ptr);
 
+#define pthread_equal(t1, t2) ((t1).tid == (t2).tid)
+extern pthread_t pthread_self(void);
+
+static inline int pthread_exit(void *ret)
+{
+	ExitThread((DWORD)ret);
+}
+
+typedef DWORD pthread_key_t;
+static inline int pthread_key_create(pthread_key_t *keyp, void (*destructor)(void *value))
+{
+	return (*keyp = TlsAlloc()) == TLS_OUT_OF_INDEXES ? EAGAIN : 0;
+}
+
+static inline int pthread_setspecific(pthread_key_t key, const void *value)
+{
+	return TlsSetValue(key, (void *)value) ? 0 : EINVAL;
+}
+
+static inline void *pthread_getspecific(pthread_key_t key)
+{
+	return TlsGetValue(key);
+}
+
 #endif /* PTHREAD_H */
diff --git a/git-compat-util.h b/git-compat-util.h
index c9d53397ad..02a73eeb66 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -200,6 +200,7 @@ extern char *gitbasename(char *);
 #include "compat/bswap.h"
 
 /* General helper functions */
+extern void vreportf(const char *prefix, const char *err, va_list params);
 extern NORETURN void usage(const char *err);
 extern NORETURN void usagef(const char *err, ...) __attribute__((format (printf, 1, 2)));
 extern NORETURN void die(const char *err, ...) __attribute__((format (printf, 1, 2)));
diff --git a/http-backend.c b/http-backend.c
index f0e787e37d..44ce6bb32b 100644
--- a/http-backend.c
+++ b/http-backend.c
@@ -488,14 +488,12 @@ static NORETURN void die_webcgi(const char *err, va_list params)
 	static int dead;
 
 	if (!dead) {
-		char buffer[1000];
 		dead = 1;
-
-		vsnprintf(buffer, sizeof(buffer), err, params);
-		fprintf(stderr, "fatal: %s\n", buffer);
 		http_status(500, "Internal Server Error");
 		hdr_nocache();
 		end_headers();
+
+		vreportf("fatal: ", err, params);
 	}
 	exit(0); /* we successfully reported a failure ;-) */
 }
diff --git a/run-command.c b/run-command.c
index c7793f50fb..2a1041ef65 100644
--- a/run-command.c
+++ b/run-command.c
@@ -84,6 +84,7 @@ static NORETURN void die_child(const char *err, va_list params)
 	unused = write(child_err, "\n", 1);
 	exit(128);
 }
+#endif
 
 static inline void set_cloexec(int fd)
 {
@@ -91,7 +92,6 @@ static inline void set_cloexec(int fd)
 	if (flags >= 0)
 		fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 }
-#endif
 
 static int wait_or_whine(pid_t pid, const char *argv0, int silent_exec_failure)
 {
@@ -449,11 +449,35 @@ int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const
 	return run_command(&cmd);
 }
 
-#ifdef WIN32
-static unsigned __stdcall run_thread(void *data)
+#ifndef NO_PTHREADS
+static pthread_t main_thread;
+static int main_thread_set;
+static pthread_key_t async_key;
+
+static void *run_thread(void *data)
 {
 	struct async *async = data;
-	return async->proc(async->proc_in, async->proc_out, async->data);
+	intptr_t ret;
+
+	pthread_setspecific(async_key, async);
+	ret = async->proc(async->proc_in, async->proc_out, async->data);
+	return (void *)ret;
+}
+
+static NORETURN void die_async(const char *err, va_list params)
+{
+	vreportf("fatal: ", err, params);
+
+	if (!pthread_equal(main_thread, pthread_self())) {
+		struct async *async = pthread_getspecific(async_key);
+		if (async->proc_in >= 0)
+			close(async->proc_in);
+		if (async->proc_out >= 0)
+			close(async->proc_out);
+		pthread_exit((void *)128);
+	}
+
+	exit(128);
 }
 #endif
 
@@ -499,7 +523,7 @@ int start_async(struct async *async)
 	else
 		proc_out = -1;
 
-#ifndef WIN32
+#ifdef NO_PTHREADS
 	/* Flush stdio before fork() to avoid cloning buffers */
 	fflush(NULL);
 
@@ -526,12 +550,29 @@ int start_async(struct async *async)
 	else if (async->out)
 		close(async->out);
 #else
+	if (!main_thread_set) {
+		/*
+		 * We assume that the first time that start_async is called
+		 * it is from the main thread.
+		 */
+		main_thread_set = 1;
+		main_thread = pthread_self();
+		pthread_key_create(&async_key, NULL);
+		set_die_routine(die_async);
+	}
+
+	if (proc_in >= 0)
+		set_cloexec(proc_in);
+	if (proc_out >= 0)
+		set_cloexec(proc_out);
 	async->proc_in = proc_in;
 	async->proc_out = proc_out;
-	async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
-	if (!async->tid) {
-		error("cannot create thread: %s", strerror(errno));
-		goto error;
+	{
+		int err = pthread_create(&async->tid, NULL, run_thread, async);
+		if (err) {
+			error("cannot create thread: %s", strerror(err));
+			goto error;
+		}
 	}
 #endif
 	return 0;
@@ -551,17 +592,15 @@ error:
 
 int finish_async(struct async *async)
 {
-#ifndef WIN32
-	int ret = wait_or_whine(async->pid, "child process", 0);
+#ifdef NO_PTHREADS
+	return wait_or_whine(async->pid, "child process", 0);
 #else
-	DWORD ret = 0;
-	if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
-		ret = error("waiting for thread failed: %lu", GetLastError());
-	else if (!GetExitCodeThread(async->tid, &ret))
-		ret = error("cannot get thread exit code: %lu", GetLastError());
-	CloseHandle(async->tid);
+	void *ret = (void *)(intptr_t)(-1);
+
+	if (pthread_join(async->tid, &ret))
+		error("pthread_join failed");
+	return (int)(intptr_t)ret;
 #endif
-	return ret;
 }
 
 int run_hook(const char *index_file, const char *name, ...)
diff --git a/run-command.h b/run-command.h
index 94619f52d9..56491b9f23 100644
--- a/run-command.h
+++ b/run-command.h
@@ -1,6 +1,10 @@
 #ifndef RUN_COMMAND_H
 #define RUN_COMMAND_H
 
+#ifndef NO_PTHREADS
+#include <pthread.h>
+#endif
+
 struct child_process {
 	const char **argv;
 	pid_t pid;
@@ -74,10 +78,10 @@ struct async {
 	void *data;
 	int in;		/* caller writes here and closes it */
 	int out;	/* caller reads from here and closes it */
-#ifndef WIN32
+#ifdef NO_PTHREADS
 	pid_t pid;
 #else
-	HANDLE tid;
+	pthread_t tid;
 	int proc_in;
 	int proc_out;
 #endif
diff --git a/t/t5530-upload-pack-error.sh b/t/t5530-upload-pack-error.sh
index a696b8791b..044603c26e 100755
--- a/t/t5530-upload-pack-error.sh
+++ b/t/t5530-upload-pack-error.sh
@@ -32,9 +32,9 @@ test_expect_success 'fsck fails' '
 
 test_expect_success 'upload-pack fails due to error in pack-objects packing' '
 
-	! echo "0032want $(git rev-parse HEAD)
-00000009done
-0000" | git upload-pack . > /dev/null 2> output.err &&
+	printf "0032want %s\n00000009done\n0000" \
+		$(git rev-parse HEAD) >input &&
+	test_must_fail git upload-pack . <input >/dev/null 2>output.err &&
 	grep "unable to read" output.err &&
 	grep "pack-objects died" output.err
 '
@@ -51,9 +51,9 @@ test_expect_success 'fsck fails' '
 '
 test_expect_success 'upload-pack fails due to error in rev-list' '
 
-	! echo "0032want $(git rev-parse HEAD)
-0034shallow $(git rev-parse HEAD^)00000009done
-0000" | git upload-pack . > /dev/null 2> output.err &&
+	printf "0032want %s\n0034shallow %s00000009done\n0000" \
+		$(git rev-parse HEAD) $(git rev-parse HEAD^) >input &&
+	test_must_fail git upload-pack . <input >/dev/null 2>output.err &&
 	# pack-objects survived
 	grep "Total.*, reused" output.err &&
 	# but there was an error, which must have been in rev-list
@@ -62,9 +62,9 @@ test_expect_success 'upload-pack fails due to error in rev-list' '
 
 test_expect_success 'upload-pack fails due to error in pack-objects enumeration' '
 
-	! echo "0032want $(git rev-parse HEAD)
-00000009done
-0000" | git upload-pack . > /dev/null 2> output.err &&
+	printf "0032want %s\n00000009done\n0000" \
+		$(git rev-parse HEAD) >input &&
+	test_must_fail git upload-pack . <input >/dev/null 2>output.err &&
 	grep "bad tree object" output.err &&
 	grep "pack-objects died" output.err
 '
diff --git a/usage.c b/usage.c
index 79856a2b9f..ec4cf53b6b 100644
--- a/usage.c
+++ b/usage.c
@@ -5,7 +5,7 @@
  */
 #include "git-compat-util.h"
 
-static void report(const char *prefix, const char *err, va_list params)
+void vreportf(const char *prefix, const char *err, va_list params)
 {
 	char msg[4096];
 	vsnprintf(msg, sizeof(msg), err, params);
@@ -14,24 +14,24 @@ static void report(const char *prefix, const char *err, va_list params)
 
 static NORETURN void usage_builtin(const char *err, va_list params)
 {
-	report("usage: ", err, params);
+	vreportf("usage: ", err, params);
 	exit(129);
 }
 
 static NORETURN void die_builtin(const char *err, va_list params)
 {
-	report("fatal: ", err, params);
+	vreportf("fatal: ", err, params);
 	exit(128);
 }
 
 static void error_builtin(const char *err, va_list params)
 {
-	report("error: ", err, params);
+	vreportf("error: ", err, params);
 }
 
 static void warn_builtin(const char *warn, va_list params)
 {
-	report("warning: ", warn, params);
+	vreportf("warning: ", warn, params);
 }
 
 /* If we are in a dlopen()ed .so write to a global variable would segfault