diff --git a/Makefile b/Makefile
index e8adeb09f1..224e193b66 100644
--- a/Makefile
+++ b/Makefile
@@ -918,6 +918,7 @@ LIB_OBJS += combine-diff.o
 LIB_OBJS += commit-graph.o
 LIB_OBJS += commit-reach.o
 LIB_OBJS += commit.o
+LIB_OBJS += compat/nonblock.o
 LIB_OBJS += compat/obstack.o
 LIB_OBJS += compat/terminal.o
 LIB_OBJS += compat/zlib-uncompress2.o
diff --git a/compat/nonblock.c b/compat/nonblock.c
new file mode 100644
index 0000000000..9694ebdb1d
--- /dev/null
+++ b/compat/nonblock.c
@@ -0,0 +1,50 @@
+#include "git-compat-util.h"
+#include "nonblock.h"
+
+#ifdef O_NONBLOCK
+
+/* Add O_NONBLOCK to the file status flags of fd, keeping the other flags. */
+int enable_pipe_nonblock(int fd)
+{
+	int cur = fcntl(fd, F_GETFL);
+
+	/* a failed F_GETFL has already set errno for our caller */
+	return cur < 0 ? -1 : fcntl(fd, F_SETFL, cur | O_NONBLOCK);
+}
+
+#elif defined(GIT_WINDOWS_NATIVE)
+
+#include "win32.h"
+
+int enable_pipe_nonblock(int fd)
+{
+	HANDLE handle = (HANDLE)_get_osfhandle(fd);
+	DWORD kind = GetFileType(handle);
+	DWORD state;
+	if (kind == FILE_TYPE_UNKNOWN && GetLastError() != NO_ERROR) {
+		errno = EBADF;
+		return -1;
+	}
+	if (kind != FILE_TYPE_PIPE)
+		BUG("unsupported file type: %lu", kind);
+	/* or PIPE_NOWAIT into the current state to keep the other bits */
+	if (!GetNamedPipeHandleState(handle, &state, NULL, NULL, NULL, NULL, 0))
+		goto win32_error;
+	state |= PIPE_NOWAIT;
+	if (!SetNamedPipeHandleState(handle, &state, NULL, NULL))
+		goto win32_error;
+	return 0;
+win32_error:
+	errno = err_win_to_posix(GetLastError());
+	return -1;
+}
+
+#else
+
+int enable_pipe_nonblock(int fd)
+{
+	errno = ENOSYS; /* no O_NONBLOCK and not native Windows */
+	return -1;
+}
+
+#endif
diff --git a/compat/nonblock.h b/compat/nonblock.h
new file mode 100644
index 0000000000..af1a331301
--- /dev/null
+++ b/compat/nonblock.h
@@ -0,0 +1,9 @@
+#ifndef COMPAT_NONBLOCK_H
+#define COMPAT_NONBLOCK_H
+
+/*
+ * Make the pipe "fd" non-blocking; returns -1 with errno set on failure.
+ */
+int enable_pipe_nonblock(int fd);
+
+#endif
diff --git a/git-compat-util.h b/git-compat-util.h
index 36a25ae252..6aee4d92e7 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -998,6 +998,28 @@ static inline unsigned long cast_size_t_to_ulong(size_t a)
 	return (unsigned long)a;
 }
 
+/*
+ * Limit size of IO chunks, because huge chunks only cause pain.  OS X
+ * 64-bit is buggy, returning EINVAL if len >= INT_MAX; and even in
+ * the absence of bugs, large chunks can result in bad latencies when
+ * you decide to kill the process.
+ *
+ * We pick 8 MiB as our default, but if the platform defines SSIZE_MAX
+ * that is smaller than that, clip it to SSIZE_MAX, as a call to
+ * read(2) or write(2) larger than that is allowed to fail.  As the last
+ * resort, we allow a port to pass via CFLAGS e.g. "-DMAX_IO_SIZE=value"
+ * to override this, if the definition of SSIZE_MAX given by the platform
+ * is broken.
+ */
+#ifndef MAX_IO_SIZE
+# define MAX_IO_SIZE_DEFAULT (8*1024*1024) /* 8 MiB */
+# if defined(SSIZE_MAX) && (SSIZE_MAX < MAX_IO_SIZE_DEFAULT)
+#  define MAX_IO_SIZE SSIZE_MAX
+# else
+#  define MAX_IO_SIZE MAX_IO_SIZE_DEFAULT
+# endif /* SSIZE_MAX smaller than the default */
+#endif /* MAX_IO_SIZE */
+
 #ifdef HAVE_ALLOCA_H
 # include <alloca.h>
 # define xalloca(size)      (alloca(size))
diff --git a/run-command.c b/run-command.c
index 14f17830f5..5ec3a46dcc 100644
--- a/run-command.c
+++ b/run-command.c
@@ -10,6 +10,7 @@
 #include "config.h"
 #include "packfile.h"
 #include "hook.h"
+#include "compat/nonblock.h"
 
 void child_process_init(struct child_process *child)
 {
@@ -1364,12 +1365,25 @@ static int pump_io_round(struct io_pump *slots, int nr, struct pollfd *pfd)
 			continue;
 
 		if (io->type == POLLOUT) {
-			ssize_t len = xwrite(io->fd,
-					     io->u.out.buf, io->u.out.len);
+			ssize_t len;
+
+			/*
+			 * Don't use xwrite() here. It loops forever on EAGAIN,
+			 * and we're in our own poll() loop here. That costs us
+			 * xwrite()'s handling of MAX_IO_SIZE and EINTR, so we
+			 * implement those ourselves. ENOSPC is retried as well;
+			 * NOTE(review): presumably means a full pipe; confirm.
+			 */
+			len = write(io->fd, io->u.out.buf,
+				    io->u.out.len <= MAX_IO_SIZE ?
+				    io->u.out.len : MAX_IO_SIZE);
 			if (len < 0) {
-				io->error = errno;
-				close(io->fd);
-				io->fd = -1;
+				if (errno != EINTR && errno != EAGAIN &&
+				    errno != ENOSPC) {
+					io->error = errno;
+					close(io->fd);
+					io->fd = -1;
+				}
 			} else {
 				io->u.out.buf += len;
 				io->u.out.len -= len;
@@ -1438,6 +1452,15 @@ int pipe_command(struct child_process *cmd,
 		return -1;
 
 	if (in) {
+		if (enable_pipe_nonblock(cmd->in) < 0) {
+			error_errno("unable to make pipe non-blocking");
+			close(cmd->in);
+			if (out)
+				close(cmd->out);
+			if (err)
+				close(cmd->err);
+			return -1;
+		}
 		io[nr].fd = cmd->in;
 		io[nr].type = POLLOUT;
 		io[nr].u.out.buf = in;
diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh
index b354fb39de..3b7df9bed5 100755
--- a/t/t3701-add-interactive.sh
+++ b/t/t3701-add-interactive.sh
@@ -766,6 +766,19 @@ test_expect_success 'detect bogus diffFilter output' '
 	force_color test_must_fail git add -p <y
 '
 
+test_expect_success 'handle very large filtered diff' '
+	git reset --hard &&
+	# The specific number here is not important, but it must be
+	# large enough that the output of "git diff --color" (which is
+	# fed through the diffFilter) fills up the pipe buffer.
+	# 10,000 results in ~200k of colored output.
+	test_seq 10000 >test &&
+	test_config interactive.diffFilter cat &&
+	printf y >y &&
+	force_color git add -p >output 2>&1 <y &&
+	git diff-files --exit-code -- test
+'
+
 test_expect_success 'diff.algorithm is passed to `git diff-files`' '
 	git reset --hard &&
 
diff --git a/wrapper.c b/wrapper.c
index cfe79bd081..299d6489a6 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -161,28 +161,6 @@ void xsetenv(const char *name, const char *value, int overwrite)
 		die_errno(_("could not setenv '%s'"), name ? name : "(null)");
 }
 
-/*
- * Limit size of IO chunks, because huge chunks only cause pain.  OS X
- * 64-bit is buggy, returning EINVAL if len >= INT_MAX; and even in
- * the absence of bugs, large chunks can result in bad latencies when
- * you decide to kill the process.
- *
- * We pick 8 MiB as our default, but if the platform defines SSIZE_MAX
- * that is smaller than that, clip it to SSIZE_MAX, as a call to
- * read(2) or write(2) larger than that is allowed to fail.  As the last
- * resort, we allow a port to pass via CFLAGS e.g. "-DMAX_IO_SIZE=value"
- * to override this, if the definition of SSIZE_MAX given by the platform
- * is broken.
- */
-#ifndef MAX_IO_SIZE
-# define MAX_IO_SIZE_DEFAULT (8*1024*1024)
-# if defined(SSIZE_MAX) && (SSIZE_MAX < MAX_IO_SIZE_DEFAULT)
-#  define MAX_IO_SIZE SSIZE_MAX
-# else
-#  define MAX_IO_SIZE MAX_IO_SIZE_DEFAULT
-# endif
-#endif
-
 /**
  * xopen() is the same as open(), but it die()s if the open() fails.
  */