From cf3c6352100a0d302276e46e3f9a7f0804e224d8 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 12 Dec 2016 14:52:22 -0500
Subject: [PATCH 1/4] alternates: accept double-quoted paths

We read lists of alternates from objects/info/alternates
files (delimited by newline), as well as from the
GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable
(delimited by colon or semi-colon, depending on the
platform).

There's no mechanism for quoting the delimiters, so it's
impossible to specify an alternate path that contains a
colon in the environment, or one that contains a newline in
a file. We've lived with that restriction for ages because
both alternates and filenames with colons are relatively
rare, and it's only a problem when the two meet. But since
722ff7f87 (receive-pack: quarantine objects until
pre-receive accepts, 2016-10-03), which builds on the
alternates system, every push causes the receiver to set
GIT_ALTERNATE_OBJECT_DIRECTORIES internally.

It would be convenient to have some way to quote the
delimiter so that we can represent arbitrary paths.

The simplest thing would be an escape character before a
quoted delimiter (e.g., "\:" as a literal colon). But that
creates a backwards compatibility problem: any path which
uses that escape character is now broken, and we've just
shifted the problem. We could choose an unlikely escape
character (e.g., something from the non-printable ASCII
range), but that's awkward to use.

Instead, let's treat names as unquoted unless they begin
with a double-quote, in which case they are interpreted via
our usual C-style quoting rules. This also breaks
backwards-compatibility, but in a smaller way: it only
matters if your file has a double-quote as the very _first_
character in the path (whereas an escape character is a
problem anywhere in the path).  It's also consistent with
many other parts of git, which accept either a bare pathname
or a double-quoted one, and the sender can choose to quote
or not as required.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git.txt    |  6 +++++
 sha1_file.c              | 47 ++++++++++++++++++++++++++++++----------
 t/t5615-alternate-env.sh | 18 +++++++++++++++
 3 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/Documentation/git.txt b/Documentation/git.txt
index b8bec711f4..5f7826bf39 100644
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -859,6 +859,12 @@ Git so take care if using a foreign front-end.
 	specifies a ":" separated (on Windows ";" separated) list
 	of Git object directories which can be used to search for Git
 	objects. New objects will not be written to these directories.
++
+Entries that begin with `"` (double-quote) will be interpreted
+as C-style quoted paths, removing leading and trailing
+double-quotes and respecting backslash escapes. E.g., the value
+`"path-with-\"-and-:-in-it":vanilla-path` has two paths:
+`path-with-"-and-:-in-it` and `vanilla-path`.
 
 `GIT_DIR`::
 	If the `GIT_DIR` environment variable is set then it
diff --git a/sha1_file.c b/sha1_file.c
index fbafdbed94..fc6d864fda 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -26,6 +26,7 @@
 #include "mru.h"
 #include "list.h"
 #include "mergesort.h"
+#include "quote.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -329,13 +330,40 @@ static int link_alt_odb_entry(const char *entry, const char *relative_base,
 	return 0;
 }
 
+static const char *parse_alt_odb_entry(const char *string,
+				       int sep,
+				       struct strbuf *out)
+{
+	const char *end;
+
+	strbuf_reset(out);
+
+	if (*string == '#') {
+		/* comment; consume up to next separator */
+		end = strchrnul(string, sep);
+	} else if (*string == '"' && !unquote_c_style(out, string, &end)) {
+		/*
+		 * quoted path; unquote_c_style has copied the
+		 * data for us and set "end". Broken quoting (e.g.,
+		 * an entry that doesn't end with a quote) falls
+		 * back to the unquoted case below.
+		 */
+	} else {
+		/* normal, unquoted path */
+		end = strchrnul(string, sep);
+		strbuf_add(out, string, end - string);
+	}
+
+	if (*end)
+		end++;
+	return end;
+}
+
 static void link_alt_odb_entries(const char *alt, int len, int sep,
 				 const char *relative_base, int depth)
 {
-	struct string_list entries = STRING_LIST_INIT_NODUP;
-	char *alt_copy;
-	int i;
 	struct strbuf objdirbuf = STRBUF_INIT;
+	struct strbuf entry = STRBUF_INIT;
 
 	if (depth > 5) {
 		error("%s: ignoring alternate object stores, nesting too deep.",
@@ -348,16 +376,13 @@ static void link_alt_odb_entries(const char *alt, int len, int sep,
 		die("unable to normalize object directory: %s",
 		    objdirbuf.buf);
 
-	alt_copy = xmemdupz(alt, len);
-	string_list_split_in_place(&entries, alt_copy, sep, -1);
-	for (i = 0; i < entries.nr; i++) {
-		const char *entry = entries.items[i].string;
-		if (entry[0] == '\0' || entry[0] == '#')
+	while (*alt) {
+		alt = parse_alt_odb_entry(alt, sep, &entry);
+		if (!entry.len)
 			continue;
-		link_alt_odb_entry(entry, relative_base, depth, objdirbuf.buf);
+		link_alt_odb_entry(entry.buf, relative_base, depth, objdirbuf.buf);
 	}
-	string_list_clear(&entries, 0);
-	free(alt_copy);
+	strbuf_release(&entry);
 	strbuf_release(&objdirbuf);
 }
 
diff --git a/t/t5615-alternate-env.sh b/t/t5615-alternate-env.sh
index 22d9d8178b..c33d089980 100755
--- a/t/t5615-alternate-env.sh
+++ b/t/t5615-alternate-env.sh
@@ -68,4 +68,22 @@ test_expect_success 'access alternate via relative path (subdir)' '
 	EOF
 '
 
+# set variables outside test to avoid quote insanity; the \057 is '/',
+# which doesn't need quoting, but just confirms that de-quoting
+# is working.
+quoted='"one.git\057objects"'
+unquoted='two.git/objects'
+test_expect_success 'mix of quoted and unquoted alternates' '
+	check_obj "$quoted:$unquoted" <<-EOF
+	$one blob
+	$two blob
+'
+
+test_expect_success 'broken quoting falls back to interpreting raw' '
+	mv one.git \"one.git &&
+	check_obj \"one.git/objects <<-EOF
+	$one blob
+	EOF
+'
+
 test_done

From aae2ae4f74f91f434f7f5c3ac25f37d80a9b319e Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Mon, 12 Dec 2016 14:53:55 -0500
Subject: [PATCH 2/4] tmp-objdir: quote paths we add to alternates

Commit 722ff7f87 (receive-pack: quarantine objects until
pre-receive accepts, 2016-10-03) regressed pushes to
repositories whose path contains a colon (or semi-colon on Windows)
because it adds the repository's main object directory to
GIT_ALTERNATE_OBJECT_DIRECTORIES. The receiver interprets
the colon as a delimiter, not as part of the path, and
index-pack is unable to find objects which it needs to
resolve deltas.

The previous commit introduced a quoting mechanism for the
alternates list; let's use it here to cover this case. We'll
avoid quoting when we can, though. This alternate setup is
also used when calling hooks, so it's possible that the user
may call older git implementations which don't understand
the quoting mechanism. By quoting only when necessary, this
setup will continue to work unless the user _also_ has a
repository whose path contains the delimiter.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5547-push-quarantine.sh | 19 +++++++++++++++++++
 tmp-objdir.c               | 18 +++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/t/t5547-push-quarantine.sh b/t/t5547-push-quarantine.sh
index 1e5d32d068..6275ec807b 100755
--- a/t/t5547-push-quarantine.sh
+++ b/t/t5547-push-quarantine.sh
@@ -33,4 +33,23 @@ test_expect_success 'rejected objects are removed' '
 	test_cmp expect actual
 '
 
+# MINGW does not allow colons in pathnames in the first place
+test_expect_success !MINGW 'push to repo path with colon' '
+	# The interesting failure case here is when the
+	# receiving end cannot access its original object directory,
+	# so make it likely for us to generate a delta by having
+	# a non-trivial file with multiple versions.
+
+	test-genrandom foo 4096 >file.bin &&
+	git add file.bin &&
+	git commit -m bin &&
+	git clone --bare . xxx:yyy.git &&
+
+	echo change >>file.bin &&
+	git commit -am change &&
+	# Note that we have to use the full path here, or it gets confused
+	# with the ssh host:path syntax.
+	git push "$PWD/xxx:yyy.git" HEAD
+'
+
 test_done
diff --git a/tmp-objdir.c b/tmp-objdir.c
index 64435f23a4..b2d9280f10 100644
--- a/tmp-objdir.c
+++ b/tmp-objdir.c
@@ -5,6 +5,7 @@
 #include "string-list.h"
 #include "strbuf.h"
 #include "argv-array.h"
+#include "quote.h"
 
 struct tmp_objdir {
 	struct strbuf path;
@@ -79,12 +80,27 @@ static void remove_tmp_objdir_on_signal(int signo)
  */
 static void env_append(struct argv_array *env, const char *key, const char *val)
 {
-	const char *old = getenv(key);
+	struct strbuf quoted = STRBUF_INIT;
+	const char *old;
 
+	/*
+	 * Avoid quoting if it's not necessary, for maximum compatibility
+	 * with older parsers which don't understand the quoting.
+	 */
+	if (*val == '"' || strchr(val, PATH_SEP)) {
+		strbuf_addch(&quoted, '"');
+		quote_c_style(val, &quoted, NULL, 1);
+		strbuf_addch(&quoted, '"');
+		val = quoted.buf;
+	}
+
+	old = getenv(key);
 	if (!old)
 		argv_array_pushf(env, "%s=%s", key, val);
 	else
 		argv_array_pushf(env, "%s=%s%c%s", key, old, PATH_SEP, val);
+
+	strbuf_release(&quoted);
 }
 
 static void env_replace(struct argv_array *env, const char *key, const char *val)

From eaa76de0dff57f13a6fb7f791f3a486a407f5c27 Mon Sep 17 00:00:00 2001
From: Johannes Sixt <j6t@kdbg.org>
Date: Tue, 13 Dec 2016 20:09:31 +0100
Subject: [PATCH 3/4] t5547-push-quarantine: run the path separator test on
 Windows, too

To perform the test case on Windows in a way that corresponds to the
POSIX version, inject the semicolon in a directory name.

Typically, an absolute POSIX style path, such as the one in $PWD, is
translated into a Windows style path by bash when it invokes git.exe.
However, the presence of the semicolon suppresses this translation;
but the untranslated POSIX style path is useless for git.exe.
Therefore, instead of $PWD pass the Windows style path that $(pwd)
produces.

Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5547-push-quarantine.sh | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/t/t5547-push-quarantine.sh b/t/t5547-push-quarantine.sh
index 6275ec807b..af9fcd833a 100755
--- a/t/t5547-push-quarantine.sh
+++ b/t/t5547-push-quarantine.sh
@@ -33,8 +33,7 @@ test_expect_success 'rejected objects are removed' '
 	test_cmp expect actual
 '
 
-# MINGW does not allow colons in pathnames in the first place
-test_expect_success !MINGW 'push to repo path with colon' '
+test_expect_success 'push to repo path with path separator (colon)' '
 	# The interesting failure case here is when the
 	# receiving end cannot access its original object directory,
 	# so make it likely for us to generate a delta by having
@@ -43,13 +42,20 @@ test_expect_success !MINGW 'push to repo path with colon' '
 	test-genrandom foo 4096 >file.bin &&
 	git add file.bin &&
 	git commit -m bin &&
-	git clone --bare . xxx:yyy.git &&
+
+	if test_have_prereq MINGW
+	then
+		pathsep=";"
+	else
+		pathsep=":"
+	fi &&
+	git clone --bare . "xxx${pathsep}yyy.git" &&
 
 	echo change >>file.bin &&
 	git commit -am change &&
 	# Note that we have to use the full path here, or it gets confused
 	# with the ssh host:path syntax.
-	git push "$PWD/xxx:yyy.git" HEAD
+	git push "$(pwd)/xxx${pathsep}yyy.git" HEAD
 '
 
 test_done

From 5e74824fac646e2ebe335a00bcecd91641a7f7ca Mon Sep 17 00:00:00 2001
From: Johannes Sixt <j6t@kdbg.org>
Date: Wed, 21 Dec 2016 22:33:43 +0100
Subject: [PATCH 4/4] t5615-alternate-env: double-quotes in file names do not
 work on Windows

Protect a recently added test case with !MINGW.

Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5615-alternate-env.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t5615-alternate-env.sh b/t/t5615-alternate-env.sh
index c33d089980..79628db3ec 100755
--- a/t/t5615-alternate-env.sh
+++ b/t/t5615-alternate-env.sh
@@ -79,7 +79,7 @@ test_expect_success 'mix of quoted and unquoted alternates' '
 	$two blob
 '
 
-test_expect_success 'broken quoting falls back to interpreting raw' '
+test_expect_success !MINGW 'broken quoting falls back to interpreting raw' '
 	mv one.git \"one.git &&
 	check_obj \"one.git/objects <<-EOF
 	$one blob