Fix sparse warnings
Fix warnings from 'make check'.
- These files don't include 'builtin.h' causing sparse to complain that
cmd_* isn't declared:
builtin/clone.c:364, builtin/fetch-pack.c:797,
builtin/fmt-merge-msg.c:34, builtin/hash-object.c:78,
builtin/merge-index.c:69, builtin/merge-recursive.c:22
builtin/merge-tree.c:341, builtin/mktag.c:156, builtin/notes.c:426
builtin/notes.c:822, builtin/pack-redundant.c:596,
builtin/pack-refs.c:10, builtin/patch-id.c:60, builtin/patch-id.c:149,
builtin/remote.c:1512, builtin/remote-ext.c:240,
builtin/remote-fd.c:53, builtin/reset.c:236, builtin/send-pack.c:384,
builtin/unpack-file.c:25, builtin/var.c:75
- These files have symbols which should be marked static since they're
only file scope:
submodule.c:12, diff.c:631, replace_object.c:92, submodule.c:13,
submodule.c:14, trace.c:78, transport.c:195, transport-helper.c:79,
unpack-trees.c:19, url.c:3, url.c:18, url.c:104, url.c:117, url.c:123,
url.c:129, url.c:136, thread-utils.c:21, thread-utils.c:48
- These files redeclare symbols to be different types:
builtin/index-pack.c:210, parse-options.c:564, parse-options.c:571,
usage.c:49, usage.c:58, usage.c:63, usage.c:72
- These files use a literal integer 0 when they really should use a NULL
pointer:
daemon.c:663, fast-import.c:2942, imap-send.c:1072, notes-merge.c:362
While we're in the area, clean up some unused #includes in builtin files
(mostly exec_cmd.h).
Signed-off-by: Stephen Boyd <bebarino@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
#include "builtin.h"
|
|
|
|
#include "pkt-line.h"
|
|
|
|
#include "fetch-pack.h"
|
|
|
|
#include "remote.h"
|
|
|
|
#include "connect.h"
|
|
|
|
#include "sha1-array.h"
|
|
|
|
|
|
|
|
static const char fetch_pack_usage[] =
|
fetch-pack: new --stdin option to read refs from stdin
If a remote repo has too many tags (or branches), cloning it over the
smart HTTP transport can fail because remote-curl.c puts all the refs
from the remote repo on the fetch-pack command line. This can make the
command line longer than the global OS command line limit, causing
fetch-pack to fail.
This is especially a problem on Windows where the command line limit is
orders of magnitude shorter than Linux. There are already real repos out
there that msysGit cannot clone over smart HTTP due to this problem.
Here is an easy way to trigger this problem:
git init too-many-refs
cd too-many-refs
echo bla > bla.txt
git add .
git commit -m test
sha=$(git rev-parse HEAD)
tag=$(perl -e 'print "bla" x 30')
for i in `seq 50000`; do
echo $sha refs/tags/$tag-$i >> .git/packed-refs
done
Then share this repo over the smart HTTP protocol and try cloning it:
$ git clone http://localhost/.../too-many-refs/.git
Cloning into 'too-many-refs'...
fatal: cannot exec 'fetch-pack': Argument list too long
50k tags is obviously an absurd number, but it is required to
demonstrate the problem on Linux because it has a much more generous
command line limit. On Windows the clone fails with as little as 500
tags in the above loop, which is getting uncomfortably close to the
number of tags you might see in real long lived repos.
This is not just theoretical, msysGit is already failing to clone our
company repo due to this. It's a large repo converted from CVS, nearly
10 years of history.
Four possible solutions were discussed on the Git mailing list (in no
particular order):
1) Call fetch-pack multiple times with smaller batches of refs.
This was dismissed as inefficient and inelegant.
2) Add option --refs-fd=$n to pass a an fd from where to read the refs.
This was rejected because inheriting descriptors other than
stdin/stdout/stderr through exec() is apparently problematic on Windows,
plus it would require changes to the run-command API to open extra
pipes.
3) Add option --refs-from=$tmpfile to pass the refs using a temp file.
This was not favored because of the temp file requirement.
4) Add option --stdin to pass the refs on stdin, one per line.
In the end this option was chosen as the most efficient and most
desirable from scripting perspective.
There was however a small complication when using stdin to pass refs to
fetch-pack. The --stateless-rpc option to fetch-pack also uses stdin for
communication with the remote server.
If we are going to sneak refs on stdin line by line, it would have to be
done very carefully in the presence of --stateless-rpc, because when
reading refs line by line we might read ahead too much data into our
buffer and eat some of the remote protocol data which is also coming on
stdin.
One way to solve this would be to refactor get_remote_heads() in
fetch-pack.c to accept a residual buffer from our stdin line parsing
above, but this function is used in several places so other callers
would be burdened by this residual buffer interface even when most of
them don't need it.
In the end we settled on the following solution:
If --stdin is specified without --stateless-rpc, fetch-pack would read
the refs from stdin one per line, in a script friendly format.
However if --stdin is specified together with --stateless-rpc,
fetch-pack would read the refs from stdin in packetized format
(pkt-line) with a flush packet terminating the list of refs. This way we
can read the exact number of bytes that we need from stdin, and then
get_remote_heads() can continue reading from the same fd without losing
a single byte of remote protocol data.
This way the --stdin option only loses generality and scriptability when
used together with --stateless-rpc, which is not easily scriptable
anyway because it also uses pkt-line when talking to the remote server.
Signed-off-by: Ivan Todoroski <grnch@gmx.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
"git fetch-pack [--all] [--stdin] [--quiet|-q] [--keep|-k] [--thin] "
|
|
|
|
"[--include-tag] [--upload-pack=<git-upload-pack>] [--depth=<n>] "
|
|
|
|
"[--no-progress] [--diag-url] [-v] [<host>:]<directory> [<refs>...]";
|
|
|
|
|
|
|
|
static void add_sought_entry_mem(struct ref ***sought, int *nr, int *alloc,
|
|
|
|
const char *name, int namelen)
|
|
|
|
{
|
|
|
|
struct ref *ref = xcalloc(1, sizeof(*ref) + namelen + 1);
|
|
|
|
unsigned char sha1[20];
|
|
|
|
|
|
|
|
if (namelen > 41 && name[40] == ' ' && !get_sha1_hex(name, sha1)) {
|
|
|
|
hashcpy(ref->old_sha1, sha1);
|
|
|
|
name += 41;
|
|
|
|
namelen -= 41;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(ref->name, name, namelen);
|
|
|
|
ref->name[namelen] = '\0';
|
|
|
|
(*nr)++;
|
|
|
|
ALLOC_GROW(*sought, *nr, *alloc);
|
|
|
|
(*sought)[*nr - 1] = ref;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void add_sought_entry(struct ref ***sought, int *nr, int *alloc,
|
|
|
|
const char *string)
|
|
|
|
{
|
|
|
|
add_sought_entry_mem(sought, nr, alloc, string, strlen(string));
|
|
|
|
}
|
|
|
|
|
|
|
|
int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
|
|
|
int i, ret;
|
|
|
|
struct ref *ref = NULL;
|
|
|
|
const char *dest = NULL;
|
|
|
|
struct ref **sought = NULL;
|
|
|
|
int nr_sought = 0, alloc_sought = 0;
|
|
|
|
int fd[2];
|
|
|
|
char *pack_lockfile = NULL;
|
|
|
|
char **pack_lockfile_ptr = NULL;
|
|
|
|
struct child_process *conn;
|
|
|
|
struct fetch_pack_args args;
|
|
|
|
struct sha1_array shallow = SHA1_ARRAY_INIT;
|
|
|
|
|
|
|
|
packet_trace_identity("fetch-pack");
|
|
|
|
|
|
|
|
memset(&args, 0, sizeof(args));
|
|
|
|
args.uploadpack = "git-upload-pack";
|
|
|
|
|
|
|
|
for (i = 1; i < argc && *argv[i] == '-'; i++) {
|
|
|
|
const char *arg = argv[i];
|
|
|
|
|
|
|
|
if (starts_with(arg, "--upload-pack=")) {
|
|
|
|
args.uploadpack = arg + 14;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (starts_with(arg, "--exec=")) {
|
|
|
|
args.uploadpack = arg + 7;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--quiet", arg) || !strcmp("-q", arg)) {
|
|
|
|
args.quiet = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--keep", arg) || !strcmp("-k", arg)) {
|
|
|
|
args.lock_pack = args.keep_pack;
|
|
|
|
args.keep_pack = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--thin", arg)) {
|
|
|
|
args.use_thin_pack = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--include-tag", arg)) {
|
|
|
|
args.include_tag = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--all", arg)) {
|
|
|
|
args.fetch_all = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--stdin", arg)) {
|
|
|
|
args.stdin_refs = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--diag-url", arg)) {
|
|
|
|
args.diag_url = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("-v", arg)) {
|
|
|
|
args.verbose = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (starts_with(arg, "--depth=")) {
|
|
|
|
args.depth = strtol(arg + 8, NULL, 0);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--no-progress", arg)) {
|
|
|
|
args.no_progress = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--stateless-rpc", arg)) {
|
|
|
|
args.stateless_rpc = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--lock-pack", arg)) {
|
|
|
|
args.lock_pack = 1;
|
|
|
|
pack_lockfile_ptr = &pack_lockfile;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--check-self-contained-and-connected", arg)) {
|
|
|
|
args.check_self_contained_and_connected = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--cloning", arg)) {
|
|
|
|
args.cloning = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp("--update-shallow", arg)) {
|
|
|
|
args.update_shallow = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
usage(fetch_pack_usage);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i < argc)
|
|
|
|
dest = argv[i++];
|
|
|
|
else
|
|
|
|
usage(fetch_pack_usage);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copy refs from cmdline to growable list, then append any
|
|
|
|
* refs from the standard input:
|
|
|
|
*/
|
|
|
|
for (; i < argc; i++)
|
|
|
|
add_sought_entry(&sought, &nr_sought, &alloc_sought, argv[i]);
|
fetch-pack: new --stdin option to read refs from stdin
If a remote repo has too many tags (or branches), cloning it over the
smart HTTP transport can fail because remote-curl.c puts all the refs
from the remote repo on the fetch-pack command line. This can make the
command line longer than the global OS command line limit, causing
fetch-pack to fail.
This is especially a problem on Windows where the command line limit is
orders of magnitude shorter than Linux. There are already real repos out
there that msysGit cannot clone over smart HTTP due to this problem.
Here is an easy way to trigger this problem:
git init too-many-refs
cd too-many-refs
echo bla > bla.txt
git add .
git commit -m test
sha=$(git rev-parse HEAD)
tag=$(perl -e 'print "bla" x 30')
for i in `seq 50000`; do
echo $sha refs/tags/$tag-$i >> .git/packed-refs
done
Then share this repo over the smart HTTP protocol and try cloning it:
$ git clone http://localhost/.../too-many-refs/.git
Cloning into 'too-many-refs'...
fatal: cannot exec 'fetch-pack': Argument list too long
50k tags is obviously an absurd number, but it is required to
demonstrate the problem on Linux because it has a much more generous
command line limit. On Windows the clone fails with as little as 500
tags in the above loop, which is getting uncomfortably close to the
number of tags you might see in real long lived repos.
This is not just theoretical, msysGit is already failing to clone our
company repo due to this. It's a large repo converted from CVS, nearly
10 years of history.
Four possible solutions were discussed on the Git mailing list (in no
particular order):
1) Call fetch-pack multiple times with smaller batches of refs.
This was dismissed as inefficient and inelegant.
2) Add option --refs-fd=$n to pass a an fd from where to read the refs.
This was rejected because inheriting descriptors other than
stdin/stdout/stderr through exec() is apparently problematic on Windows,
plus it would require changes to the run-command API to open extra
pipes.
3) Add option --refs-from=$tmpfile to pass the refs using a temp file.
This was not favored because of the temp file requirement.
4) Add option --stdin to pass the refs on stdin, one per line.
In the end this option was chosen as the most efficient and most
desirable from scripting perspective.
There was however a small complication when using stdin to pass refs to
fetch-pack. The --stateless-rpc option to fetch-pack also uses stdin for
communication with the remote server.
If we are going to sneak refs on stdin line by line, it would have to be
done very carefully in the presence of --stateless-rpc, because when
reading refs line by line we might read ahead too much data into our
buffer and eat some of the remote protocol data which is also coming on
stdin.
One way to solve this would be to refactor get_remote_heads() in
fetch-pack.c to accept a residual buffer from our stdin line parsing
above, but this function is used in several places so other callers
would be burdened by this residual buffer interface even when most of
them don't need it.
In the end we settled on the following solution:
If --stdin is specified without --stateless-rpc, fetch-pack would read
the refs from stdin one per line, in a script friendly format.
However if --stdin is specified together with --stateless-rpc,
fetch-pack would read the refs from stdin in packetized format
(pkt-line) with a flush packet terminating the list of refs. This way we
can read the exact number of bytes that we need from stdin, and then
get_remote_heads() can continue reading from the same fd without losing
a single byte of remote protocol data.
This way the --stdin option only loses generality and scriptability when
used together with --stateless-rpc, which is not easily scriptable
anyway because it also uses pkt-line when talking to the remote server.
Signed-off-by: Ivan Todoroski <grnch@gmx.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
if (args.stdin_refs) {
|
|
|
|
if (args.stateless_rpc) {
|
|
|
|
/* in stateless RPC mode we use pkt-line to read
|
|
|
|
* from stdin, until we get a flush packet
|
|
|
|
*/
|
|
|
|
for (;;) {
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
char *line = packet_read_line(0, NULL);
|
|
|
|
if (!line)
|
fetch-pack: new --stdin option to read refs from stdin
If a remote repo has too many tags (or branches), cloning it over the
smart HTTP transport can fail because remote-curl.c puts all the refs
from the remote repo on the fetch-pack command line. This can make the
command line longer than the global OS command line limit, causing
fetch-pack to fail.
This is especially a problem on Windows where the command line limit is
orders of magnitude shorter than Linux. There are already real repos out
there that msysGit cannot clone over smart HTTP due to this problem.
Here is an easy way to trigger this problem:
git init too-many-refs
cd too-many-refs
echo bla > bla.txt
git add .
git commit -m test
sha=$(git rev-parse HEAD)
tag=$(perl -e 'print "bla" x 30')
for i in `seq 50000`; do
echo $sha refs/tags/$tag-$i >> .git/packed-refs
done
Then share this repo over the smart HTTP protocol and try cloning it:
$ git clone http://localhost/.../too-many-refs/.git
Cloning into 'too-many-refs'...
fatal: cannot exec 'fetch-pack': Argument list too long
50k tags is obviously an absurd number, but it is required to
demonstrate the problem on Linux because it has a much more generous
command line limit. On Windows the clone fails with as little as 500
tags in the above loop, which is getting uncomfortably close to the
number of tags you might see in real long lived repos.
This is not just theoretical, msysGit is already failing to clone our
company repo due to this. It's a large repo converted from CVS, nearly
10 years of history.
Four possible solutions were discussed on the Git mailing list (in no
particular order):
1) Call fetch-pack multiple times with smaller batches of refs.
This was dismissed as inefficient and inelegant.
2) Add option --refs-fd=$n to pass a an fd from where to read the refs.
This was rejected because inheriting descriptors other than
stdin/stdout/stderr through exec() is apparently problematic on Windows,
plus it would require changes to the run-command API to open extra
pipes.
3) Add option --refs-from=$tmpfile to pass the refs using a temp file.
This was not favored because of the temp file requirement.
4) Add option --stdin to pass the refs on stdin, one per line.
In the end this option was chosen as the most efficient and most
desirable from scripting perspective.
There was however a small complication when using stdin to pass refs to
fetch-pack. The --stateless-rpc option to fetch-pack also uses stdin for
communication with the remote server.
If we are going to sneak refs on stdin line by line, it would have to be
done very carefully in the presence of --stateless-rpc, because when
reading refs line by line we might read ahead too much data into our
buffer and eat some of the remote protocol data which is also coming on
stdin.
One way to solve this would be to refactor get_remote_heads() in
fetch-pack.c to accept a residual buffer from our stdin line parsing
above, but this function is used in several places so other callers
would be burdened by this residual buffer interface even when most of
them don't need it.
In the end we settled on the following solution:
If --stdin is specified without --stateless-rpc, fetch-pack would read
the refs from stdin one per line, in a script friendly format.
However if --stdin is specified together with --stateless-rpc,
fetch-pack would read the refs from stdin in packetized format
(pkt-line) with a flush packet terminating the list of refs. This way we
can read the exact number of bytes that we need from stdin, and then
get_remote_heads() can continue reading from the same fd without losing
a single byte of remote protocol data.
This way the --stdin option only loses generality and scriptability when
used together with --stateless-rpc, which is not easily scriptable
anyway because it also uses pkt-line when talking to the remote server.
Signed-off-by: Ivan Todoroski <grnch@gmx.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
break;
|
|
|
|
add_sought_entry(&sought, &nr_sought, &alloc_sought, line);
|
fetch-pack: new --stdin option to read refs from stdin
If a remote repo has too many tags (or branches), cloning it over the
smart HTTP transport can fail because remote-curl.c puts all the refs
from the remote repo on the fetch-pack command line. This can make the
command line longer than the global OS command line limit, causing
fetch-pack to fail.
This is especially a problem on Windows where the command line limit is
orders of magnitude shorter than Linux. There are already real repos out
there that msysGit cannot clone over smart HTTP due to this problem.
Here is an easy way to trigger this problem:
git init too-many-refs
cd too-many-refs
echo bla > bla.txt
git add .
git commit -m test
sha=$(git rev-parse HEAD)
tag=$(perl -e 'print "bla" x 30')
for i in `seq 50000`; do
echo $sha refs/tags/$tag-$i >> .git/packed-refs
done
Then share this repo over the smart HTTP protocol and try cloning it:
$ git clone http://localhost/.../too-many-refs/.git
Cloning into 'too-many-refs'...
fatal: cannot exec 'fetch-pack': Argument list too long
50k tags is obviously an absurd number, but it is required to
demonstrate the problem on Linux because it has a much more generous
command line limit. On Windows the clone fails with as little as 500
tags in the above loop, which is getting uncomfortably close to the
number of tags you might see in real long lived repos.
This is not just theoretical, msysGit is already failing to clone our
company repo due to this. It's a large repo converted from CVS, nearly
10 years of history.
Four possible solutions were discussed on the Git mailing list (in no
particular order):
1) Call fetch-pack multiple times with smaller batches of refs.
This was dismissed as inefficient and inelegant.
2) Add option --refs-fd=$n to pass a an fd from where to read the refs.
This was rejected because inheriting descriptors other than
stdin/stdout/stderr through exec() is apparently problematic on Windows,
plus it would require changes to the run-command API to open extra
pipes.
3) Add option --refs-from=$tmpfile to pass the refs using a temp file.
This was not favored because of the temp file requirement.
4) Add option --stdin to pass the refs on stdin, one per line.
In the end this option was chosen as the most efficient and most
desirable from scripting perspective.
There was however a small complication when using stdin to pass refs to
fetch-pack. The --stateless-rpc option to fetch-pack also uses stdin for
communication with the remote server.
If we are going to sneak refs on stdin line by line, it would have to be
done very carefully in the presence of --stateless-rpc, because when
reading refs line by line we might read ahead too much data into our
buffer and eat some of the remote protocol data which is also coming on
stdin.
One way to solve this would be to refactor get_remote_heads() in
fetch-pack.c to accept a residual buffer from our stdin line parsing
above, but this function is used in several places so other callers
would be burdened by this residual buffer interface even when most of
them don't need it.
In the end we settled on the following solution:
If --stdin is specified without --stateless-rpc, fetch-pack would read
the refs from stdin one per line, in a script friendly format.
However if --stdin is specified together with --stateless-rpc,
fetch-pack would read the refs from stdin in packetized format
(pkt-line) with a flush packet terminating the list of refs. This way we
can read the exact number of bytes that we need from stdin, and then
get_remote_heads() can continue reading from the same fd without losing
a single byte of remote protocol data.
This way the --stdin option only loses generality and scriptability when
used together with --stateless-rpc, which is not easily scriptable
anyway because it also uses pkt-line when talking to the remote server.
Signed-off-by: Ivan Todoroski <grnch@gmx.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* read from stdin one ref per line, until EOF */
|
|
|
|
struct strbuf line = STRBUF_INIT;
|
|
|
|
while (strbuf_getline(&line, stdin, '\n') != EOF)
|
|
|
|
add_sought_entry(&sought, &nr_sought, &alloc_sought, line.buf);
|
fetch-pack: new --stdin option to read refs from stdin
If a remote repo has too many tags (or branches), cloning it over the
smart HTTP transport can fail because remote-curl.c puts all the refs
from the remote repo on the fetch-pack command line. This can make the
command line longer than the global OS command line limit, causing
fetch-pack to fail.
This is especially a problem on Windows where the command line limit is
orders of magnitude shorter than Linux. There are already real repos out
there that msysGit cannot clone over smart HTTP due to this problem.
Here is an easy way to trigger this problem:
git init too-many-refs
cd too-many-refs
echo bla > bla.txt
git add .
git commit -m test
sha=$(git rev-parse HEAD)
tag=$(perl -e 'print "bla" x 30')
for i in `seq 50000`; do
echo $sha refs/tags/$tag-$i >> .git/packed-refs
done
Then share this repo over the smart HTTP protocol and try cloning it:
$ git clone http://localhost/.../too-many-refs/.git
Cloning into 'too-many-refs'...
fatal: cannot exec 'fetch-pack': Argument list too long
50k tags is obviously an absurd number, but it is required to
demonstrate the problem on Linux because it has a much more generous
command line limit. On Windows the clone fails with as little as 500
tags in the above loop, which is getting uncomfortably close to the
number of tags you might see in real long lived repos.
This is not just theoretical, msysGit is already failing to clone our
company repo due to this. It's a large repo converted from CVS, nearly
10 years of history.
Four possible solutions were discussed on the Git mailing list (in no
particular order):
1) Call fetch-pack multiple times with smaller batches of refs.
This was dismissed as inefficient and inelegant.
2) Add option --refs-fd=$n to pass a an fd from where to read the refs.
This was rejected because inheriting descriptors other than
stdin/stdout/stderr through exec() is apparently problematic on Windows,
plus it would require changes to the run-command API to open extra
pipes.
3) Add option --refs-from=$tmpfile to pass the refs using a temp file.
This was not favored because of the temp file requirement.
4) Add option --stdin to pass the refs on stdin, one per line.
In the end this option was chosen as the most efficient and most
desirable from scripting perspective.
There was however a small complication when using stdin to pass refs to
fetch-pack. The --stateless-rpc option to fetch-pack also uses stdin for
communication with the remote server.
If we are going to sneak refs on stdin line by line, it would have to be
done very carefully in the presence of --stateless-rpc, because when
reading refs line by line we might read ahead too much data into our
buffer and eat some of the remote protocol data which is also coming on
stdin.
One way to solve this would be to refactor get_remote_heads() in
fetch-pack.c to accept a residual buffer from our stdin line parsing
above, but this function is used in several places so other callers
would be burdened by this residual buffer interface even when most of
them don't need it.
In the end we settled on the following solution:
If --stdin is specified without --stateless-rpc, fetch-pack would read
the refs from stdin one per line, in a script friendly format.
However if --stdin is specified together with --stateless-rpc,
fetch-pack would read the refs from stdin in packetized format
(pkt-line) with a flush packet terminating the list of refs. This way we
can read the exact number of bytes that we need from stdin, and then
get_remote_heads() can continue reading from the same fd without losing
a single byte of remote protocol data.
This way the --stdin option only loses generality and scriptability when
used together with --stateless-rpc, which is not easily scriptable
anyway because it also uses pkt-line when talking to the remote server.
Signed-off-by: Ivan Todoroski <grnch@gmx.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
strbuf_release(&line);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (args.stateless_rpc) {
|
|
|
|
conn = NULL;
|
|
|
|
fd[0] = 0;
|
|
|
|
fd[1] = 1;
|
|
|
|
} else {
|
|
|
|
int flags = args.verbose ? CONNECT_VERBOSE : 0;
|
|
|
|
if (args.diag_url)
|
|
|
|
flags |= CONNECT_DIAG_URL;
|
|
|
|
conn = git_connect(fd, dest, args.uploadpack,
|
|
|
|
flags);
|
|
|
|
if (!conn)
|
|
|
|
return args.diag_url ? 0 : 1;
|
|
|
|
}
|
|
|
|
get_remote_heads(fd[0], NULL, 0, &ref, 0, NULL, &shallow);
|
|
|
|
|
|
|
|
ref = fetch_pack(&args, fd, conn, ref, dest, sought, nr_sought,
|
|
|
|
&shallow, pack_lockfile_ptr);
|
|
|
|
if (pack_lockfile) {
|
|
|
|
printf("lock %s\n", pack_lockfile);
|
|
|
|
fflush(stdout);
|
|
|
|
}
|
|
|
|
if (args.check_self_contained_and_connected &&
|
|
|
|
args.self_contained_and_connected) {
|
|
|
|
printf("connectivity-ok\n");
|
|
|
|
fflush(stdout);
|
|
|
|
}
|
|
|
|
close(fd[0]);
|
|
|
|
close(fd[1]);
|
|
|
|
if (finish_connect(conn))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
ret = !ref;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the heads to pull were given, we should have consumed
|
|
|
|
* all of them by matching the remote. Otherwise, 'git fetch
|
|
|
|
* remote no-such-ref' would silently succeed without issuing
|
|
|
|
* an error.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < nr_sought; i++) {
|
|
|
|
if (!sought[i] || sought[i]->matched)
|
|
|
|
continue;
|
|
|
|
error("no such remote ref %s", sought[i]->name);
|
|
|
|
ret = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (ref) {
|
|
|
|
printf("%s %s\n",
|
|
|
|
sha1_to_hex(ref->old_sha1), ref->name);
|
|
|
|
ref = ref->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|