You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1658 lines
44 KiB

#include "cache.h"
#include "repository.h"
#include "config.h"
#include "lockfile.h"
#include "refs.h"
#include "pkt-line.h"
#include "commit.h"
#include "tag.h"
#include "exec-cmd.h"
#include "pack.h"
#include "sideband.h"
#include "fetch-pack.h"
#include "remote.h"
#include "run-command.h"
#include "connect.h"
#include "transport.h"
#include "version.h"
#include "sha1-array.h"
#include "oidset.h"
#include "packfile.h"
#include "object-store.h"
fetch-pack: write shallow, then check connectivity When fetching, connectivity is checked after the shallow file is updated. There are 2 issues with this: (1) the connectivity check is only performed up to ancestors of existing refs (which is not thorough enough if we were deepening an existing ref in the first place), and (2) there is no rollback of the shallow file if the connectivity check fails. To solve (1), update the connectivity check to check the ancestry chain completely in the case of a deepening fetch by refraining from passing "--not --all" when invoking rev-list in connected.c. To solve (2), have fetch_pack() perform its own connectivity check before updating the shallow file. To support existing use cases in which "git fetch-pack" is used to download objects without much regard as to the connectivity of the resulting objects with respect to the existing repository, the connectivity check is only done if necessary (that is, the fetch is not a clone, and the fetch involves shallow/deepen functionality). "git fetch" still performs its own connectivity check, preserving correctness but sometimes performing redundant work. This redundancy is mitigated by the fact that fetch_pack() reports if it has performed a connectivity check itself, and if the transport supports connect or stateless-connect, it will bubble up that report so that "git fetch" knows not to perform the connectivity check in such a case. This was noticed when a user tried to deepen an existing repository by fetching with --no-shallow from a server that did not send all necessary objects - the connectivity check as run by "git fetch" succeeded, but a subsequent "git fsck" failed. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
#include "connected.h"
#include "fetch-negotiator.h"
fetch: implement fetch.fsck.* Implement support for fetch.fsck.* corresponding with the existing receive.fsck.*. This allows for pedantically cloning repositories with specific issues without turning off fetch.fsckObjects. One such repository is https://github.com/robbyrussell/oh-my-zsh.git which before this change will emit this error when cloned with fetch.fsckObjects: error: object 2b7227859263b6aabcc28355b0b994995b7148b6: zeroPaddedFilemode: contains zero-padded file modes fatal: Error in object fatal: index-pack failed Now with fetch.fsck.zeroPaddedFilemode=warn we'll warn about that issue, but the clone will succeed: warning: object 2b7227859263b6aabcc28355b0b994995b7148b6: zeroPaddedFilemode: contains zero-padded file modes warning: object a18c4d13c2a5fa2d4ecd5346c50e119b999b807d: zeroPaddedFilemode: contains zero-padded file modes warning: object 84df066176c8da3fd59b13731a86d90f4f1e5c9d: zeroPaddedFilemode: contains zero-padded file modes The motivation for this is to be able to turn on fetch.fsckObjects globally across a fleet of computers but still be able to manually clone various legacy repositories by either white-listing specific issues, or better yet whitelist specific objects. The use of --git-dir=* instead of -C in the tests could be considered somewhat archaic, but the tests I'm adding here are duplicating the corresponding receive.* tests with as few changes as possible. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
#include "fsck.h"
static int transfer_unpack_limit = -1;
static int fetch_unpack_limit = -1;
static int unpack_limit = 100;
static int prefer_ofs_delta = 1;
static int no_done;
static int deepen_since_ok;
static int deepen_not_ok;
static int fetch_fsck_objects = -1;
static int transfer_fsck_objects = -1;
static int agent_supported;
static int server_supports_filtering;
static struct lock_file shallow_lock;
static const char *alternate_shallow_file;
static char *negotiation_algorithm;
fetch: implement fetch.fsck.* Implement support for fetch.fsck.* corresponding with the existing receive.fsck.*. This allows for pedantically cloning repositories with specific issues without turning off fetch.fsckObjects. One such repository is https://github.com/robbyrussell/oh-my-zsh.git which before this change will emit this error when cloned with fetch.fsckObjects: error: object 2b7227859263b6aabcc28355b0b994995b7148b6: zeroPaddedFilemode: contains zero-padded file modes fatal: Error in object fatal: index-pack failed Now with fetch.fsck.zeroPaddedFilemode=warn we'll warn about that issue, but the clone will succeed: warning: object 2b7227859263b6aabcc28355b0b994995b7148b6: zeroPaddedFilemode: contains zero-padded file modes warning: object a18c4d13c2a5fa2d4ecd5346c50e119b999b807d: zeroPaddedFilemode: contains zero-padded file modes warning: object 84df066176c8da3fd59b13731a86d90f4f1e5c9d: zeroPaddedFilemode: contains zero-padded file modes The motivation for this is to be able to turn on fetch.fsckObjects globally across a fleet of computers but still be able to manually clone various legacy repositories by either white-listing specific issues, or better yet whitelist specific objects. The use of --git-dir=* instead of -C in the tests could be considered somewhat archaic, but the tests I'm adding here are duplicating the corresponding receive.* tests with as few changes as possible. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
static struct strbuf fsck_msg_types = STRBUF_INIT;
/* Remember to update object flag allocation in object.h */
#define COMPLETE (1U << 0)
#define ALTERNATE (1U << 1)
/*
* After sending this many "have"s if we do not get any new ACK , we
* give up traversing our history.
*/
#define MAX_IN_VAIN 256
static int multi_ack, use_sideband;
/* Allow specifying sha1 if it is a ref tip. */
#define ALLOW_TIP_SHA1 01
/* Allow request of a sha1 if it is reachable from a ref (possibly hidden ref). */
#define ALLOW_REACHABLE_SHA1 02
static unsigned int allow_unadvertised_object_request;
__attribute__((format (printf, 2, 3)))
static inline void print_verbose(const struct fetch_pack_args *args,
const char *fmt, ...)
{
va_list params;
if (!args->verbose)
return;
va_start(params, fmt);
vfprintf(stderr, fmt, params);
va_end(params);
fputc('\n', stderr);
}
fetch-pack: cache results of for_each_alternate_ref We may run for_each_alternate_ref() twice, once in find_common() and once in everything_local(). This operation can be expensive, because it involves running a sub-process which must freshly load all of the alternate's refs from disk. Let's cache and reuse the results between the two calls. We can make some optimizations based on the particular use pattern in fetch-pack to keep our memory usage down. The first is that we only care about the sha1s, not the refs themselves. So it's OK to store only the sha1s, and to suppress duplicates. The natural fit would therefore be a sha1_array. However, sha1_array's de-duplication happens only after it has read and sorted all entries. It still stores each duplicate. For an alternate with a large number of refs pointing to the same commits, this is a needless expense. Instead, we'd prefer to eliminate duplicates before putting them in the cache, which implies using a hash. We can further note that fetch-pack will call parse_object() on each alternate sha1. We can therefore keep our cache as a set of pointers to "struct object". That gives us a place to put our "already seen" bit with an optimized hash lookup. And as a bonus, the object stores the sha1 for us, so pointer-to-object is all we need. There are two extra optimizations I didn't do here: - we actually store an array of pointer-to-object. Technically we could just walk the obj_hash table looking for entries with the ALTERNATE flag set (because our use case doesn't care about the order here). But that hash table may be mostly composed of non-ALTERNATE entries, so we'd waste time walking over them. So it would be a slight win in memory use, but a loss in CPU. - the items we pull out of the cache are actual "struct object"s, but then we feed "obj->sha1" to our sub-functions, which promptly call parse_object(). This second parse is cheap, because it starts with lookup_object() and will bail immediately when it sees we've already parsed the object. We could save the extra hash lookup, but it would involve refactoring the functions we call. It may or may not be worth the trouble. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
struct alternate_object_cache {
struct object **items;
size_t nr, alloc;
};
static void cache_one_alternate(const char *refname,
const struct object_id *oid,
void *vcache)
{
struct alternate_object_cache *cache = vcache;
struct object *obj = parse_object(the_repository, oid);
fetch-pack: cache results of for_each_alternate_ref We may run for_each_alternate_ref() twice, once in find_common() and once in everything_local(). This operation can be expensive, because it involves running a sub-process which must freshly load all of the alternate's refs from disk. Let's cache and reuse the results between the two calls. We can make some optimizations based on the particular use pattern in fetch-pack to keep our memory usage down. The first is that we only care about the sha1s, not the refs themselves. So it's OK to store only the sha1s, and to suppress duplicates. The natural fit would therefore be a sha1_array. However, sha1_array's de-duplication happens only after it has read and sorted all entries. It still stores each duplicate. For an alternate with a large number of refs pointing to the same commits, this is a needless expense. Instead, we'd prefer to eliminate duplicates before putting them in the cache, which implies using a hash. We can further note that fetch-pack will call parse_object() on each alternate sha1. We can therefore keep our cache as a set of pointers to "struct object". That gives us a place to put our "already seen" bit with an optimized hash lookup. And as a bonus, the object stores the sha1 for us, so pointer-to-object is all we need. There are two extra optimizations I didn't do here: - we actually store an array of pointer-to-object. Technically we could just walk the obj_hash table looking for entries with the ALTERNATE flag set (because our use case doesn't care about the order here). But that hash table may be mostly composed of non-ALTERNATE entries, so we'd waste time walking over them. So it would be a slight win in memory use, but a loss in CPU. - the items we pull out of the cache are actual "struct object"s, but then we feed "obj->sha1" to our sub-functions, which promptly call parse_object(). This second parse is cheap, because it starts with lookup_object() and will bail immediately when it sees we've already parsed the object. We could save the extra hash lookup, but it would involve refactoring the functions we call. It may or may not be worth the trouble. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
if (!obj || (obj->flags & ALTERNATE))
return;
obj->flags |= ALTERNATE;
ALLOC_GROW(cache->items, cache->nr + 1, cache->alloc);
cache->items[cache->nr++] = obj;
}
static void for_each_cached_alternate(struct fetch_negotiator *negotiator,
void (*cb)(struct fetch_negotiator *,
struct object *))
fetch-pack: cache results of for_each_alternate_ref We may run for_each_alternate_ref() twice, once in find_common() and once in everything_local(). This operation can be expensive, because it involves running a sub-process which must freshly load all of the alternate's refs from disk. Let's cache and reuse the results between the two calls. We can make some optimizations based on the particular use pattern in fetch-pack to keep our memory usage down. The first is that we only care about the sha1s, not the refs themselves. So it's OK to store only the sha1s, and to suppress duplicates. The natural fit would therefore be a sha1_array. However, sha1_array's de-duplication happens only after it has read and sorted all entries. It still stores each duplicate. For an alternate with a large number of refs pointing to the same commits, this is a needless expense. Instead, we'd prefer to eliminate duplicates before putting them in the cache, which implies using a hash. We can further note that fetch-pack will call parse_object() on each alternate sha1. We can therefore keep our cache as a set of pointers to "struct object". That gives us a place to put our "already seen" bit with an optimized hash lookup. And as a bonus, the object stores the sha1 for us, so pointer-to-object is all we need. There are two extra optimizations I didn't do here: - we actually store an array of pointer-to-object. Technically we could just walk the obj_hash table looking for entries with the ALTERNATE flag set (because our use case doesn't care about the order here). But that hash table may be mostly composed of non-ALTERNATE entries, so we'd waste time walking over them. So it would be a slight win in memory use, but a loss in CPU. - the items we pull out of the cache are actual "struct object"s, but then we feed "obj->sha1" to our sub-functions, which promptly call parse_object(). This second parse is cheap, because it starts with lookup_object() and will bail immediately when it sees we've already parsed the object. We could save the extra hash lookup, but it would involve refactoring the functions we call. It may or may not be worth the trouble. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
{
static int initialized;
static struct alternate_object_cache cache;
size_t i;
if (!initialized) {
for_each_alternate_ref(cache_one_alternate, &cache);
initialized = 1;
}
for (i = 0; i < cache.nr; i++)
cb(negotiator, cache.items[i]);
}
static int rev_list_insert_ref(struct fetch_negotiator *negotiator,
const char *refname,
const struct object_id *oid)
{
struct object *o = deref_tag(the_repository,
parse_object(the_repository, oid),
refname, 0);
if (o && o->type == OBJ_COMMIT)
negotiator->add_tip(negotiator, (struct commit *)o);
return 0;
}
static int rev_list_insert_ref_oid(const char *refname, const struct object_id *oid,
int flag, void *cb_data)
{
return rev_list_insert_ref(cb_data, refname, oid);
}
enum ack_type {
NAK = 0,
ACK,
ACK_continue,
ACK_common,
ACK_ready
};
static void consume_shallow_list(struct fetch_pack_args *args, int fd)
{
if (args->stateless_rpc && args->deepen) {
/* If we sent a depth we will get back "duplicate"
* shallow and unshallow commands every time there
* is a block of have lines exchanged.
*/
pkt-line: provide a LARGE_PACKET_MAX static buffer Most of the callers of packet_read_line just read into a static 1000-byte buffer (callers which handle arbitrary binary data already use LARGE_PACKET_MAX). This works fine in practice, because: 1. The only variable-sized data in these lines is a ref name, and refs tend to be a lot shorter than 1000 characters. 2. When sending ref lines, git-core always limits itself to 1000 byte packets. However, the only limit given in the protocol specification in Documentation/technical/protocol-common.txt is LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in pack-protocol.txt, and then only describing what we write, not as a specific limit for readers. This patch lets us bump the 1000-byte limit to LARGE_PACKET_MAX. Even though git-core will never write a packet where this makes a difference, there are two good reasons to do this: 1. Other git implementations may have followed protocol-common.txt and used a larger maximum size. We don't bump into it in practice because it would involve very long ref names. 2. We may want to increase the 1000-byte limit one day. Since packets are transferred before any capabilities, it's difficult to do this in a backwards-compatible way. But if we bump the size of buffer the readers can handle, eventually older versions of git will be obsolete enough that we can justify bumping the writers, as well. We don't have plans to do this anytime soon, but there is no reason not to start the clock ticking now. Just bumping all of the reading bufs to LARGE_PACKET_MAX would waste memory. Instead, since most readers just read into a temporary buffer anyway, let's provide a single static buffer that all callers can use. We can further wrap this detail away by having the packet_read_line wrapper just use the buffer transparently and return a pointer to the static storage. That covers most of the cases, and the remaining ones already read into their own LARGE_PACKET_MAX buffers. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
char *line;
while ((line = packet_read_line(fd, NULL))) {
if (starts_with(line, "shallow "))
continue;
if (starts_with(line, "unshallow "))
continue;
die(_("git fetch-pack: expected shallow list"));
}
}
}
static enum ack_type get_ack(int fd, struct object_id *result_oid)
{
pkt-line: provide a LARGE_PACKET_MAX static buffer Most of the callers of packet_read_line just read into a static 1000-byte buffer (callers which handle arbitrary binary data already use LARGE_PACKET_MAX). This works fine in practice, because: 1. The only variable-sized data in these lines is a ref name, and refs tend to be a lot shorter than 1000 characters. 2. When sending ref lines, git-core always limits itself to 1000 byte packets. However, the only limit given in the protocol specification in Documentation/technical/protocol-common.txt is LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in pack-protocol.txt, and then only describing what we write, not as a specific limit for readers. This patch lets us bump the 1000-byte limit to LARGE_PACKET_MAX. Even though git-core will never write a packet where this makes a difference, there are two good reasons to do this: 1. Other git implementations may have followed protocol-common.txt and used a larger maximum size. We don't bump into it in practice because it would involve very long ref names. 2. We may want to increase the 1000-byte limit one day. Since packets are transferred before any capabilities, it's difficult to do this in a backwards-compatible way. But if we bump the size of buffer the readers can handle, eventually older versions of git will be obsolete enough that we can justify bumping the writers, as well. We don't have plans to do this anytime soon, but there is no reason not to start the clock ticking now. Just bumping all of the reading bufs to LARGE_PACKET_MAX would waste memory. Instead, since most readers just read into a temporary buffer anyway, let's provide a single static buffer that all callers can use. We can further wrap this detail away by having the packet_read_line wrapper just use the buffer transparently and return a pointer to the static storage. That covers most of the cases, and the remaining ones already read into their own LARGE_PACKET_MAX buffers. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
int len;
char *line = packet_read_line(fd, &len);
const char *arg;
if (!line)
die(_("git fetch-pack: expected ACK/NAK, got a flush packet"));
if (!strcmp(line, "NAK"))
return NAK;
if (skip_prefix(line, "ACK ", &arg)) {
if (!get_oid_hex(arg, result_oid)) {
arg += 40;
len -= arg - line;
if (len < 1)
return ACK;
if (strstr(arg, "continue"))
return ACK_continue;
if (strstr(arg, "common"))
return ACK_common;
if (strstr(arg, "ready"))
return ACK_ready;
return ACK;
}
}
if (skip_prefix(line, "ERR ", &arg))
die(_("remote error: %s"), arg);
die(_("git fetch-pack: expected ACK/NAK, got '%s'"), line);
}
static void send_request(struct fetch_pack_args *args,
int fd, struct strbuf *buf)
{
if (args->stateless_rpc) {
send_sideband(fd, -1, buf->buf, buf->len, LARGE_PACKET_MAX);
packet_flush(fd);
} else
write_or_die(fd, buf->buf, buf->len);
}
static void insert_one_alternate_object(struct fetch_negotiator *negotiator,
struct object *obj)
{
rev_list_insert_ref(negotiator, NULL, &obj->oid);
}
#define INITIAL_FLUSH 16
#define PIPESAFE_FLUSH 32
#define LARGE_FLUSH 16384
static int next_flush(int stateless_rpc, int count)
{
if (stateless_rpc) {
if (count < LARGE_FLUSH)
count <<= 1;
else
count = count * 11 / 10;
} else {
if (count < PIPESAFE_FLUSH)
count <<= 1;
else
count += PIPESAFE_FLUSH;
}
return count;
}
static void mark_tips(struct fetch_negotiator *negotiator,
const struct oid_array *negotiation_tips)
{
int i;
if (!negotiation_tips) {
for_each_ref(rev_list_insert_ref_oid, negotiator);
return;
}
for (i = 0; i < negotiation_tips->nr; i++)
rev_list_insert_ref(negotiator, NULL,
&negotiation_tips->oid[i]);
return;
}
static int find_common(struct fetch_negotiator *negotiator,
struct fetch_pack_args *args,
int fd[2], struct object_id *result_oid,
struct ref *refs)
{
int fetching;
int count = 0, flushes = 0, flush_at = INITIAL_FLUSH, retval;
const struct object_id *oid;
unsigned in_vain = 0;
int got_continue = 0;
int got_ready = 0;
struct strbuf req_buf = STRBUF_INIT;
size_t state_len = 0;
if (args->stateless_rpc && multi_ack == 1)
die(_("--stateless-rpc requires multi_ack_detailed"));
mark_tips(negotiator, args->negotiation_tips);
for_each_cached_alternate(negotiator, insert_one_alternate_object);
fetching = 0;
for ( ; refs ; refs = refs->next) {
struct object_id *remote = &refs->old_oid;
const char *remote_hex;
struct object *o;
/*
* If that object is complete (i.e. it is an ancestor of a
* local ref), we tell them we have it but do not have to
* tell them about its ancestors, which they already know
* about.
*
* We use lookup_object here because we are only
* interested in the case we *know* the object is
* reachable and we have already scanned it.
*/
if (((o = lookup_object(the_repository, remote->hash)) != NULL) &&
(o->flags & COMPLETE)) {
continue;
}
remote_hex = oid_to_hex(remote);
if (!fetching) {
struct strbuf c = STRBUF_INIT;
if (multi_ack == 2) strbuf_addstr(&c, " multi_ack_detailed");
if (multi_ack == 1) strbuf_addstr(&c, " multi_ack");
if (no_done) strbuf_addstr(&c, " no-done");
if (use_sideband == 2) strbuf_addstr(&c, " side-band-64k");
if (use_sideband == 1) strbuf_addstr(&c, " side-band");
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
if (args->deepen_relative) strbuf_addstr(&c, " deepen-relative");
if (args->use_thin_pack) strbuf_addstr(&c, " thin-pack");
if (args->no_progress) strbuf_addstr(&c, " no-progress");
if (args->include_tag) strbuf_addstr(&c, " include-tag");
if (prefer_ofs_delta) strbuf_addstr(&c, " ofs-delta");
if (deepen_since_ok) strbuf_addstr(&c, " deepen-since");
if (deepen_not_ok) strbuf_addstr(&c, " deepen-not");
if (agent_supported) strbuf_addf(&c, " agent=%s",
git_user_agent_sanitized());
if (args->filter_options.choice)
strbuf_addstr(&c, " filter");
packet_buf_write(&req_buf, "want %s%s\n", remote_hex, c.buf);
strbuf_release(&c);
} else
packet_buf_write(&req_buf, "want %s\n", remote_hex);
fetching++;
}
if (!fetching) {
strbuf_release(&req_buf);
packet_flush(fd[1]);
return 1;
}
if (is_repository_shallow(the_repository))
write_shallow_commits(&req_buf, 1, NULL);
if (args->depth > 0)
packet_buf_write(&req_buf, "deepen %d", args->depth);
if (args->deepen_since) {
timestamp_t max_age = approxidate(args->deepen_since);
packet_buf_write(&req_buf, "deepen-since %"PRItime, max_age);
}
if (args->deepen_not) {
int i;
for (i = 0; i < args->deepen_not->nr; i++) {
struct string_list_item *s = args->deepen_not->items + i;
packet_buf_write(&req_buf, "deepen-not %s", s->string);
}
}
if (server_supports_filtering && args->filter_options.choice)
packet_buf_write(&req_buf, "filter %s",
args->filter_options.filter_spec);
packet_buf_flush(&req_buf);
state_len = req_buf.len;
if (args->deepen) {
pkt-line: provide a LARGE_PACKET_MAX static buffer Most of the callers of packet_read_line just read into a static 1000-byte buffer (callers which handle arbitrary binary data already use LARGE_PACKET_MAX). This works fine in practice, because: 1. The only variable-sized data in these lines is a ref name, and refs tend to be a lot shorter than 1000 characters. 2. When sending ref lines, git-core always limits itself to 1000 byte packets. However, the only limit given in the protocol specification in Documentation/technical/protocol-common.txt is LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in pack-protocol.txt, and then only describing what we write, not as a specific limit for readers. This patch lets us bump the 1000-byte limit to LARGE_PACKET_MAX. Even though git-core will never write a packet where this makes a difference, there are two good reasons to do this: 1. Other git implementations may have followed protocol-common.txt and used a larger maximum size. We don't bump into it in practice because it would involve very long ref names. 2. We may want to increase the 1000-byte limit one day. Since packets are transferred before any capabilities, it's difficult to do this in a backwards-compatible way. But if we bump the size of buffer the readers can handle, eventually older versions of git will be obsolete enough that we can justify bumping the writers, as well. We don't have plans to do this anytime soon, but there is no reason not to start the clock ticking now. Just bumping all of the reading bufs to LARGE_PACKET_MAX would waste memory. Instead, since most readers just read into a temporary buffer anyway, let's provide a single static buffer that all callers can use. We can further wrap this detail away by having the packet_read_line wrapper just use the buffer transparently and return a pointer to the static storage. That covers most of the cases, and the remaining ones already read into their own LARGE_PACKET_MAX buffers. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
char *line;
const char *arg;
struct object_id oid;
send_request(args, fd[1], &req_buf);
pkt-line: provide a LARGE_PACKET_MAX static buffer Most of the callers of packet_read_line just read into a static 1000-byte buffer (callers which handle arbitrary binary data already use LARGE_PACKET_MAX). This works fine in practice, because: 1. The only variable-sized data in these lines is a ref name, and refs tend to be a lot shorter than 1000 characters. 2. When sending ref lines, git-core always limits itself to 1000 byte packets. However, the only limit given in the protocol specification in Documentation/technical/protocol-common.txt is LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in pack-protocol.txt, and then only describing what we write, not as a specific limit for readers. This patch lets us bump the 1000-byte limit to LARGE_PACKET_MAX. Even though git-core will never write a packet where this makes a difference, there are two good reasons to do this: 1. Other git implementations may have followed protocol-common.txt and used a larger maximum size. We don't bump into it in practice because it would involve very long ref names. 2. We may want to increase the 1000-byte limit one day. Since packets are transferred before any capabilities, it's difficult to do this in a backwards-compatible way. But if we bump the size of buffer the readers can handle, eventually older versions of git will be obsolete enough that we can justify bumping the writers, as well. We don't have plans to do this anytime soon, but there is no reason not to start the clock ticking now. Just bumping all of the reading bufs to LARGE_PACKET_MAX would waste memory. Instead, since most readers just read into a temporary buffer anyway, let's provide a single static buffer that all callers can use. We can further wrap this detail away by having the packet_read_line wrapper just use the buffer transparently and return a pointer to the static storage. That covers most of the cases, and the remaining ones already read into their own LARGE_PACKET_MAX buffers. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
while ((line = packet_read_line(fd[0], NULL))) {
if (skip_prefix(line, "shallow ", &arg)) {
if (get_oid_hex(arg, &oid))
die(_("invalid shallow line: %s"), line);
register_shallow(the_repository, &oid);
continue;
}
if (skip_prefix(line, "unshallow ", &arg)) {
if (get_oid_hex(arg, &oid))
die(_("invalid unshallow line: %s"), line);
if (!lookup_object(the_repository, oid.hash))
die(_("object not found: %s"), line);
/* make sure that it is parsed as shallow */
if (!parse_object(the_repository, &oid))
die(_("error in object: %s"), line);
if (unregister_shallow(&oid))
die(_("no shallow found: %s"), line);
continue;
}
die(_("expected shallow/unshallow, got %s"), line);
}
} else if (!args->stateless_rpc)
send_request(args, fd[1], &req_buf);
if (!args->stateless_rpc) {
/* If we aren't using the stateless-rpc interface
* we don't need to retain the headers.
*/
strbuf_setlen(&req_buf, 0);
state_len = 0;
}
flushes = 0;
retval = -1;
if (args->no_dependents)
goto done;
while ((oid = negotiator->next(negotiator))) {
packet_buf_write(&req_buf, "have %s\n", oid_to_hex(oid));
print_verbose(args, "have %s", oid_to_hex(oid));
in_vain++;
if (flush_at <= ++count) {
int ack;
packet_buf_flush(&req_buf);
send_request(args, fd[1], &req_buf);
strbuf_setlen(&req_buf, state_len);
flushes++;
flush_at = next_flush(args->stateless_rpc, count);
/*
* We keep one window "ahead" of the other side, and
* will wait for an ACK only on the next one
*/
if (!args->stateless_rpc && count == INITIAL_FLUSH)
continue;
consume_shallow_list(args, fd[0]);
do {
ack = get_ack(fd[0], result_oid);
if (ack)
print_verbose(args, _("got %s %d %s"), "ack",
ack, oid_to_hex(result_oid));
switch (ack) {
case ACK:
flushes = 0;
multi_ack = 0;
retval = 0;
goto done;
case ACK_common:
case ACK_ready:
case ACK_continue: {
struct commit *commit =
lookup_commit(the_repository,
result_oid);
int was_common;
if (!commit)
die(_("invalid commit %s"), oid_to_hex(result_oid));
was_common = negotiator->ack(negotiator, commit);
if (args->stateless_rpc
&& ack == ACK_common
&& !was_common) {
/* We need to replay the have for this object
* on the next RPC request so the peer knows
* it is in common with us.
*/
const char *hex = oid_to_hex(result_oid);
packet_buf_write(&req_buf, "have %s\n", hex);
state_len = req_buf.len;
/*
* Reset in_vain because an ack
* for this commit has not been
* seen.
*/
in_vain = 0;
} else if (!args->stateless_rpc
|| ack != ACK_common)
in_vain = 0;
retval = 0;
got_continue = 1;
if (ack == ACK_ready)
got_ready = 1;
break;
}
}
} while (ack);
flushes--;
if (got_continue && MAX_IN_VAIN < in_vain) {
print_verbose(args, _("giving up"));
break; /* give up */
}
if (got_ready)
break;
}
}
done:
if (!got_ready || !no_done) {
packet_buf_write(&req_buf, "done\n");
send_request(args, fd[1], &req_buf);
}
print_verbose(args, _("done"));
if (retval != 0) {
multi_ack = 0;
flushes++;
}
strbuf_release(&req_buf);
if (!got_ready || !no_done)
consume_shallow_list(args, fd[0]);
while (flushes || multi_ack) {
int ack = get_ack(fd[0], result_oid);
if (ack) {
print_verbose(args, _("got %s (%d) %s"), "ack",
ack, oid_to_hex(result_oid));
if (ack == ACK)
return 0;
multi_ack = 1;
continue;
}
flushes--;
}
/* it is no error to fetch into a completely empty repo */
return count ? retval : 0;
}
static struct commit_list *complete;
static int mark_complete(const struct object_id *oid)
{
struct object *o = parse_object(the_repository, oid);
while (o && o->type == OBJ_TAG) {
struct tag *t = (struct tag *) o;
if (!t->tagged)
break; /* broken repository */
o->flags |= COMPLETE;
o = parse_object(the_repository, &t->tagged->oid);
}
if (o && o->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *)o;
if (!(commit->object.flags & COMPLETE)) {
commit->object.flags |= COMPLETE;
commit_list_insert(commit, &complete);
}
}
return 0;
}
static int mark_complete_oid(const char *refname, const struct object_id *oid,
int flag, void *cb_data)
{
return mark_complete(oid);
}
static void mark_recent_complete_commits(struct fetch_pack_args *args,
timestamp_t cutoff)
{
while (complete && cutoff <= complete->item->date) {
print_verbose(args, _("Marking %s as complete"),
oid_to_hex(&complete->item->object.oid));
pop_most_recent_commit(&complete, COMPLETE);
}
}
static void add_refs_to_oidset(struct oidset *oids, struct ref *refs)
{
for (; refs; refs = refs->next)
oidset_insert(oids, &refs->old_oid);
}
static int tip_oids_contain(struct oidset *tip_oids,
struct ref *unmatched, struct ref *newlist,
const struct object_id *id)
{
/*
* Note that this only looks at the ref lists the first time it's
* called. This works out in filter_refs() because even though it may
* add to "newlist" between calls, the additions will always be for
* oids that are already in the set.
*/
if (!tip_oids->map.map.tablesize) {
add_refs_to_oidset(tip_oids, unmatched);
add_refs_to_oidset(tip_oids, newlist);
}
return oidset_contains(tip_oids, id);
}
static void filter_refs(struct fetch_pack_args *args,
struct ref **refs,
struct ref **sought, int nr_sought)
{
struct ref *newlist = NULL;
struct ref **newtail = &newlist;
struct ref *unmatched = NULL;
struct ref *ref, *next;
struct oidset tip_oids = OIDSET_INIT;
int i;
i = 0;
for (ref = *refs; ref; ref = next) {
int keep = 0;
next = ref->next;
if (starts_with(ref->name, "refs/") &&
fetch-pack: do not filter out one-level refs Currently fetching a one-level ref like "refs/foo" does not work consistently. The outer "git fetch" program filters the list of refs, checking each against check_refname_format. Then it feeds the result to do_fetch_pack to actually negotiate the haves/wants and get the pack. The fetch-pack code does its own filter, and it behaves differently. The fetch-pack filter looks for refs in "refs/", and then feeds everything _after_ the slash (i.e., just "foo") into check_refname_format. But check_refname_format is not designed to look at a partial refname. It complains that the ref has only one component, thinking it is at the root (i.e., alongside "HEAD"), when in reality we just fed it a partial refname. As a result, we omit a ref like "refs/foo" from the pack request, even though "git fetch" then tries to store the resulting ref. If we happen to get the object anyway (e.g., because the ref is contained in another ref we are fetching), then the fetch succeeds. But if it is a unique object, we fail when trying to update "refs/foo". We can fix this by just passing the whole refname into check_refname_format; we know the part we were omitting is "refs/", which is acceptable in a refname. This at least makes the checks consistent with each other. This problem happens most commonly with "refs/stash", which is the only one-level ref in wide use. However, our test does not use "refs/stash", as we may later want to restrict it specifically (not because it is one-level, but because of the semantics of stashes). We may also want to do away with the multiple levels of filtering (which can cause problems when they are out of sync), or even forbid one-level refs entirely. However, those decisions can come later; this fixes the most immediate problem, which is the mismatch between the two. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
check_refname_format(ref->name, 0))
; /* trash */
else {
while (i < nr_sought) {
int cmp = strcmp(ref->name, sought[i]->name);
if (cmp < 0)
break; /* definitely do not have it */
else if (cmp == 0) {
keep = 1; /* definitely have it */
sought[i]->match_status = REF_MATCHED;
}
i++;
}
fetch-pack: don't try to fetch peel values with --all When "fetch-pack --all" sees a tag-to-blob on the remote, it tries to fetch both the tag itself ("refs/tags/foo") and the peeled value that the remote advertises ("refs/tags/foo^{}"). Asking for the object pointed to by the latter can cause upload-pack to complain with "not our ref", since it does not mark the peeled objects with the OUR_REF (unless they were at the tip of some other ref). Arguably upload-pack _should_ be marking those peeled objects. But it never has in the past, since clients would generally just ask for the tag and expect to get the peeled value along with it. And that's how "git fetch" works, as well as older versions of "fetch-pack --all". The problem was introduced by 5f0fc64513 (fetch-pack: eliminate spurious error messages, 2012-09-09). Before then, the matching logic was something like: if (refname is ill-formed) do nothing else if (doing --all) always consider it matched else look through list of sought refs for a match That commit wanted to flip the order of the second two arms of that conditional. But we ended up with: if (refname is ill-formed) do nothing else look through list of sought refs for a match if (--all and no match so far) always consider it matched That means tha an ill-formed ref will trigger the --all conditional block, even though we should just be ignoring it. We can fix that by having a single "else" with all of the well-formed logic, that checks the sought refs and "--all" in the correct order. Reported-by: Kirill Smelkov <kirr@nexedi.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
if (!keep && args->fetch_all &&
(!args->deepen || !starts_with(ref->name, "refs/tags/")))
keep = 1;
}
if (keep) {
*newtail = ref;
ref->next = NULL;
newtail = &ref->next;
} else {
ref->next = unmatched;
unmatched = ref;
}
}
/* Append unmatched requests to the list */
for (i = 0; i < nr_sought; i++) {
struct object_id oid;
const char *p;
ref = sought[i];
if (ref->match_status != REF_NOT_MATCHED)
continue;
if (parse_oid_hex(ref->name, &oid, &p) ||
*p != '\0' ||
!oideq(&oid, &ref->old_oid))
continue;
if ((allow_unadvertised_object_request &
(ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1)) ||
tip_oids_contain(&tip_oids, unmatched, newlist,
&ref->old_oid)) {
ref->match_status = REF_MATCHED;
filter_ref: make a copy of extra "sought" entries If the server supports allow_tip_sha1_in_want, we add any unmatched raw-sha1 entries in our "sought" list of refs to the list of refs we will ask the other side for. We do so by inserting the original "struct ref" directly into our list, rather than making a copy. This has several problems. The most minor problem is that one cannot ever free the resulting list; it contains structs that are copies of the remote refs (made earlier by fetch_pack) along with sought refs that are referenced elsewhere. But more importantly that we set the ref->next pointer to NULL, chopping off the remainder of any existing list that the ref was a part of. We get the set of "sought" refs in an array rather than a linked list, but that array is often in turn generated from a list. The test modification in t5516 demonstrates this. Rather than fetching just an exact sha1, we fetch that sha1 plus another ref: - we build a linked list of refs to fetch when do_fetch calls get_ref_map; the exact sha1 is first, followed by the named ref ("refs/heads/extra" in this case). - we pass that linked list to transport_fetch_ref, which squashes it into an array of pointers - that array goes to fetch_pack, which calls filter_ref. There we generate the want list from a mix of what the remote side has advertised, and the "sought" entry for the exact sha1. We set the sought entry's "next" pointer to NULL. - after we return from transport_fetch_refs, we then try to update the refs by following the linked list. But our list is now truncated, and we do not update refs/heads/extra at all. We can fix this by making a copy of the ref. There's nothing that fetch_pack does to it that must be reflected in the original "sought" list (and indeed, if that were the case we would have a serious bug, because it is only exact-sha1 entries which are treated this way). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
*newtail = copy_ref(ref);
newtail = &(*newtail)->next;
} else {
ref->match_status = REF_UNADVERTISED_NOT_ALLOWED;
}
}
oidset_clear(&tip_oids);
for (ref = unmatched; ref; ref = next) {
next = ref->next;
free(ref);
}
*refs = newlist;
}
static void mark_alternate_complete(struct fetch_negotiator *unused,
struct object *obj)
{
mark_complete(&obj->oid);
}
struct loose_object_iter {
struct oidset *loose_object_set;
struct ref *refs;
};
/*
* If the number of refs is not larger than the number of loose objects,
* this function stops inserting.
*/
static int add_loose_objects_to_set(const struct object_id *oid,
const char *path,
void *data)
{
struct loose_object_iter *iter = data;
oidset_insert(iter->loose_object_set, oid);
if (iter->refs == NULL)
return 1;
iter->refs = iter->refs->next;
return 0;
}
/*
* Mark recent commits available locally and reachable from a local ref as
* COMPLETE. If args->no_dependents is false, also mark COMPLETE remote refs as
* COMMON_REF (otherwise, we are not planning to participate in negotiation, and
* thus do not need COMMON_REF marks).
*
* The cutoff time for recency is determined by this heuristic: it is the
* earliest commit time of the objects in refs that are commits and that we know
* the commit time of.
*/
static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
struct fetch_pack_args *args,
struct ref **refs)
{
struct ref *ref;
int old_save_commit_buffer = save_commit_buffer;
timestamp_t cutoff = 0;
struct oidset loose_oid_set = OIDSET_INIT;
int use_oidset = 0;
struct loose_object_iter iter = {&loose_oid_set, *refs};
/* Enumerate all loose objects or know refs are not so many. */
use_oidset = !for_each_loose_object(add_loose_objects_to_set,
&iter, 0);
save_commit_buffer = 0;
for (ref = *refs; ref; ref = ref->next) {
struct object *o;
unsigned int flags = OBJECT_INFO_QUICK;
if (use_oidset &&
!oidset_contains(&loose_oid_set, &ref->old_oid)) {
/*
* I know this does not exist in the loose form,
* so check if it exists in a non-loose form.
*/
flags |= OBJECT_INFO_IGNORE_LOOSE;
}
if (!has_object_file_with_flags(&ref->old_oid, flags))
continue;
o = parse_object(the_repository, &ref->old_oid);
if (!o)
continue;
/* We already have it -- which may mean that we were
* in sync with the other side at some time after
* that (it is OK if we guess wrong here).
*/
if (o->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *)o;
if (!cutoff || cutoff < commit->date)
cutoff = commit->date;
}
}
oidset_clear(&loose_oid_set);
if (!args->no_dependents) {
if (!args->deepen) {
for_each_ref(mark_complete_oid, NULL);
for_each_cached_alternate(NULL, mark_alternate_complete);
commit_list_sort_by_date(&complete);
if (cutoff)
mark_recent_complete_commits(args, cutoff);
}
/*
* Mark all complete remote refs as common refs.
* Don't mark them common yet; the server has to be told so first.
*/
for (ref = *refs; ref; ref = ref->next) {
struct object *o = deref_tag(the_repository,
lookup_object(the_repository,
ref->old_oid.hash),
NULL, 0);
if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE))
continue;
negotiator->known_common(negotiator,
(struct commit *)o);
}
}
save_commit_buffer = old_save_commit_buffer;
}
/*
* Returns 1 if every object pointed to by the given remote refs is available
* locally and reachable from a local ref, and 0 otherwise.
*/
static int everything_local(struct fetch_pack_args *args,
struct ref **refs)
{
struct ref *ref;
int retval;
for (retval = 1, ref = *refs; ref ; ref = ref->next) {
const struct object_id *remote = &ref->old_oid;
struct object *o;
o = lookup_object(the_repository, remote->hash);
if (!o || !(o->flags & COMPLETE)) {
retval = 0;
print_verbose(args, "want %s (%s)", oid_to_hex(remote),
ref->name);
continue;
}
print_verbose(args, _("already have %s (%s)"), oid_to_hex(remote),
ref->name);
}
return retval;
}
static int sideband_demux(int in, int out, void *data)
{
int *xd = data;
int ret;
ret = recv_sideband("fetch-pack", xd[0], out);
close(out);
return ret;
}
static int get_pack(struct fetch_pack_args *args,
int xd[2], char **pack_lockfile)
{
struct async demux;
int do_keep = args->keep_pack;
const char *cmd_name;
struct pack_header header;
int pass_header = 0;
struct child_process cmd = CHILD_PROCESS_INIT;
clone: open a shortcut for connectivity check In order to make sure the cloned repository is good, we run "rev-list --objects --not --all $new_refs" on the repository. This is expensive on large repositories. This patch attempts to mitigate the impact in this special case. In the "good" clone case, we only have one pack. If all of the following are met, we can be sure that all objects reachable from the new refs exist, which is the intention of running "rev-list ...": - all refs point to an object in the pack - there are no dangling pointers in any object in the pack - no objects in the pack point to objects outside the pack The second and third checks can be done with the help of index-pack as a slight variation of --strict check (which introduces a new condition for the shortcut: pack transfer must be used and the number of objects large enough to call index-pack). The first is checked in check_everything_connected after we get an "ok" from index-pack. "index-pack + new checks" is still faster than the current "index-pack + rev-list", which is the whole point of this patch. If any of the conditions fail, we fall back to the good old but expensive "rev-list ..". In that case it's even more expensive because we have to pay for the new checks in index-pack. But that should only happen when the other side is either buggy or malicious. Cloning linux-2.6 over file:// before after real 3m25.693s 2m53.050s user 5m2.037s 4m42.396s sys 0m13.750s 0m16.574s A more realistic test with ssh:// over wireless before after real 11m26.629s 10m4.213s user 5m43.196s 5m19.444s sys 0m35.812s 0m37.630s This shortcut is not applied to shallow clones, partly because shallow clones should have no more objects than a usual fetch and the cost of rev-list is acceptable, partly to avoid dealing with corner cases when grafting is involved. This shortcut does not apply to unpack-objects code path either because the number of objects must be small in order to trigger that code path. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
int ret;
memset(&demux, 0, sizeof(demux));
if (use_sideband) {
/* xd[] is talking with upload-pack; subprocess reads from
* xd[0], spits out band#2 to stderr, and feeds us band#1
* through demux->out.
*/
demux.proc = sideband_demux;
demux.data = xd;
demux.out = -1;
demux.isolate_sigpipe = 1;
if (start_async(&demux))
die(_("fetch-pack: unable to fork off sideband demultiplexer"));
}
else
demux.out = xd[0];
if (!args->keep_pack && unpack_limit) {
if (read_pack_header(demux.out, &header))
die(_("protocol error: bad pack header"));
pass_header = 1;
if (ntohl(header.hdr_entries) < unpack_limit)
do_keep = 0;
else
do_keep = 1;
}
if (alternate_shallow_file) {
argv_array_push(&cmd.args, "--shallow-file");
argv_array_push(&cmd.args, alternate_shallow_file);
}
if (do_keep || args->from_promisor) {
if (pack_lockfile)
cmd.out = -1;
cmd_name = "index-pack";
argv_array_push(&cmd.args, cmd_name);
argv_array_push(&cmd.args, "--stdin");
if (!args->quiet && !args->no_progress)
argv_array_push(&cmd.args, "-v");
if (args->use_thin_pack)
argv_array_push(&cmd.args, "--fix-thin");
if (do_keep && (args->lock_pack || unpack_limit)) {
char hostname[HOST_NAME_MAX + 1];
if (xgethostname(hostname, sizeof(hostname)))
xsnprintf(hostname, sizeof(hostname), "localhost");
argv_array_pushf(&cmd.args,
"--keep=fetch-pack %"PRIuMAX " on %s",
(uintmax_t)getpid(), hostname);
}
clone: open a shortcut for connectivity check In order to make sure the cloned repository is good, we run "rev-list --objects --not --all $new_refs" on the repository. This is expensive on large repositories. This patch attempts to mitigate the impact in this special case. In the "good" clone case, we only have one pack. If all of the following are met, we can be sure that all objects reachable from the new refs exist, which is the intention of running "rev-list ...": - all refs point to an object in the pack - there are no dangling pointers in any object in the pack - no objects in the pack point to objects outside the pack The second and third checks can be done with the help of index-pack as a slight variation of --strict check (which introduces a new condition for the shortcut: pack transfer must be used and the number of objects large enough to call index-pack). The first is checked in check_everything_connected after we get an "ok" from index-pack. "index-pack + new checks" is still faster than the current "index-pack + rev-list", which is the whole point of this patch. If any of the conditions fail, we fall back to the good old but expensive "rev-list ..". In that case it's even more expensive because we have to pay for the new checks in index-pack. But that should only happen when the other side is either buggy or malicious. Cloning linux-2.6 over file:// before after real 3m25.693s 2m53.050s user 5m2.037s 4m42.396s sys 0m13.750s 0m16.574s A more realistic test with ssh:// over wireless before after real 11m26.629s 10m4.213s user 5m43.196s 5m19.444s sys 0m35.812s 0m37.630s This shortcut is not applied to shallow clones, partly because shallow clones should have no more objects than a usual fetch and the cost of rev-list is acceptable, partly to avoid dealing with corner cases when grafting is involved. This shortcut does not apply to unpack-objects code path either because the number of objects must be small in order to trigger that code path. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
if (args->check_self_contained_and_connected)
argv_array_push(&cmd.args, "--check-self-contained-and-connected");
if (args->from_promisor)
argv_array_push(&cmd.args, "--promisor");
}
else {
cmd_name = "unpack-objects";
argv_array_push(&cmd.args, cmd_name);
if (args->quiet || args->no_progress)
argv_array_push(&cmd.args, "-q");
clone: open a shortcut for connectivity check In order to make sure the cloned repository is good, we run "rev-list --objects --not --all $new_refs" on the repository. This is expensive on large repositories. This patch attempts to mitigate the impact in this special case. In the "good" clone case, we only have one pack. If all of the following are met, we can be sure that all objects reachable from the new refs exist, which is the intention of running "rev-list ...": - all refs point to an object in the pack - there are no dangling pointers in any object in the pack - no objects in the pack point to objects outside the pack The second and third checks can be done with the help of index-pack as a slight variation of --strict check (which introduces a new condition for the shortcut: pack transfer must be used and the number of objects large enough to call index-pack). The first is checked in check_everything_connected after we get an "ok" from index-pack. "index-pack + new checks" is still faster than the current "index-pack + rev-list", which is the whole point of this patch. If any of the conditions fail, we fall back to the good old but expensive "rev-list ..". In that case it's even more expensive because we have to pay for the new checks in index-pack. But that should only happen when the other side is either buggy or malicious. Cloning linux-2.6 over file:// before after real 3m25.693s 2m53.050s user 5m2.037s 4m42.396s sys 0m13.750s 0m16.574s A more realistic test with ssh:// over wireless before after real 11m26.629s 10m4.213s user 5m43.196s 5m19.444s sys 0m35.812s 0m37.630s This shortcut is not applied to shallow clones, partly because shallow clones should have no more objects than a usual fetch and the cost of rev-list is acceptable, partly to avoid dealing with corner cases when grafting is involved. This shortcut does not apply to unpack-objects code path either because the number of objects must be small in order to trigger that code path. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
args->check_self_contained_and_connected = 0;
}
if (pass_header)
argv_array_pushf(&cmd.args, "--pack_header=%"PRIu32",%"PRIu32,
ntohl(header.hdr_version),
ntohl(header.hdr_entries));
if (fetch_fsck_objects >= 0
? fetch_fsck_objects
: transfer_fsck_objects >= 0
? transfer_fsck_objects
: 0) {
if (args->from_promisor)
/*
* We cannot use --strict in index-pack because it
* checks both broken objects and links, but we only
* want to check for broken objects.
*/
argv_array_push(&cmd.args, "--fsck-objects");
else
fetch: implement fetch.fsck.* Implement support for fetch.fsck.* corresponding with the existing receive.fsck.*. This allows for pedantically cloning repositories with specific issues without turning off fetch.fsckObjects. One such repository is https://github.com/robbyrussell/oh-my-zsh.git which before this change will emit this error when cloned with fetch.fsckObjects: error: object 2b7227859263b6aabcc28355b0b994995b7148b6: zeroPaddedFilemode: contains zero-padded file modes fatal: Error in object fatal: index-pack failed Now with fetch.fsck.zeroPaddedFilemode=warn we'll warn about that issue, but the clone will succeed: warning: object 2b7227859263b6aabcc28355b0b994995b7148b6: zeroPaddedFilemode: contains zero-padded file modes warning: object a18c4d13c2a5fa2d4ecd5346c50e119b999b807d: zeroPaddedFilemode: contains zero-padded file modes warning: object 84df066176c8da3fd59b13731a86d90f4f1e5c9d: zeroPaddedFilemode: contains zero-padded file modes The motivation for this is to be able to turn on fetch.fsckObjects globally across a fleet of computers but still be able to manually clone various legacy repositories by either white-listing specific issues, or better yet whitelist specific objects. The use of --git-dir=* instead of -C in the tests could be considered somewhat archaic, but the tests I'm adding here are duplicating the corresponding receive.* tests with as few changes as possible. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
argv_array_pushf(&cmd.args, "--strict%s",
fsck_msg_types.buf);
}
cmd.in = demux.out;
cmd.git_cmd = 1;
if (start_command(&cmd))
die(_("fetch-pack: unable to fork off %s"), cmd_name);
if (do_keep && pack_lockfile) {
*pack_lockfile = index_pack_lockfile(cmd.out);
close(cmd.out);
}
if (!use_sideband)
/* Closed by start_command() */
xd[0] = -1;
clone: open a shortcut for connectivity check In order to make sure the cloned repository is good, we run "rev-list --objects --not --all $new_refs" on the repository. This is expensive on large repositories. This patch attempts to mitigate the impact in this special case. In the "good" clone case, we only have one pack. If all of the following are met, we can be sure that all objects reachable from the new refs exist, which is the intention of running "rev-list ...": - all refs point to an object in the pack - there are no dangling pointers in any object in the pack - no objects in the pack point to objects outside the pack The second and third checks can be done with the help of index-pack as a slight variation of --strict check (which introduces a new condition for the shortcut: pack transfer must be used and the number of objects large enough to call index-pack). The first is checked in check_everything_connected after we get an "ok" from index-pack. "index-pack + new checks" is still faster than the current "index-pack + rev-list", which is the whole point of this patch. If any of the conditions fail, we fall back to the good old but expensive "rev-list ..". In that case it's even more expensive because we have to pay for the new checks in index-pack. But that should only happen when the other side is either buggy or malicious. Cloning linux-2.6 over file:// before after real 3m25.693s 2m53.050s user 5m2.037s 4m42.396s sys 0m13.750s 0m16.574s A more realistic test with ssh:// over wireless before after real 11m26.629s 10m4.213s user 5m43.196s 5m19.444s sys 0m35.812s 0m37.630s This shortcut is not applied to shallow clones, partly because shallow clones should have no more objects than a usual fetch and the cost of rev-list is acceptable, partly to avoid dealing with corner cases when grafting is involved. This shortcut does not apply to unpack-objects code path either because the number of objects must be small in order to trigger that code path. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
ret = finish_command(&cmd);
if (!ret || (args->check_self_contained_and_connected && ret == 1))
args->self_contained_and_connected =
args->check_self_contained_and_connected &&
ret == 0;
else
die(_("%s failed"), cmd_name);
if (use_sideband && finish_async(&demux))
die(_("error in sideband demultiplexer"));
return 0;
}
static int cmp_ref_by_name(const void *a_, const void *b_)
{
const struct ref *a = *((const struct ref **)a_);
const struct ref *b = *((const struct ref **)b_);
return strcmp(a->name, b->name);
}
static struct ref *do_fetch_pack(struct fetch_pack_args *args,
int fd[2],
const struct ref *orig_ref,
struct ref **sought, int nr_sought,
struct shallow_info *si,
char **pack_lockfile)
{
struct ref *ref = copy_ref_list(orig_ref);
struct object_id oid;
const char *agent_feature;
int agent_len;
struct fetch_negotiator negotiator;
fetch_negotiator_init(&negotiator, negotiation_algorithm);
sort_ref_list(&ref, ref_compare_name);
QSORT(sought, nr_sought, cmp_ref_by_name);
if ((args->depth > 0 || is_repository_shallow(the_repository)) && !server_supports("shallow"))
die(_("Server does not support shallow clients"));
if (args->depth > 0 || args->deepen_since || args->deepen_not)
args->deepen = 1;
if (server_supports("multi_ack_detailed")) {
print_verbose(args, _("Server supports multi_ack_detailed"));
multi_ack = 2;
if (server_supports("no-done")) {
print_verbose(args, _("Server supports no-done"));
if (args->stateless_rpc)
no_done = 1;
}
}
else if (server_supports("multi_ack")) {
print_verbose(args, _("Server supports multi_ack"));
multi_ack = 1;
}
if (server_supports("side-band-64k")) {
print_verbose(args, _("Server supports side-band-64k"));
use_sideband = 2;
}
else if (server_supports("side-band")) {
print_verbose(args, _("Server supports side-band"));
use_sideband = 1;
}
if (server_supports("allow-tip-sha1-in-want")) {
print_verbose(args, _("Server supports allow-tip-sha1-in-want"));
allow_unadvertised_object_request |= ALLOW_TIP_SHA1;
}
if (server_supports("allow-reachable-sha1-in-want")) {
print_verbose(args, _("Server supports allow-reachable-sha1-in-want"));
allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
}
if (!server_supports("thin-pack"))
args->use_thin_pack = 0;
if (!server_supports("no-progress"))
args->no_progress = 0;
if (!server_supports("include-tag"))
args->include_tag = 0;
if (server_supports("ofs-delta"))
print_verbose(args, _("Server supports ofs-delta"));
else
prefer_ofs_delta = 0;
if (server_supports("filter")) {
server_supports_filtering = 1;
print_verbose(args, _("Server supports filter"));
} else if (args->filter_options.choice) {
warning("filtering not recognized by server, ignoring");
}
if ((agent_feature = server_feature_value("agent", &agent_len))) {
agent_supported = 1;
if (agent_len)
print_verbose(args, _("Server version is %.*s"),
agent_len, agent_feature);
}
if (server_supports("deepen-since"))
deepen_since_ok = 1;
else if (args->deepen_since)
die(_("Server does not support --shallow-since"));
if (server_supports("deepen-not"))
deepen_not_ok = 1;
else if (args->deepen_not)
die(_("Server does not support --shallow-exclude"));
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
if (!server_supports("deepen-relative") && args->deepen_relative)
die(_("Server does not support --deepen"));
mark_complete_and_common_ref(&negotiator, args, &ref);
filter_refs(args, &ref, sought, nr_sought);
if (everything_local(args, &ref)) {
packet_flush(fd[1]);
goto all_done;
}
if (find_common(&negotiator, args, fd, &oid, ref) < 0)
if (!args->keep_pack)
/* When cloning, it is not unusual to have
* no common commit.
*/
warning(_("no common commits"));
if (args->stateless_rpc)
packet_flush(fd[1]);
if (args->deepen)
setup_alternate_shallow(&shallow_lock, &alternate_shallow_file,
NULL);
else if (si->nr_ours || si->nr_theirs)
alternate_shallow_file = setup_temporary_shallow(si->shallow);
else
alternate_shallow_file = NULL;
if (get_pack(args, fd, pack_lockfile))
die(_("git fetch-pack: fetch failed."));
all_done:
negotiator.release(&negotiator);
return ref;
}
static void add_shallow_requests(struct strbuf *req_buf,
const struct fetch_pack_args *args)
{
if (is_repository_shallow(the_repository))
write_shallow_commits(req_buf, 1, NULL);
if (args->depth > 0)
packet_buf_write(req_buf, "deepen %d", args->depth);
if (args->deepen_since) {
timestamp_t max_age = approxidate(args->deepen_since);
packet_buf_write(req_buf, "deepen-since %"PRItime, max_age);
}
if (args->deepen_not) {
int i;
for (i = 0; i < args->deepen_not->nr; i++) {
struct string_list_item *s = args->deepen_not->items + i;
packet_buf_write(req_buf, "deepen-not %s", s->string);
}
}
}
static void add_wants(const struct ref *wants, struct strbuf *req_buf)
{
int use_ref_in_want = server_supports_feature("fetch", "ref-in-want", 0);
for ( ; wants ; wants = wants->next) {
const struct object_id *remote = &wants->old_oid;
struct object *o;
/*
* If that object is complete (i.e. it is an ancestor of a
* local ref), we tell them we have it but do not have to
* tell them about its ancestors, which they already know
* about.
*
* We use lookup_object here because we are only
* interested in the case we *know* the object is
* reachable and we have already scanned it.
*/
if (((o = lookup_object(the_repository, remote->hash)) != NULL) &&
(o->flags & COMPLETE)) {
continue;
}
if (!use_ref_in_want || wants->exact_oid)
packet_buf_write(req_buf, "want %s\n", oid_to_hex(remote));
else
packet_buf_write(req_buf, "want-ref %s\n", wants->name);
}
}
static void add_common(struct strbuf *req_buf, struct oidset *common)
{
struct oidset_iter iter;
const struct object_id *oid;
oidset_iter_init(common, &iter);
while ((oid = oidset_iter_next(&iter))) {
packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid));
}
}
static int add_haves(struct fetch_negotiator *negotiator,
struct strbuf *req_buf,
int *haves_to_send, int *in_vain)
{
int ret = 0;
int haves_added = 0;
const struct object_id *oid;
while ((oid = negotiator->next(negotiator))) {
packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid));
if (++haves_added >= *haves_to_send)
break;
}
*in_vain += haves_added;
if (!haves_added || *in_vain >= MAX_IN_VAIN) {
/* Send Done */
packet_buf_write(req_buf, "done\n");
ret = 1;
}
/* Increase haves to send on next round */
*haves_to_send = next_flush(1, *haves_to_send);
return ret;
}
static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
const struct fetch_pack_args *args,
const struct ref *wants, struct oidset *common,
int *haves_to_send, int *in_vain)
{
int ret = 0;
struct strbuf req_buf = STRBUF_INIT;
if (server_supports_v2("fetch", 1))
packet_buf_write(&req_buf, "command=fetch");
if (server_supports_v2("agent", 0))
packet_buf_write(&req_buf, "agent=%s", git_user_agent_sanitized());
if (args->server_options && args->server_options->nr &&
server_supports_v2("server-option", 1)) {
int i;
for (i = 0; i < args->server_options->nr; i++)
packet_write_fmt(fd_out, "server-option=%s",
args->server_options->items[i].string);
}
packet_buf_delim(&req_buf);
if (args->use_thin_pack)
packet_buf_write(&req_buf, "thin-pack");
if (args->no_progress)
packet_buf_write(&req_buf, "no-progress");
if (args->include_tag)
packet_buf_write(&req_buf, "include-tag");
if (prefer_ofs_delta)
packet_buf_write(&req_buf, "ofs-delta");
/* Add shallow-info and deepen request */
if (server_supports_feature("fetch", "shallow", 0))
add_shallow_requests(&req_buf, args);
else if (is_repository_shallow(the_repository) || args->deepen)
die(_("Server does not support shallow requests"));
/* Add filter */
if (server_supports_feature("fetch", "filter", 0) &&
args->filter_options.choice) {
print_verbose(args, _("Server supports filter"));
packet_buf_write(&req_buf, "filter %s",
args->filter_options.filter_spec);
} else if (args->filter_options.choice) {
warning("filtering not recognized by server, ignoring");
}
/* add wants */
add_wants(wants, &req_buf);
if (args->no_dependents) {
packet_buf_write(&req_buf, "done");
ret = 1;
} else {
/* Add all of the common commits we've found in previous rounds */
add_common(&req_buf, common);
/* Add initial haves */
ret = add_haves(negotiator, &req_buf, haves_to_send, in_vain);
}
/* Send request */
packet_buf_flush(&req_buf);
write_or_die(fd_out, req_buf.buf, req_buf.len);
strbuf_release(&req_buf);
return ret;
}
/*
* Processes a section header in a server's response and checks if it matches
* `section`. If the value of `peek` is 1, the header line will be peeked (and
* not consumed); if 0, the line will be consumed and the function will die if
* the section header doesn't match what was expected.
*/
static int process_section_header(struct packet_reader *reader,
const char *section, int peek)
{
int ret;
if (packet_reader_peek(reader) != PACKET_READ_NORMAL)
die(_("error reading section header '%s'"), section);
ret = !strcmp(reader->line, section);
if (!peek) {
if (!ret)
die(_("expected '%s', received '%s'"),
section, reader->line);
packet_reader_read(reader);
}
return ret;
}
static int process_acks(struct fetch_negotiator *negotiator,
struct packet_reader *reader,
struct oidset *common)
{
/* received */
int received_ready = 0;
int received_ack = 0;
process_section_header(reader, "acknowledgments", 0);
while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
const char *arg;
if (!strcmp(reader->line, "NAK"))
continue;
if (skip_prefix(reader->line, "ACK ", &arg)) {
struct object_id oid;
if (!get_oid_hex(arg, &oid)) {
struct commit *commit;
oidset_insert(common, &oid);
commit = lookup_commit(the_repository, &oid);
negotiator->ack(negotiator, commit);
}
continue;
}
if (!strcmp(reader->line, "ready")) {
received_ready = 1;
continue;
}
die(_("unexpected acknowledgment line: '%s'"), reader->line);
}
if (reader->status != PACKET_READ_FLUSH &&
reader->status != PACKET_READ_DELIM)
die(_("error processing acks: %d"), reader->status);
/* return 0 if no common, 1 if there are common, or 2 if ready */
return received_ready ? 2 : (received_ack ? 1 : 0);
}
static void receive_shallow_info(struct fetch_pack_args *args,
struct packet_reader *reader)
{
process_section_header(reader, "shallow-info", 0);
while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
const char *arg;
struct object_id oid;
if (skip_prefix(reader->line, "shallow ", &arg)) {
if (get_oid_hex(arg, &oid))
die(_("invalid shallow line: %s"), reader->line);
register_shallow(the_repository, &oid);
continue;
}
if (skip_prefix(reader->line, "unshallow ", &arg)) {
if (get_oid_hex(arg, &oid))
die(_("invalid unshallow line: %s"), reader->line);
if (!lookup_object(the_repository, oid.hash))
die(_("object not found: %s"), reader->line);
/* make sure that it is parsed as shallow */
if (!parse_object(the_repository, &oid))
die(_("error in object: %s"), reader->line);
if (unregister_shallow(&oid))
die(_("no shallow found: %s"), reader->line);
continue;
}
die(_("expected shallow/unshallow, got %s"), reader->line);
}
if (reader->status != PACKET_READ_FLUSH &&
reader->status != PACKET_READ_DELIM)
die(_("error processing shallow info: %d"), reader->status);
setup_alternate_shallow(&shallow_lock, &alternate_shallow_file, NULL);
args->deepen = 1;
}
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
static void receive_wanted_refs(struct packet_reader *reader,
struct ref **sought, int nr_sought)
{
process_section_header(reader, "wanted-refs", 0);
while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
struct object_id oid;
const char *end;
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
int i;
if (parse_oid_hex(reader->line, &oid, &end) || *end++ != ' ')
die(_("expected wanted-ref, got '%s'"), reader->line);
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
for (i = 0; i < nr_sought; i++) {
if (!strcmp(end, sought[i]->name)) {
oidcpy(&sought[i]->old_oid, &oid);
break;
}
}
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
if (i == nr_sought)
die(_("unexpected wanted-ref: '%s'"), reader->line);
}
if (reader->status != PACKET_READ_DELIM)
die(_("error processing wanted refs: %d"), reader->status);
}
enum fetch_state {
FETCH_CHECK_LOCAL = 0,
FETCH_SEND_REQUEST,
FETCH_PROCESS_ACKS,
FETCH_GET_PACK,
FETCH_DONE,
};
static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
int fd[2],
const struct ref *orig_ref,
struct ref **sought, int nr_sought,
char **pack_lockfile)
{
struct ref *ref = copy_ref_list(orig_ref);
enum fetch_state state = FETCH_CHECK_LOCAL;
struct oidset common = OIDSET_INIT;
struct packet_reader reader;
int in_vain = 0;
int haves_to_send = INITIAL_FLUSH;
struct fetch_negotiator negotiator;
fetch_negotiator_init(&negotiator, negotiation_algorithm);
packet_reader_init(&reader, fd[0], NULL, 0,
PACKET_READ_CHOMP_NEWLINE);
while (state != FETCH_DONE) {
switch (state) {
case FETCH_CHECK_LOCAL:
sort_ref_list(&ref, ref_compare_name);
QSORT(sought, nr_sought, cmp_ref_by_name);
/* v2 supports these by default */
allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
use_sideband = 2;
if (args->depth > 0 || args->deepen_since || args->deepen_not)
args->deepen = 1;
/* Filter 'ref' by 'sought' and those that aren't local */
mark_complete_and_common_ref(&negotiator, args, &ref);
filter_refs(args, &ref, sought, nr_sought);
if (everything_local(args, &ref))
state = FETCH_DONE;
else
state = FETCH_SEND_REQUEST;
mark_tips(&negotiator, args->negotiation_tips);
for_each_cached_alternate(&negotiator,
insert_one_alternate_object);
break;
case FETCH_SEND_REQUEST:
if (send_fetch_request(&negotiator, fd[1], args, ref,
&common,
&haves_to_send, &in_vain))
state = FETCH_GET_PACK;
else
state = FETCH_PROCESS_ACKS;
break;
case FETCH_PROCESS_ACKS:
/* Process ACKs/NAKs */
switch (process_acks(&negotiator, &reader, &common)) {
case 2:
state = FETCH_GET_PACK;
break;
case 1:
in_vain = 0;
/* fallthrough */
default:
state = FETCH_SEND_REQUEST;
break;
}
break;
case FETCH_GET_PACK:
/* Check for shallow-info section */
if (process_section_header(&reader, "shallow-info", 1))
receive_shallow_info(args, &reader);
if (process_section_header(&reader, "wanted-refs", 1))
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
receive_wanted_refs(&reader, sought, nr_sought);
/* get the pack */
process_section_header(&reader, "packfile", 0);
if (get_pack(args, fd, pack_lockfile))
die(_("git fetch-pack: fetch failed."));
state = FETCH_DONE;
break;
case FETCH_DONE:
continue;
}
}
negotiator.release(&negotiator);
oidset_clear(&common);
return ref;
}
fetch: implement fetch.fsck.* Implement support for fetch.fsck.* corresponding with the existing receive.fsck.*. This allows for pedantically cloning repositories with specific issues without turning off fetch.fsckObjects. One such repository is https://github.com/robbyrussell/oh-my-zsh.git which before this change will emit this error when cloned with fetch.fsckObjects: error: object 2b7227859263b6aabcc28355b0b994995b7148b6: zeroPaddedFilemode: contains zero-padded file modes fatal: Error in object fatal: index-pack failed Now with fetch.fsck.zeroPaddedFilemode=warn we'll warn about that issue, but the clone will succeed: warning: object 2b7227859263b6aabcc28355b0b994995b7148b6: zeroPaddedFilemode: contains zero-padded file modes warning: object a18c4d13c2a5fa2d4ecd5346c50e119b999b807d: zeroPaddedFilemode: contains zero-padded file modes warning: object 84df066176c8da3fd59b13731a86d90f4f1e5c9d: zeroPaddedFilemode: contains zero-padded file modes The motivation for this is to be able to turn on fetch.fsckObjects globally across a fleet of computers but still be able to manually clone various legacy repositories by either white-listing specific issues, or better yet whitelist specific objects. The use of --git-dir=* instead of -C in the tests could be considered somewhat archaic, but the tests I'm adding here are duplicating the corresponding receive.* tests with as few changes as possible. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
static int fetch_pack_config_cb(const char *var, const char *value, void *cb)
{
if (strcmp(var, "fetch.fsck.skiplist") == 0) {
const char *path;
if (git_config_pathname(&path, var, value))
return 1;
strbuf_addf(&fsck_msg_types, "%cskiplist=%s",
fsck_msg_types.len ? ',' : '=', path);
free((char *)path);
return 0;
}
if (skip_prefix(var, "fetch.fsck.", &var)) {
if (is_valid_msg_type(var, value))
strbuf_addf(&fsck_msg_types, "%c%s=%s",
fsck_msg_types.len ? ',' : '=', var, value);
else
warning("Skipping unknown msg id '%s'", var);
return 0;
}
return git_default_config(var, value, cb);
}
static void fetch_pack_config(void)
{
git_config_get_int("fetch.unpacklimit", &fetch_unpack_limit);
git_config_get_int("transfer.unpacklimit", &transfer_unpack_limit);
git_config_get_bool("repack.usedeltabaseoffset", &prefer_ofs_delta);
git_config_get_bool("fetch.fsckobjects", &fetch_fsck_objects);
git_config_get_bool("transfer.fsckobjects", &transfer_fsck_objects);
git_config_get_string("fetch.negotiationalgorithm",
&negotiation_algorithm);
fetch: implement fetch.fsck.* Implement support for fetch.fsck.* corresponding with the existing receive.fsck.*. This allows for pedantically cloning repositories with specific issues without turning off fetch.fsckObjects. One such repository is https://github.com/robbyrussell/oh-my-zsh.git which before this change will emit this error when cloned with fetch.fsckObjects: error: object 2b7227859263b6aabcc28355b0b994995b7148b6: zeroPaddedFilemode: contains zero-padded file modes fatal: Error in object fatal: index-pack failed Now with fetch.fsck.zeroPaddedFilemode=warn we'll warn about that issue, but the clone will succeed: warning: object 2b7227859263b6aabcc28355b0b994995b7148b6: zeroPaddedFilemode: contains zero-padded file modes warning: object a18c4d13c2a5fa2d4ecd5346c50e119b999b807d: zeroPaddedFilemode: contains zero-padded file modes warning: object 84df066176c8da3fd59b13731a86d90f4f1e5c9d: zeroPaddedFilemode: contains zero-padded file modes The motivation for this is to be able to turn on fetch.fsckObjects globally across a fleet of computers but still be able to manually clone various legacy repositories by either white-listing specific issues, or better yet whitelist specific objects. The use of --git-dir=* instead of -C in the tests could be considered somewhat archaic, but the tests I'm adding here are duplicating the corresponding receive.* tests with as few changes as possible. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
git_config(fetch_pack_config_cb, NULL);
}
static void fetch_pack_setup(void)
{
static int did_setup;
if (did_setup)
return;
fetch_pack_config();
if (0 <= transfer_unpack_limit)
unpack_limit = transfer_unpack_limit;
else if (0 <= fetch_unpack_limit)
unpack_limit = fetch_unpack_limit;
did_setup = 1;
}
static int remove_duplicates_in_refs(struct ref **ref, int nr)
{
struct string_list names = STRING_LIST_INIT_NODUP;
int src, dst;
for (src = dst = 0; src < nr; src++) {
struct string_list_item *item;
item = string_list_insert(&names, ref[src]->name);
if (item->util)
continue; /* already have it */
item->util = ref[src];
if (src != dst)
ref[dst] = ref[src];
dst++;
}
for (src = dst; src < nr; src++)
ref[src] = NULL;
string_list_clear(&names, 0);
return dst;
}
static void update_shallow(struct fetch_pack_args *args,
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
struct ref **sought, int nr_sought,
struct shallow_info *si)
{
struct oid_array ref = OID_ARRAY_INIT;
int *status;
int i;
if (args->deepen && alternate_shallow_file) {
if (*alternate_shallow_file == '\0') { /* --unshallow */
unlink_or_warn(git_path_shallow(the_repository));
rollback_lock_file(&shallow_lock);
} else
commit_lock_file(&shallow_lock);
return;
}
if (!si->shallow || !si->shallow->nr)
return;
if (args->cloning) {
/*
* remote is shallow, but this is a clone, there are
* no objects in repo to worry about. Accept any
* shallow points that exist in the pack (iow in repo
* after get_pack() and reprepare_packed_git())
*/
struct oid_array extra = OID_ARRAY_INIT;
struct object_id *oid = si->shallow->oid;
for (i = 0; i < si->shallow->nr; i++)
if (has_object_file(&oid[i]))
oid_array_append(&extra, &oid[i]);
if (extra.nr) {
setup_alternate_shallow(&shallow_lock,
&alternate_shallow_file,
&extra);
commit_lock_file(&shallow_lock);
}
oid_array_clear(&extra);
return;
}
if (!si->nr_ours && !si->nr_theirs)
return;
remove_nonexistent_theirs_shallow(si);
if (!si->nr_ours && !si->nr_theirs)
return;
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
for (i = 0; i < nr_sought; i++)
oid_array_append(&ref, &sought[i]->old_oid);
si->ref = &ref;
if (args->update_shallow) {
/*
* remote is also shallow, .git/shallow may be updated
* so all refs can be accepted. Make sure we only add
* shallow roots that are actually reachable from new
* refs.
*/
struct oid_array extra = OID_ARRAY_INIT;
struct object_id *oid = si->shallow->oid;
assign_shallow_commits_to_refs(si, NULL, NULL);
if (!si->nr_ours && !si->nr_theirs) {
oid_array_clear(&ref);
return;
}
for (i = 0; i < si->nr_ours; i++)
oid_array_append(&extra, &oid[si->ours[i]]);
for (i = 0; i < si->nr_theirs; i++)
oid_array_append(&extra, &oid[si->theirs[i]]);
setup_alternate_shallow(&shallow_lock,
&alternate_shallow_file,
&extra);
commit_lock_file(&shallow_lock);
oid_array_clear(&extra);
oid_array_clear(&ref);
return;
}
/*
* remote is also shallow, check what ref is safe to update
* without updating .git/shallow
*/
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
status = xcalloc(nr_sought, sizeof(*status));
assign_shallow_commits_to_refs(si, NULL, status);
if (si->nr_ours || si->nr_theirs) {
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
for (i = 0; i < nr_sought; i++)
if (status[i])
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
sought[i]->status = REF_STATUS_REJECT_SHALLOW;
}
free(status);
oid_array_clear(&ref);
}
fetch-pack: write shallow, then check connectivity When fetching, connectivity is checked after the shallow file is updated. There are 2 issues with this: (1) the connectivity check is only performed up to ancestors of existing refs (which is not thorough enough if we were deepening an existing ref in the first place), and (2) there is no rollback of the shallow file if the connectivity check fails. To solve (1), update the connectivity check to check the ancestry chain completely in the case of a deepening fetch by refraining from passing "--not --all" when invoking rev-list in connected.c. To solve (2), have fetch_pack() perform its own connectivity check before updating the shallow file. To support existing use cases in which "git fetch-pack" is used to download objects without much regard as to the connectivity of the resulting objects with respect to the existing repository, the connectivity check is only done if necessary (that is, the fetch is not a clone, and the fetch involves shallow/deepen functionality). "git fetch" still performs its own connectivity check, preserving correctness but sometimes performing redundant work. This redundancy is mitigated by the fact that fetch_pack() reports if it has performed a connectivity check itself, and if the transport supports connect or stateless-connect, it will bubble up that report so that "git fetch" knows not to perform the connectivity check in such a case. This was noticed when a user tried to deepen an existing repository by fetching with --no-shallow from a server that did not send all necessary objects - the connectivity check as run by "git fetch" succeeded, but a subsequent "git fsck" failed. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
static int iterate_ref_map(void *cb_data, struct object_id *oid)
{
struct ref **rm = cb_data;
struct ref *ref = *rm;
if (!ref)
return -1; /* end of the list */
*rm = ref->next;
oidcpy(oid, &ref->old_oid);
return 0;
}
struct ref *fetch_pack(struct fetch_pack_args *args,
int fd[], struct child_process *conn,
const struct ref *ref,
const char *dest,
struct ref **sought, int nr_sought,
struct oid_array *shallow,
char **pack_lockfile,
enum protocol_version version)
{
struct ref *ref_cpy;
struct shallow_info si;
fetch_pack_setup();
if (nr_sought)
nr_sought = remove_duplicates_in_refs(sought, nr_sought);
if (!ref) {
packet_flush(fd[1]);
die(_("no matching remote head"));
}
prepare_shallow_info(&si, shallow);
if (version == protocol_v2)
ref_cpy = do_fetch_pack_v2(args, fd, ref, sought, nr_sought,
pack_lockfile);
else
ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought,
&si, pack_lockfile);
reprepare_packed_git(the_repository);
fetch-pack: write shallow, then check connectivity When fetching, connectivity is checked after the shallow file is updated. There are 2 issues with this: (1) the connectivity check is only performed up to ancestors of existing refs (which is not thorough enough if we were deepening an existing ref in the first place), and (2) there is no rollback of the shallow file if the connectivity check fails. To solve (1), update the connectivity check to check the ancestry chain completely in the case of a deepening fetch by refraining from passing "--not --all" when invoking rev-list in connected.c. To solve (2), have fetch_pack() perform its own connectivity check before updating the shallow file. To support existing use cases in which "git fetch-pack" is used to download objects without much regard as to the connectivity of the resulting objects with respect to the existing repository, the connectivity check is only done if necessary (that is, the fetch is not a clone, and the fetch involves shallow/deepen functionality). "git fetch" still performs its own connectivity check, preserving correctness but sometimes performing redundant work. This redundancy is mitigated by the fact that fetch_pack() reports if it has performed a connectivity check itself, and if the transport supports connect or stateless-connect, it will bubble up that report so that "git fetch" knows not to perform the connectivity check in such a case. This was noticed when a user tried to deepen an existing repository by fetching with --no-shallow from a server that did not send all necessary objects - the connectivity check as run by "git fetch" succeeded, but a subsequent "git fsck" failed. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
if (!args->cloning && args->deepen) {
struct check_connected_options opt = CHECK_CONNECTED_INIT;
struct ref *iterator = ref_cpy;
opt.shallow_file = alternate_shallow_file;
if (args->deepen)
opt.is_deepening_fetch = 1;
if (check_connected(iterate_ref_map, &iterator, &opt)) {
error(_("remote did not send all necessary objects"));
free_refs(ref_cpy);
ref_cpy = NULL;
rollback_lock_file(&shallow_lock);
goto cleanup;
}
args->connectivity_checked = 1;
}
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
update_shallow(args, sought, nr_sought, &si);
fetch-pack: write shallow, then check connectivity When fetching, connectivity is checked after the shallow file is updated. There are 2 issues with this: (1) the connectivity check is only performed up to ancestors of existing refs (which is not thorough enough if we were deepening an existing ref in the first place), and (2) there is no rollback of the shallow file if the connectivity check fails. To solve (1), update the connectivity check to check the ancestry chain completely in the case of a deepening fetch by refraining from passing "--not --all" when invoking rev-list in connected.c. To solve (2), have fetch_pack() perform its own connectivity check before updating the shallow file. To support existing use cases in which "git fetch-pack" is used to download objects without much regard as to the connectivity of the resulting objects with respect to the existing repository, the connectivity check is only done if necessary (that is, the fetch is not a clone, and the fetch involves shallow/deepen functionality). "git fetch" still performs its own connectivity check, preserving correctness but sometimes performing redundant work. This redundancy is mitigated by the fact that fetch_pack() reports if it has performed a connectivity check itself, and if the transport supports connect or stateless-connect, it will bubble up that report so that "git fetch" knows not to perform the connectivity check in such a case. This was noticed when a user tried to deepen an existing repository by fetching with --no-shallow from a server that did not send all necessary objects - the connectivity check as run by "git fetch" succeeded, but a subsequent "git fsck" failed. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
cleanup:
clear_shallow_info(&si);
return ref_cpy;
}
int report_unmatched_refs(struct ref **sought, int nr_sought)
{
int i, ret = 0;
for (i = 0; i < nr_sought; i++) {
if (!sought[i])
continue;
switch (sought[i]->match_status) {
case REF_MATCHED:
continue;
case REF_NOT_MATCHED:
error(_("no such remote ref %s"), sought[i]->name);
break;
case REF_UNADVERTISED_NOT_ALLOWED:
error(_("Server does not allow request for unadvertised object %s"),
sought[i]->name);
break;
}
ret = 1;
}
return ret;
}