|
|
|
#include "git-compat-util.h"
|
|
|
|
#include "config.h"
|
|
|
|
#include "environment.h"
|
|
|
|
#include "gettext.h"
|
|
|
|
#include "hex.h"
|
|
|
|
#include "refs.h"
|
|
|
|
#include "pkt-line.h"
|
|
|
|
#include "sideband.h"
|
|
|
|
#include "repository.h"
|
|
|
|
#include "object-store.h"
|
|
|
|
#include "oid-array.h"
|
|
|
|
#include "tag.h"
|
|
|
|
#include "object.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "diff.h"
|
|
|
|
#include "revision.h"
|
|
|
|
#include "list-objects.h"
|
|
|
|
#include "list-objects-filter.h"
|
|
|
|
#include "list-objects-filter-options.h"
|
|
|
|
#include "run-command.h"
|
|
|
|
#include "connect.h"
|
|
|
|
#include "sigchain.h"
|
|
|
|
#include "version.h"
|
upload/receive-pack: allow hiding ref hierarchies
A repository may have refs that are only used for its internal
bookkeeping purposes that should not be exposed to the others that
come over the network.
Teach upload-pack to omit some refs from its initial advertisement
by paying attention to the uploadpack.hiderefs multi-valued
configuration variable. Do the same to receive-pack via the
receive.hiderefs variable. As a convenient short-hand, allow using
transfer.hiderefs to set the value to both of these variables.
Any ref that is under the hierarchies listed in the values of these
variables is excluded from responses to requests made by "ls-remote",
"fetch", etc. (for upload-pack) and "push" (for receive-pack).
Because these hidden refs do not count as OUR_REF, an attempt to
fetch objects at the tip of them will be rejected, and because these
refs do not get advertised, "git push :" will not see local branches
that have the same name as them as "matching" ones to be sent.
An attempt to update/delete these hidden refs with an explicit
refspec, e.g. "git push origin :refs/hidden/22", is rejected. This
is not a new restriction. To the pusher, it would appear that there
is no such ref, so its push request will conclude with "Now that I
sent you all the data, it is time for you to update the refs. I saw
that the ref did not exist when I started pushing, and I want the
result to point at this commit". The receiving end will apply the
compare-and-swap rule to this request and reject the push with
"Well, your update request conflicts with somebody else; I see there
is such a ref.", which is the right thing to do. Otherwise a push to
a hidden ref will always be "the last one wins", which is not a good
default.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
#include "string-list.h"
|
|
|
|
#include "strvec.h"
|
|
|
|
#include "trace2.h"
|
|
|
|
#include "prio-queue.h"
|
|
|
|
#include "protocol.h"
|
|
|
|
#include "quote.h"
|
|
|
|
#include "upload-pack.h"
|
|
|
|
#include "serve.h"
|
|
|
|
#include "commit-graph.h"
|
|
|
|
#include "commit-reach.h"
|
|
|
|
#include "shallow.h"
|
|
|
|
#include "wrapper.h"
|
|
|
|
#include "write-or-die.h"
|
|
|
|
|
|
|
|
/*
 * Object flag bits claimed by this file.
 *
 * Remember to update object flag allocation in object.h
 */
#define THEY_HAVE	(1u << 11)
#define OUR_REF		(1u << 12)
#define WANTED		(1u << 13)
#define COMMON_KNOWN	(1u << 14)

/* Bit 15 is not defined here; the shallow-related flags start at bit 16. */
#define SHALLOW		(1u << 16)
#define NOT_SHALLOW	(1u << 17)
#define CLIENT_SHALLOW	(1u << 18)
#define HIDDEN_REF	(1u << 19)
|
|
|
|
|
upload-pack: clear flags before each v2 request
Suppose a server has the following commit graph:
A B
\ /
O
We create a client by cloning A from the server with depth 1, and add
many commits to it (so that future fetches span multiple requests due to
lengthy negotiation). If it then fetches B using protocol v2, the fetch
spanning multiple requests, the resulting packfile does not contain O
even though the client did report that A is shallow.
This is because upload_pack_v2() can be called multiple times while
processing the same session. During the 2nd and all subsequent
invocations, some object flags remain from the previous invocations. In
particular, CLIENT_SHALLOW remains, preventing process_shallow() from
adding client-reported shallows to the "shallows" array, and hence
pack-objects not knowing about these client-reported shallows.
Therefore, teach upload_pack_v2() to clear object flags at the start of
each invocation. This has some other results:
- THEY_HAVE gates addition of objects to have_obj in process_haves().
Previously in upload_pack_v2(), have_obj needed to be static because
once an object is added to have_obj, it is never readded and thus we
needed to retain the contents of have_obj between invocations. Now
that flags are cleared, this is no longer necessary. This patch does
not change the behavior of ok_to_give_up() (THEY_HAVE is still set on
each "have") and got_oid() (used only in non-v2); THEY_HAVE is not
used in any other function.
- WANTED gates addition of objects to want_obj in parse_want() and
parse_want_ref(). It is also used in receive_needs(), but that is
only used in non-v2. For the same reasons as THEY_HAVE, want_obj no
longer needs to be static in upload_pack_v2().
- CLIENT_SHALLOW is changed as discussed above.
Clearing of the other 5 flags does not affect functionality in v2. (Note
that in non-v2, upload_pack() is only called once per process, so each
invocation starts with blank flags anyway.)
- OUR_REF is only used in non-v2.
- COMMON_KNOWN is only used as a scratch flag in ok_to_give_up().
- SHALLOW is passed to invocations in deepen() and
deepen_by_rev_list(), but upload-pack doesn't use it.
- NOT_SHALLOW is used by send_shallow() and send_unshallow(), but
invocations of those functions are always preceded by code that sets
NOT_SHALLOW on the appropriate objects.
- HIDDEN_REF is only used in non-v2.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
/* Union of all eight object flag bits defined by this file. */
#define ALL_FLAGS \
	(THEY_HAVE | OUR_REF | WANTED | COMMON_KNOWN | \
	 SHALLOW | NOT_SHALLOW | CLIENT_SHALLOW | HIDDEN_REF)
|
|
|
|
|
|
|
|
/*
 * Which unadvertised object requests (UOR) are permitted.  The values
 * are bit masks, so a broader policy contains the narrower ones.
 */
enum allow_uor {
	/* A sha1 may be requested if it is a ref tip. */
	ALLOW_TIP_SHA1 = 0x01,

	/*
	 * A sha1 may be requested if it is reachable from a ref
	 * (possibly a hidden ref).
	 */
	ALLOW_REACHABLE_SHA1 = 0x02,

	/*
	 * Any sha1 may be requested.  Implies ALLOW_TIP_SHA1 and
	 * ALLOW_REACHABLE_SHA1; the value is 0x07.
	 */
	ALLOW_ANY_SHA1 = ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1 | 0x04
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Please annotate, and if possible group together, fields used only
|
|
|
|
* for protocol v0 or only for protocol v2.
|
|
|
|
*/
|
|
|
|
struct upload_pack_data {
|
|
|
|
struct string_list symref; /* v0 only */
|
|
|
|
struct object_array want_obj;
|
|
|
|
struct object_array have_obj;
|
|
|
|
struct oid_array haves; /* v2 only */
|
|
|
|
struct string_list wanted_refs; /* v2 only */
|
|
|
|
struct string_list hidden_refs;
|
|
|
|
|
|
|
|
struct object_array shallows;
|
|
|
|
struct string_list deepen_not;
|
|
|
|
struct object_array extra_edge_obj;
|
|
|
|
int depth;
|
|
|
|
timestamp_t deepen_since;
|
|
|
|
int deepen_rev_list;
|
|
|
|
int deepen_relative;
|
|
|
|
int keepalive;
|
|
|
|
int shallow_nr;
|
|
|
|
timestamp_t oldest_have;
|
|
|
|
|
|
|
|
unsigned int timeout; /* v0 only */
|
|
|
|
enum {
|
|
|
|
NO_MULTI_ACK = 0,
|
|
|
|
MULTI_ACK = 1,
|
|
|
|
MULTI_ACK_DETAILED = 2
|
|
|
|
} multi_ack; /* v0 only */
|
|
|
|
|
|
|
|
/* 0 for no sideband, otherwise DEFAULT_PACKET_MAX or LARGE_PACKET_MAX */
|
|
|
|
int use_sideband;
|
|
|
|
|
|
|
|
struct string_list uri_protocols;
|
|
|
|
enum allow_uor allow_uor;
|
|
|
|
|
|
|
|
struct list_objects_filter_options filter_options;
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
struct string_list allowed_filters;
|
|
|
|
|
|
|
|
struct packet_writer writer;
|
|
|
|
|
|
|
|
const char *pack_objects_hook;
|
|
|
|
|
|
|
|
unsigned stateless_rpc : 1; /* v0 only */
|
|
|
|
unsigned no_done : 1; /* v0 only */
|
|
|
|
unsigned daemon_mode : 1; /* v0 only */
|
|
|
|
unsigned filter_capability_requested : 1; /* v0 only */
|
|
|
|
|
|
|
|
unsigned use_thin_pack : 1;
|
|
|
|
unsigned use_ofs_delta : 1;
|
|
|
|
unsigned no_progress : 1;
|
|
|
|
unsigned use_include_tag : 1;
|
fetch: teach independent negotiation (no packfile)
Currently, the packfile negotiation step within a Git fetch cannot be
done independent of sending the packfile, even though there is at least
one application wherein this is useful. Therefore, make it possible for
this negotiation step to be done independently. A subsequent commit will
use this for one such application - push negotiation.
This feature is for protocol v2 only. (An implementation for protocol v0
would require a separate implementation in the fetch, transport, and
transport helper code.)
In the protocol, the main hindrance towards independent negotiation is
that the server can unilaterally decide to send the packfile. This is
solved by a "wait-for-done" argument: the server will then wait for the
client to say "done". In practice, the client will never say it; instead
it will cease requests once it is satisfied.
In the client, the main change lies in the transport and transport
helper code. fetch_refs_via_pack() performs everything needed - protocol
version and capability checks, and the negotiation itself.
There are 2 code paths that do not go through fetch_refs_via_pack() that
needed to be individually excluded: the bundle transport (excluded
through requiring smart_options, which the bundle transport doesn't
support) and transport helpers that do not support takeover. If or when
we support independent negotiation for protocol v0, we will need to
modify these 2 code paths to support it. But for now, report failure if
independent negotiation is requested in these cases.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
unsigned wait_for_done : 1;
|
|
|
|
unsigned allow_filter : 1;
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
unsigned allow_filter_fallback : 1;
|
upload-pack.c: introduce 'uploadpackfilter.tree.maxDepth'
In b79cf959b2 (upload-pack.c: allow banning certain object filter(s),
2020-02-26), we introduced functionality to disallow certain object
filters from being chosen from within 'git upload-pack'. Traditionally,
administrators use this functionality to disallow filters that are known
to perform slowly, for e.g., those that do not have bitmap-level
filtering.
In the past, the '--filter=tree:<n>' was one such filter that does not
have bitmap-level filtering support, and so was likely to be banned by
administrators.
However, in the previous couple of commits, we introduced bitmap-level
filtering for the case when 'n' is equal to '0', i.e., as if we had a
'--filter=tree:none' choice.
While it would be sufficient to simply write
$ git config uploadpackfilter.tree.allow true
(since it would allow all values of 'n'), we would like to be able to
allow this filter for certain values of 'n', i.e., those no greater than
some pre-specified maximum.
In order to do this, introduce a new configuration key, as follows:
$ git config uploadpackfilter.tree.maxDepth <m>
where '<m>' specifies the maximum allowed value of 'n' in the filter
'tree:n'. Administrators who wish to allow for only the value '0' can
write:
$ git config uploadpackfilter.tree.allow true
$ git config uploadpackfilter.tree.maxDepth 0
which allows '--filter=tree:0', but no other values.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Acked-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
unsigned long tree_filter_max_depth;
|
|
|
|
|
|
|
|
unsigned done : 1; /* v2 only */
|
|
|
|
unsigned allow_ref_in_want : 1; /* v2 only */
|
|
|
|
unsigned allow_sideband_all : 1; /* v2 only */
|
|
|
|
unsigned advertise_sid : 1;
|
upload-pack: advertise capabilities when cloning empty repos
When cloning an empty repository, protocol versions 0 and 1 currently
offer nothing but the header and flush packets for the /info/refs
endpoint. This means that no capabilities are provided, so the client
side doesn't know what capabilities are present.
However, this does pose a problem when working with SHA-256
repositories, since we use the capabilities to know the remote side's
object format (hash algorithm). As of 8b214c2e9d ("clone: propagate
object-format when cloning from void", 2023-04-05), this has been fixed
for protocol v2, since there we always read the hash algorithm from the
remote.
Fortunately, the push version of the protocol already indicates a clue
for how to solve this. When the /info/refs endpoint is accessed for a
push and the remote is empty, we include a dummy "capabilities^{}" ref
pointing to the all-zeros object ID. The protocol documentation already
indicates this should _always_ be sent, even for fetches and clones, so
let's just do that, which means we'll properly announce the hash
algorithm as part of the capabilities. This just works with the
existing code because we share the same ref code for fetches and clones,
and libgit2, JGit, and dulwich do as well.
There is one minor issue to fix, though. If we called send_ref with
namespaces, we would return NULL with the capabilities entry, which
would cause a crash. Instead, let's refactor out a function to print
just the ref itself without stripping the namespace and use it for our
special capabilities entry.
Add several sets of tests for HTTP as well as for local clones. The
behavior can be slightly different for HTTP versus a local or SSH clone
because of the stateless-rpc functionality, so it's worth testing both.
Signed-off-by: brian m. carlson <bk2204@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 year ago
|
|
|
unsigned sent_capabilities : 1;
|
|
|
|
};
|
|
|
|
|
|
|
|
static void upload_pack_data_init(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
struct string_list symref = STRING_LIST_INIT_DUP;
|
|
|
|
struct string_list wanted_refs = STRING_LIST_INIT_DUP;
|
|
|
|
struct string_list hidden_refs = STRING_LIST_INIT_DUP;
|
|
|
|
struct object_array want_obj = OBJECT_ARRAY_INIT;
|
|
|
|
struct object_array have_obj = OBJECT_ARRAY_INIT;
|
|
|
|
struct oid_array haves = OID_ARRAY_INIT;
|
|
|
|
struct object_array shallows = OBJECT_ARRAY_INIT;
|
|
|
|
struct string_list deepen_not = STRING_LIST_INIT_DUP;
|
|
|
|
struct string_list uri_protocols = STRING_LIST_INIT_DUP;
|
|
|
|
struct object_array extra_edge_obj = OBJECT_ARRAY_INIT;
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
struct string_list allowed_filters = STRING_LIST_INIT_DUP;
|
|
|
|
|
|
|
|
memset(data, 0, sizeof(*data));
|
|
|
|
data->symref = symref;
|
|
|
|
data->wanted_refs = wanted_refs;
|
|
|
|
data->hidden_refs = hidden_refs;
|
|
|
|
data->want_obj = want_obj;
|
|
|
|
data->have_obj = have_obj;
|
|
|
|
data->haves = haves;
|
|
|
|
data->shallows = shallows;
|
|
|
|
data->deepen_not = deepen_not;
|
|
|
|
data->uri_protocols = uri_protocols;
|
|
|
|
data->extra_edge_obj = extra_edge_obj;
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
data->allowed_filters = allowed_filters;
|
|
|
|
data->allow_filter_fallback = 1;
|
upload-pack.c: introduce 'uploadpackfilter.tree.maxDepth'
In b79cf959b2 (upload-pack.c: allow banning certain object filter(s),
2020-02-26), we introduced functionality to disallow certain object
filters from being chosen from within 'git upload-pack'. Traditionally,
administrators use this functionality to disallow filters that are known
to perform slowly, for e.g., those that do not have bitmap-level
filtering.
In the past, the '--filter=tree:<n>' was one such filter that does not
have bitmap-level filtering support, and so was likely to be banned by
administrators.
However, in the previous couple of commits, we introduced bitmap-level
filtering for the case when 'n' is equal to '0', i.e., as if we had a
'--filter=tree:none' choice.
While it would be sufficient to simply write
$ git config uploadpackfilter.tree.allow true
(since it would allow all values of 'n'), we would like to be able to
allow this filter for certain values of 'n', i.e., those no greater than
some pre-specified maximum.
In order to do this, introduce a new configuration key, as follows:
$ git config uploadpackfilter.tree.maxDepth <m>
where '<m>' specifies the maximum allowed value of 'n' in the filter
'tree:n'. Administrators who wish to allow for only the value '0' can
write:
$ git config uploadpackfilter.tree.allow true
$ git config uploadpackfilter.tree.maxDepth 0
which allows '--filter=tree:0', but no other values.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Acked-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
data->tree_filter_max_depth = ULONG_MAX;
|
|
|
|
packet_writer_init(&data->writer, 1);
|
list-objects-filter: add and use initializers
In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec
strings, 2022-09-08), we noted that the filter_spec string_list was
inconsistent in how it handled memory ownership of strings stored in the
list. The fix there was a bit of a band-aid to set the "strdup_strings"
variable right before adding anything.
That works OK, and it lets the users of the API continue to
zero-initialize the struct. But it makes the code a bit hard to follow
and accident-prone, as any other spots appending the filter_spec need to
think about whether to set the strdup_strings value, too (there's one
such spot in partial_clone_get_default_filter_spec(), which is probably
a possible memory leak).
So let's do that full cleanup now. We'll introduce a
LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as
appropriate (though it is for the "_options" struct, this matches the
corresponding list_objects_filter_release() function).
This is harder than it seems! Many other structs, like
git_transport_data, embed the filter struct. So they need to initialize
it themselves even if the rest of the enclosing struct is OK with
zero-initialization. I found all of the relevant spots by grepping
manually for declarations of list_objects_filter_options. And then doing
so recursively for structs which embed it, and ones which embed those,
and so on.
I'm pretty sure I got everything, but there's no change that would alert
the compiler if any topics in flight added new declarations. To catch
this case, we now double-check in the parsing function that things were
initialized as expected and BUG() if appropriate.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
|
|
|
list_objects_filter_init(&data->filter_options);
|
|
|
|
|
|
|
|
data->keepalive = 5;
|
|
|
|
data->advertise_sid = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void upload_pack_data_clear(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
string_list_clear(&data->symref, 1);
|
|
|
|
string_list_clear(&data->wanted_refs, 1);
|
|
|
|
string_list_clear(&data->hidden_refs, 0);
|
|
|
|
object_array_clear(&data->want_obj);
|
|
|
|
object_array_clear(&data->have_obj);
|
|
|
|
oid_array_clear(&data->haves);
|
|
|
|
object_array_clear(&data->shallows);
|
|
|
|
string_list_clear(&data->deepen_not, 0);
|
|
|
|
object_array_clear(&data->extra_edge_obj);
|
|
|
|
list_objects_filter_release(&data->filter_options);
|
upload-pack.c: don't free allowed_filters util pointers
To keep track of which object filters are allowed or not, 'git
upload-pack' stores the name of each filter in a string_list, and sets
it ->util pointer to be either 0 or 1, indicating whether it is banned
or allowed.
Later on, we attempt to clear that list, but we incorrectly ask for the
util pointers to be free()'d, too. This behavior (introduced back in
6dd3456a8c (upload-pack.c: allow banning certain object filter(s),
2020-08-03)) leads to an invalid free, and causes us to crash.
In order to trigger this, one needs to fetch from a server that (a) has
at least one object filter allowed, and (b) issue a fetch that contains
a subset of the allowed filters (i.e., we cannot ask for a banned
filter, since this causes us to die() before we hit the bogus
string_list_clear()).
In that case, whatever banned filters exist will cause a noop free()
(since those ->util pointers are set to 0), but the first allowed filter
we try to free will crash us.
We never noticed this in the tests because we didn't have an example of
setting 'uploadPackFilter' configuration variables and then following up
with a valid fetch. The first new 'git clone' prevents further
regression here. For good measure on top, add a test which checks the
same behavior at a tree depth greater than 0.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
string_list_clear(&data->allowed_filters, 0);
|
|
|
|
|
|
|
|
free((char *)data->pack_objects_hook);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * (Re)arm the process alarm with `timeout`; the value is handed to
 * alarm() unchanged.
 */
static void reset_timeout(unsigned int timeout)
{
	alarm(timeout);
}
|
|
|
|
|
|
|
|
/*
 * Deliver `sz` bytes of `data` to the client.
 *
 * When `use_sideband` is non-zero the data goes through send_sideband()
 * with `fd` passed along (presumably as the band number -- confirm).
 * Otherwise fd 3 is treated like fd 2, writes to fd 2 are best-effort,
 * and writes to any other descriptor must succeed.
 */
static void send_client_data(int fd, const char *data, ssize_t sz,
			     int use_sideband)
{
	if (use_sideband) {
		send_sideband(1, fd, data, sz, use_sideband);
		return;
	}

	switch (fd) {
	case 3:
		/* emergency quit: fall through and report on fd 2 */
	case 2:
		/* XXX: are we happy to lose stuff here? */
		xwrite(2, data, sz);
		return;
	default:
		write_or_die(fd, data, sz);
		return;
	}
}
|
|
|
|
|
|
|
|
static int write_one_shallow(const struct commit_graft *graft, void *cb_data)
|
|
|
|
{
|
|
|
|
FILE *fp = cb_data;
|
|
|
|
if (graft->nr_parent == -1)
|
|
|
|
fprintf(fp, "--shallow %s\n", oid_to_hex(&graft->oid));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct output_state {
|
|
|
|
/*
|
|
|
|
* We do writes no bigger than LARGE_PACKET_DATA_MAX - 1, because with
|
|
|
|
* sideband-64k the band designator takes up 1 byte of space. Because
|
|
|
|
* relay_pack_data keeps the last byte to itself, we make the buffer 1
|
|
|
|
* byte bigger than the intended maximum write size.
|
|
|
|
*/
|
|
|
|
char buffer[(LARGE_PACKET_DATA_MAX - 1) + 1];
|
|
|
|
int used;
|
|
|
|
unsigned packfile_uris_started : 1;
|
|
|
|
unsigned packfile_started : 1;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int relay_pack_data(int pack_objects_out, struct output_state *os,
|
|
|
|
int use_sideband, int write_packfile_line)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We keep the last byte to ourselves
|
|
|
|
* in case we detect broken rev-list, so that we
|
|
|
|
* can leave the stream corrupted. This is
|
|
|
|
* unfortunate -- unpack-objects would happily
|
|
|
|
* accept a valid packdata with trailing garbage,
|
|
|
|
* so appending garbage after we pass all the
|
|
|
|
* pack data is not good enough to signal
|
|
|
|
* breakage to downstream.
|
|
|
|
*/
|
|
|
|
ssize_t readsz;
|
|
|
|
|
|
|
|
readsz = xread(pack_objects_out, os->buffer + os->used,
|
|
|
|
sizeof(os->buffer) - os->used);
|
|
|
|
if (readsz < 0) {
|
|
|
|
return readsz;
|
|
|
|
}
|
|
|
|
os->used += readsz;
|
|
|
|
|
|
|
|
while (!os->packfile_started) {
|
|
|
|
char *p;
|
|
|
|
if (os->used >= 4 && !memcmp(os->buffer, "PACK", 4)) {
|
|
|
|
os->packfile_started = 1;
|
|
|
|
if (write_packfile_line) {
|
|
|
|
if (os->packfile_uris_started)
|
|
|
|
packet_delim(1);
|
|
|
|
packet_write_fmt(1, "\1packfile\n");
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if ((p = memchr(os->buffer, '\n', os->used))) {
|
|
|
|
if (!os->packfile_uris_started) {
|
|
|
|
os->packfile_uris_started = 1;
|
|
|
|
if (!write_packfile_line)
|
|
|
|
BUG("packfile_uris requires sideband-all");
|
|
|
|
packet_write_fmt(1, "\1packfile-uris\n");
|
|
|
|
}
|
|
|
|
*p = '\0';
|
|
|
|
packet_write_fmt(1, "\1%s\n", os->buffer);
|
|
|
|
|
|
|
|
os->used -= p - os->buffer + 1;
|
|
|
|
memmove(os->buffer, p + 1, os->used);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Incomplete line.
|
|
|
|
*/
|
|
|
|
return readsz;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (os->used > 1) {
|
|
|
|
send_client_data(1, os->buffer, os->used - 1, use_sideband);
|
|
|
|
os->buffer[0] = os->buffer[os->used - 1];
|
|
|
|
os->used = 1;
|
|
|
|
} else {
|
|
|
|
send_client_data(1, os->buffer, os->used, use_sideband);
|
|
|
|
os->used = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return readsz;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void create_pack_file(struct upload_pack_data *pack_data,
|
|
|
|
const struct string_list *uri_protocols)
|
|
|
|
{
|
|
|
|
struct child_process pack_objects = CHILD_PROCESS_INIT;
|
|
|
|
struct output_state *output_state = xcalloc(1, sizeof(struct output_state));
|
|
|
|
char progress[128];
|
|
|
|
char abort_msg[] = "aborting due to possible repository "
|
|
|
|
"corruption on the remote side.";
|
|
|
|
ssize_t sz;
|
|
|
|
int i;
|
|
|
|
FILE *pipe_fd;
|
|
|
|
|
|
|
|
if (!pack_data->pack_objects_hook)
|
upload-pack: provide a hook for running pack-objects
When upload-pack serves a client request, it turns to
pack-objects to do the heavy lifting of creating a
packfile. There's no easy way to intercept the call to
pack-objects, but there are a few good reasons to want to do
so:
1. If you're debugging a client or server issue with
fetching, you may want to store a copy of the generated
packfile.
2. If you're gathering data from real-world fetches for
performance analysis or debugging, storing a copy of
the arguments and stdin lets you replay the pack
generation at your leisure.
3. You may want to insert a caching layer around
pack-objects; it is the most CPU- and memory-intensive
part of serving a fetch, and its output is a pure
function[1] of its input, making it an ideal place to
consolidate identical requests.
This patch adds a simple "hook" interface to intercept calls
to pack-objects. The new test demonstrates how it can be
used for debugging (using it for caching is a
straightforward extension; the tricky part is writing the
actual caching layer).
This hook is unlike the normal hook scripts found in the
"hooks/" directory of a repository. Because we promise that
upload-pack is safe to run in an untrusted repository, we
cannot execute arbitrary code or commands found in the
repository (neither in hooks/, nor in the config). So
instead, this hook is triggered from a config variable that
is explicitly ignored in the per-repo config.
The config variable holds the actual shell command to run as
the hook. Another approach would be to simply treat it as a
boolean: "should I respect the upload-pack hooks in this
repo?", and then run the script from "hooks/" as we usually
do. However, that isn't as flexible; there's no way to run a
hook approved by the site administrator (e.g., in
"/etc/gitconfig") on a repository whose contents are not
trusted. The approach taken by this patch is more
fine-grained, if a little less conventional for git hooks
(it does behave similarly to other configured commands like
diff.external, etc).
[1] Pack-objects isn't _actually_ a pure function. Its
output depends on the exact packing of the object
database, and if multi-threading is used for delta
compression, can even differ racily. But for the
purposes of caching, that's OK; of the many possible
outputs for a given input, it is sufficient only that we
output one of them.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
pack_objects.git_cmd = 1;
|
|
|
|
else {
|
|
|
|
strvec_push(&pack_objects.args, pack_data->pack_objects_hook);
|
|
|
|
strvec_push(&pack_objects.args, "git");
|
upload-pack: provide a hook for running pack-objects
When upload-pack serves a client request, it turns to
pack-objects to do the heavy lifting of creating a
packfile. There's no easy way to intercept the call to
pack-objects, but there are a few good reasons to want to do
so:
1. If you're debugging a client or server issue with
fetching, you may want to store a copy of the generated
packfile.
2. If you're gathering data from real-world fetches for
performance analysis or debugging, storing a copy of
the arguments and stdin lets you replay the pack
generation at your leisure.
3. You may want to insert a caching layer around
pack-objects; it is the most CPU- and memory-intensive
part of serving a fetch, and its output is a pure
function[1] of its input, making it an ideal place to
consolidate identical requests.
This patch adds a simple "hook" interface to intercept calls
to pack-objects. The new test demonstrates how it can be
used for debugging (using it for caching is a
straightforward extension; the tricky part is writing the
actual caching layer).
This hook is unlike the normal hook scripts found in the
"hooks/" directory of a repository. Because we promise that
upload-pack is safe to run in an untrusted repository, we
cannot execute arbitrary code or commands found in the
repository (neither in hooks/, nor in the config). So
instead, this hook is triggered from a config variable that
is explicitly ignored in the per-repo config.
The config variable holds the actual shell command to run as
the hook. Another approach would be to simply treat it as a
boolean: "should I respect the upload-pack hooks in this
repo?", and then run the script from "hooks/" as we usually
do. However, that isn't as flexible; there's no way to run a
hook approved by the site administrator (e.g., in
"/etc/gitconfig") on a repository whose contents are not
trusted. The approach taken by this patch is more
fine-grained, if a little less conventional for git hooks
(it does behave similarly to other configured commands like
diff.external, etc).
[1] Pack-objects isn't _actually_ a pure function. Its
output depends on the exact packing of the object
database, and if multi-threading is used for delta
compression, can even differ racily. But for the
purposes of caching, that's OK; of the many possible
outputs for a given input, it is sufficient only that we
output one of them.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
pack_objects.use_shell = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pack_data->shallow_nr) {
|
|
|
|
strvec_push(&pack_objects.args, "--shallow-file");
|
|
|
|
strvec_push(&pack_objects.args, "");
|
|
|
|
}
|
|
|
|
strvec_push(&pack_objects.args, "pack-objects");
|
|
|
|
strvec_push(&pack_objects.args, "--revs");
|
|
|
|
if (pack_data->use_thin_pack)
|
|
|
|
strvec_push(&pack_objects.args, "--thin");
|
|
|
|
|
|
|
|
strvec_push(&pack_objects.args, "--stdout");
|
|
|
|
if (pack_data->shallow_nr)
|
|
|
|
strvec_push(&pack_objects.args, "--shallow");
|
|
|
|
if (!pack_data->no_progress)
|
|
|
|
strvec_push(&pack_objects.args, "--progress");
|
|
|
|
if (pack_data->use_ofs_delta)
|
|
|
|
strvec_push(&pack_objects.args, "--delta-base-offset");
|
|
|
|
if (pack_data->use_include_tag)
|
|
|
|
strvec_push(&pack_objects.args, "--include-tag");
|
|
|
|
if (pack_data->filter_options.choice) {
|
|
|
|
const char *spec =
|
|
|
|
expand_list_objects_filter_spec(&pack_data->filter_options);
|
|
|
|
strvec_pushf(&pack_objects.args, "--filter=%s", spec);
|
|
|
|
}
|
|
|
|
if (uri_protocols) {
|
|
|
|
for (i = 0; i < uri_protocols->nr; i++)
|
|
|
|
strvec_pushf(&pack_objects.args, "--uri-protocol=%s",
|
|
|
|
uri_protocols->items[i].string);
|
|
|
|
}
|
|
|
|
|
upload-pack: start pack-objects before async rev-list
In a pthread-enabled version of upload-pack, there's a race condition
that can cause a deadlock on the fflush(NULL) we call from run-command.
What happens is this:
1. Upload-pack is informed we are doing a shallow clone.
2. We call start_async() to spawn a thread that will generate rev-list
results to feed to pack-objects. It gets a file descriptor to a
pipe which will eventually hook to pack-objects.
3. The rev-list thread uses fdopen to create a new output stream
around the fd we gave it, called pack_pipe.
4. The thread writes results to pack_pipe. Outside of our control,
libc is doing locking on the stream. We keep writing until the OS
pipe buffer is full, and then we block in write(), still holding
the lock.
5. The main thread now uses start_command to spawn pack-objects.
Before forking, it calls fflush(NULL) to flush every stdio output
buffer. It blocks trying to get the lock on pack_pipe.
And we have a deadlock. The thread will block until somebody starts
reading from the pipe. But nobody will read from the pipe until we
finish flushing to the pipe.
To fix this, we swap the start order: we start the
pack-objects reader first, and then the rev-list writer
after. Thus the problematic fflush(NULL) happens before we
even open the new file descriptor (and even if it didn't,
flushing should no longer block, as the reader at the end of
the pipe is now active).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
pack_objects.in = -1;
|
|
|
|
pack_objects.out = -1;
|
|
|
|
pack_objects.err = -1;
|
upload-pack: kill pack-objects helper on signal or exit
We spawn an external pack-objects process to actually send objects to
the remote side. If we are killed by a signal during this process, then
pack-objects may continue to run. As soon as it starts producing output
for the pack, it will see a failure writing to upload-pack and exit
itself. But before then, it may do significant work traversing the
object graph, compressing deltas, etc, which will all be pointless. So
let's make sure to kill as soon as we know that the caller will not read
the result.
There's no test here, since it's inherently racy, but here's an easy
reproduction on a large-ish repo like linux.git:
- make sure you don't have pack bitmaps (since they make the enumerating
phase go quickly). For linux.git it takes ~30s or so to walk the
whole graph on my machine.
- run "git clone --no-local -q . dst"; the "-q" is important because
if pack-objects is writing progress to upload-pack (to get
multiplexed over the sideband to the client), then it will notice
pretty quickly the failure to write to stderr
- kill the client-side clone process in another terminal (don't use
^C, as that will send SIGINT to all of the processes)
- run "ps au | grep git" or similar to observe upload-pack dying
within 5 seconds (it will send a keepalive that will notice the
client has gone away)
- but you'll still see pack-objects consuming 100% CPU (and 1GB+ of
RAM) during the traversal and delta compression phases. It will exit
as soon as it starts to write the pack (when it will notice that
upload-pack went away).
With this patch, pack-objects exits as soon as upload-pack does.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
pack_objects.clean_on_exit = 1;
|
|
|
|
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
if (start_command(&pack_objects))
|
|
|
|
die("git upload-pack: unable to fork git-pack-objects");
|
|
|
|
|
|
|
|
pipe_fd = xfdopen(pack_objects.in, "w");
|
|
|
|
|
|
|
|
if (pack_data->shallow_nr)
|
|
|
|
for_each_commit_graft(write_one_shallow, pipe_fd);
|
|
|
|
|
|
|
|
for (i = 0; i < pack_data->want_obj.nr; i++)
|
|
|
|
fprintf(pipe_fd, "%s\n",
|
|
|
|
oid_to_hex(&pack_data->want_obj.objects[i].item->oid));
|
|
|
|
fprintf(pipe_fd, "--not\n");
|
|
|
|
for (i = 0; i < pack_data->have_obj.nr; i++)
|
|
|
|
fprintf(pipe_fd, "%s\n",
|
|
|
|
oid_to_hex(&pack_data->have_obj.objects[i].item->oid));
|
|
|
|
for (i = 0; i < pack_data->extra_edge_obj.nr; i++)
|
|
|
|
fprintf(pipe_fd, "%s\n",
|
|
|
|
oid_to_hex(&pack_data->extra_edge_obj.objects[i].item->oid));
|
|
|
|
fprintf(pipe_fd, "\n");
|
|
|
|
fflush(pipe_fd);
|
|
|
|
fclose(pipe_fd);
|
|
|
|
|
|
|
|
/* We read from pack_objects.err to capture stderr output for
|
|
|
|
* progress bar, and pack_objects.out to capture the pack data.
|
|
|
|
*/
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
struct pollfd pfd[2];
|
|
|
|
int pe, pu, pollsize, polltimeout;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
reset_timeout(pack_data->timeout);
|
|
|
|
|
|
|
|
pollsize = 0;
|
|
|
|
pe = pu = -1;
|
|
|
|
|
|
|
|
if (0 <= pack_objects.out) {
|
|
|
|
pfd[pollsize].fd = pack_objects.out;
|
|
|
|
pfd[pollsize].events = POLLIN;
|
|
|
|
pu = pollsize;
|
|
|
|
pollsize++;
|
|
|
|
}
|
|
|
|
if (0 <= pack_objects.err) {
|
|
|
|
pfd[pollsize].fd = pack_objects.err;
|
|
|
|
pfd[pollsize].events = POLLIN;
|
|
|
|
pe = pollsize;
|
|
|
|
pollsize++;
|
|
|
|
}
|
|
|
|
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
if (!pollsize)
|
|
|
|
break;
|
|
|
|
|
|
|
|
polltimeout = pack_data->keepalive < 0
|
|
|
|
? -1
|
|
|
|
: 1000 * pack_data->keepalive;
|
|
|
|
|
|
|
|
ret = poll(pfd, pollsize, polltimeout);
|
|
|
|
|
|
|
|
if (ret < 0) {
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
if (errno != EINTR) {
|
|
|
|
error_errno("poll failed, resuming");
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
sleep(1);
|
|
|
|
}
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (0 <= pe && (pfd[pe].revents & (POLLIN|POLLHUP))) {
|
|
|
|
/* Status ready; we ship that in the side-band
|
|
|
|
* or dump to the standard error.
|
|
|
|
*/
|
|
|
|
sz = xread(pack_objects.err, progress,
|
|
|
|
sizeof(progress));
|
|
|
|
if (0 < sz)
|
|
|
|
send_client_data(2, progress, sz,
|
|
|
|
pack_data->use_sideband);
|
|
|
|
else if (sz == 0) {
|
|
|
|
close(pack_objects.err);
|
|
|
|
pack_objects.err = -1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
goto fail;
|
|
|
|
/* give priority to status messages */
|
|
|
|
continue;
|
|
|
|
}
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
if (0 <= pu && (pfd[pu].revents & (POLLIN|POLLHUP))) {
|
|
|
|
int result = relay_pack_data(pack_objects.out,
|
|
|
|
output_state,
|
|
|
|
pack_data->use_sideband,
|
|
|
|
!!uri_protocols);
|
|
|
|
|
|
|
|
if (result == 0) {
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
close(pack_objects.out);
|
|
|
|
pack_objects.out = -1;
|
|
|
|
} else if (result < 0) {
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
goto fail;
|
|
|
|
}
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We hit the keepalive timeout without saying anything; send
|
|
|
|
* an empty message on the data sideband just to let the other
|
|
|
|
* side know we're still working on it, but don't have any data
|
|
|
|
* yet.
|
|
|
|
*
|
|
|
|
* If we don't have a sideband channel, there's no room in the
|
|
|
|
* protocol to say anything, so those clients are just out of
|
|
|
|
* luck.
|
|
|
|
*/
|
|
|
|
if (!ret && pack_data->use_sideband) {
|
|
|
|
static const char buf[] = "0005\1";
|
|
|
|
write_or_die(1, buf, 5);
|
|
|
|
}
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
}
|
|
|
|
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
if (finish_command(&pack_objects)) {
|
|
|
|
error("git upload-pack: git-pack-objects died with error.");
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
/* flush the data */
|
|
|
|
if (output_state->used > 0) {
|
|
|
|
send_client_data(1, output_state->buffer, output_state->used,
|
|
|
|
pack_data->use_sideband);
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
fprintf(stderr, "flushed.\n");
|
|
|
|
}
|
|
|
|
free(output_state);
|
|
|
|
if (pack_data->use_sideband)
|
upload-pack: Use finish_{command,async}() instead of waitpid().
upload-pack spawns two processes, rev-list and pack-objects, and carefully
monitors their status so that it can report failure to the remote end.
This change removes the complicated procedures on the grounds of the
following observations:
- If everything is OK, rev-list closes its output pipe end, upon which
pack-objects (which reads from the pipe) sees EOF and terminates itself,
closing its output (and error) pipes. upload-pack reads from both until
it sees EOF in both. It collects the exit codes of the child processes
(which indicate success) and terminates successfully.
- If rev-list sees an error, it closes its output and terminates with
failure. pack-objects sees EOF in its input and terminates successfully.
Again upload-pack reads its inputs until EOF. When it now collects
the exit codes of its child processes, it notices the failure of rev-list
and signals failure to the remote end.
- If pack-objects sees an error, it terminates with failure. Since this
breaks the pipe to rev-list, rev-list is killed with SIGPIPE.
upload-pack reads its input until EOF, then collects the exit codes of
the child processes, notices their failures, and signals failure to the
remote end.
- If upload-pack itself dies unexpectedly, pack-objects is killed with
SIGPIPE, and subsequently also rev-list.
The upshot of this is that precise monitoring of child processes is not
required because both terminate if either one of them dies unexpectedly.
This allows us to use finish_command() and finish_async() instead of
an explicit waitpid(2) call.
The change is smaller than it looks because most of it only reduces the
indentation of a large part of the inner loop.
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
packet_flush(1);
|
|
|
|
return;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
free(output_state);
|
|
|
|
send_client_data(3, abort_msg, sizeof(abort_msg),
|
|
|
|
pack_data->use_sideband);
|
|
|
|
die("git upload-pack: %s", abort_msg);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int do_got_oid(struct upload_pack_data *data, const struct object_id *oid)
|
|
|
|
{
|
|
|
|
int we_knew_they_have = 0;
|
|
|
|
struct object *o = parse_object(the_repository, oid);
|
|
|
|
|
|
|
|
if (!o)
|
|
|
|
die("oops (%s)", oid_to_hex(oid));
|
|
|
|
if (o->type == OBJ_COMMIT) {
|
|
|
|
struct commit_list *parents;
|
|
|
|
struct commit *commit = (struct commit *)o;
|
|
|
|
if (o->flags & THEY_HAVE)
|
|
|
|
we_knew_they_have = 1;
|
|
|
|
else
|
|
|
|
o->flags |= THEY_HAVE;
|
|
|
|
if (!data->oldest_have || (commit->date < data->oldest_have))
|
|
|
|
data->oldest_have = commit->date;
|
|
|
|
for (parents = commit->parents;
|
|
|
|
parents;
|
|
|
|
parents = parents->next)
|
|
|
|
parents->item->object.flags |= THEY_HAVE;
|
|
|
|
}
|
|
|
|
if (!we_knew_they_have) {
|
|
|
|
add_object_array(o, NULL, &data->have_obj);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int got_oid(struct upload_pack_data *data,
|
|
|
|
const char *hex, struct object_id *oid)
|
|
|
|
{
|
|
|
|
if (get_oid_hex(hex, oid))
|
|
|
|
die("git upload-pack: expected SHA1 object, got '%s'", hex);
|
|
|
|
if (!repo_has_object_file_with_flags(the_repository, oid,
|
|
|
|
OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT))
|
|
|
|
return -1;
|
|
|
|
return do_got_oid(data, oid);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ok_to_give_up(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
timestamp_t min_generation = GENERATION_NUMBER_ZERO;
|
|
|
|
|
|
|
|
if (!data->have_obj.nr)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return can_all_from_reach_with_flag(&data->want_obj, THEY_HAVE,
|
|
|
|
COMMON_KNOWN, data->oldest_have,
|
|
|
|
min_generation);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int get_common_commits(struct upload_pack_data *data,
|
|
|
|
struct packet_reader *reader)
|
|
|
|
{
|
|
|
|
struct object_id oid;
|
|
|
|
char last_hex[GIT_MAX_HEXSZ + 1];
|
|
|
|
int got_common = 0;
|
|
|
|
int got_other = 0;
|
|
|
|
int sent_ready = 0;
|
|
|
|
|
|
|
|
save_commit_buffer = 0;
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
const char *arg;
|
|
|
|
|
|
|
|
reset_timeout(data->timeout);
|
|
|
|
|
|
|
|
if (packet_reader_read(reader) != PACKET_READ_NORMAL) {
|
|
|
|
if (data->multi_ack == MULTI_ACK_DETAILED
|
|
|
|
&& got_common
|
|
|
|
&& !got_other
|
|
|
|
&& ok_to_give_up(data)) {
|
|
|
|
sent_ready = 1;
|
|
|
|
packet_write_fmt(1, "ACK %s ready\n", last_hex);
|
|
|
|
}
|
|
|
|
if (data->have_obj.nr == 0 || data->multi_ack)
|
|
|
|
packet_write_fmt(1, "NAK\n");
|
|
|
|
|
|
|
|
if (data->no_done && sent_ready) {
|
|
|
|
packet_write_fmt(1, "ACK %s\n", last_hex);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (data->stateless_rpc)
|
|
|
|
exit(0);
|
|
|
|
got_common = 0;
|
|
|
|
got_other = 0;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (skip_prefix(reader->line, "have ", &arg)) {
|
|
|
|
switch (got_oid(data, arg, &oid)) {
|
|
|
|
case -1: /* they have what we do not */
|
|
|
|
got_other = 1;
|
|
|
|
if (data->multi_ack
|
|
|
|
&& ok_to_give_up(data)) {
|
|
|
|
const char *hex = oid_to_hex(&oid);
|
|
|
|
if (data->multi_ack == MULTI_ACK_DETAILED) {
|
|
|
|
sent_ready = 1;
|
|
|
|
packet_write_fmt(1, "ACK %s ready\n", hex);
|
|
|
|
} else
|
|
|
|
packet_write_fmt(1, "ACK %s continue\n", hex);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
got_common = 1;
|
|
|
|
oid_to_hex_r(last_hex, &oid);
|
|
|
|
if (data->multi_ack == MULTI_ACK_DETAILED)
|
|
|
|
packet_write_fmt(1, "ACK %s common\n", last_hex);
|
|
|
|
else if (data->multi_ack)
|
|
|
|
packet_write_fmt(1, "ACK %s continue\n", last_hex);
|
|
|
|
else if (data->have_obj.nr == 1)
|
|
|
|
packet_write_fmt(1, "ACK %s\n", last_hex);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(reader->line, "done")) {
|
|
|
|
if (data->have_obj.nr > 0) {
|
|
|
|
if (data->multi_ack)
|
|
|
|
packet_write_fmt(1, "ACK %s\n", last_hex);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
packet_write_fmt(1, "NAK\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
die("git upload-pack: expected SHA1 list, got '%s'", reader->line);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int is_our_ref(struct object *o, enum allow_uor allow_uor)
|
|
|
|
{
|
|
|
|
int allow_hidden_ref = (allow_uor &
|
|
|
|
(ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1));
|
|
|
|
return o->flags & ((allow_hidden_ref ? HIDDEN_REF : 0) | OUR_REF);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* on successful case, it's up to the caller to close cmd->out
|
|
|
|
*/
|
|
|
|
static int do_reachable_revlist(struct child_process *cmd,
|
|
|
|
struct object_array *src,
|
|
|
|
struct object_array *reachable,
|
|
|
|
enum allow_uor allow_uor)
|
|
|
|
{
|
|
|
|
struct object *o;
|
|
|
|
FILE *cmd_in = NULL;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
strvec_pushl(&cmd->args, "rev-list", "--stdin", NULL);
|
|
|
|
cmd->git_cmd = 1;
|
|
|
|
cmd->no_stderr = 1;
|
|
|
|
cmd->in = -1;
|
|
|
|
cmd->out = -1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the next rev-list --stdin encounters an unknown commit,
|
|
|
|
* it terminates, which will cause SIGPIPE in the write loop
|
|
|
|
* below.
|
|
|
|
*/
|
|
|
|
sigchain_push(SIGPIPE, SIG_IGN);
|
|
|
|
|
|
|
|
if (start_command(cmd))
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
cmd_in = xfdopen(cmd->in, "w");
|
|
|
|
|
|
|
|
for (i = get_max_object_index(); 0 < i; ) {
|
|
|
|
o = get_indexed_object(--i);
|
|
|
|
if (!o)
|
|
|
|
continue;
|
|
|
|
if (reachable && o->type == OBJ_COMMIT)
|
|
|
|
o->flags &= ~TMP_MARK;
|
|
|
|
if (!is_our_ref(o, allow_uor))
|
|
|
|
continue;
|
|
|
|
if (fprintf(cmd_in, "^%s\n", oid_to_hex(&o->oid)) < 0)
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
for (i = 0; i < src->nr; i++) {
|
|
|
|
o = src->objects[i].item;
|
|
|
|
if (is_our_ref(o, allow_uor)) {
|
|
|
|
if (reachable)
|
|
|
|
add_object_array(o, NULL, reachable);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (reachable && o->type == OBJ_COMMIT)
|
|
|
|
o->flags |= TMP_MARK;
|
|
|
|
if (fprintf(cmd_in, "%s\n", oid_to_hex(&o->oid)) < 0)
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
if (ferror(cmd_in) || fflush(cmd_in))
|
|
|
|
goto error;
|
|
|
|
fclose(cmd_in);
|
|
|
|
cmd->in = -1;
|
|
|
|
sigchain_pop(SIGPIPE);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
error:
|
|
|
|
sigchain_pop(SIGPIPE);
|
|
|
|
|
|
|
|
if (cmd_in)
|
|
|
|
fclose(cmd_in);
|
|
|
|
if (cmd->out >= 0)
|
|
|
|
close(cmd->out);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int get_reachable_list(struct upload_pack_data *data,
|
|
|
|
struct object_array *reachable)
|
|
|
|
{
|
|
|
|
struct child_process cmd = CHILD_PROCESS_INIT;
|
|
|
|
int i;
|
|
|
|
struct object *o;
|
|
|
|
char namebuf[GIT_MAX_HEXSZ + 2]; /* ^ + hash + LF */
|
|
|
|
const unsigned hexsz = the_hash_algo->hexsz;
|
|
|
|
|
|
|
|
if (do_reachable_revlist(&cmd, &data->shallows, reachable,
|
|
|
|
data->allow_uor) < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
while ((i = read_in_full(cmd.out, namebuf, hexsz + 1)) == hexsz + 1) {
|
|
|
|
struct object_id oid;
|
|
|
|
const char *p;
|
|
|
|
|
|
|
|
if (parse_oid_hex(namebuf, &oid, &p) || *p != '\n')
|
|
|
|
break;
|
|
|
|
|
|
|
|
o = lookup_object(the_repository, &oid);
|
|
|
|
if (o && o->type == OBJ_COMMIT) {
|
|
|
|
o->flags &= ~TMP_MARK;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (i = get_max_object_index(); 0 < i; i--) {
|
|
|
|
o = get_indexed_object(i - 1);
|
|
|
|
if (o && o->type == OBJ_COMMIT &&
|
|
|
|
(o->flags & TMP_MARK)) {
|
|
|
|
add_object_array(o, NULL, reachable);
|
|
|
|
o->flags &= ~TMP_MARK;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
close(cmd.out);
|
|
|
|
|
|
|
|
if (finish_command(&cmd))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int has_unreachable(struct object_array *src, enum allow_uor allow_uor)
|
|
|
|
{
|
|
|
|
struct child_process cmd = CHILD_PROCESS_INIT;
|
|
|
|
char buf[1];
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (do_reachable_revlist(&cmd, src, NULL, allow_uor) < 0)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The commits out of the rev-list are not ancestors of
|
|
|
|
* our ref.
|
|
|
|
*/
|
|
|
|
i = read_in_full(cmd.out, buf, 1);
|
|
|
|
if (i)
|
|
|
|
goto error;
|
|
|
|
close(cmd.out);
|
|
|
|
cmd.out = -1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* rev-list may have died by encountering a bad commit
|
|
|
|
* in the history, in which case we do want to bail out
|
|
|
|
* even when it showed no commit.
|
|
|
|
*/
|
|
|
|
if (finish_command(&cmd))
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
/* All the non-tip ones are ancestors of what we advertised */
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
error:
|
|
|
|
if (cmd.out >= 0)
|
|
|
|
close(cmd.out);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void check_non_tip(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the normal in-process case without
|
|
|
|
* uploadpack.allowReachableSHA1InWant,
|
|
|
|
* non-tip requests can never happen.
|
|
|
|
*/
|
|
|
|
if (!data->stateless_rpc && !(data->allow_uor & ALLOW_REACHABLE_SHA1))
|
|
|
|
goto error;
|
|
|
|
if (!has_unreachable(&data->want_obj, data->allow_uor))
|
|
|
|
/* All the non-tip ones are ancestors of what we advertised */
|
|
|
|
return;
|
|
|
|
|
|
|
|
error:
|
|
|
|
/* Pick one of them (we know there at least is one) */
|
|
|
|
for (i = 0; i < data->want_obj.nr; i++) {
|
|
|
|
struct object *o = data->want_obj.objects[i].item;
|
|
|
|
if (!is_our_ref(o, data->allow_uor)) {
|
|
|
|
packet_writer_error(&data->writer,
|
upload-pack: send ERR packet for non-tip objects
Commit bdb31eada7 (upload-pack: report "not our ref" to client,
2017-02-23) catches the case where a client asks for an object we don't
have, and issues a message that the client can show to the user (in
addition to dying and writing to stderr).
There's a similar case (with the same message) when the client asks for
an object which we _do_ have, but which isn't a ref tip (or isn't
reachable, when uploadpack.allowReachableSHA1InWant is true). Let's give
that one the same treatment, for the same reason (namely that it's more
informative to the client than just hanging up, since they won't see our
stderr over some protocols).
There are two tests here. We cover it most directly in t5530 by invoking
upload-pack, which matches the existing "not our ref" test.
But a more end-to-end check is that "git fetch" actually shows the
message to the client. We're already checking in t5516 that this case
fails, so we can just check stderr there, too. Note that even after we
started ignoring SIGPIPE in 8bf4becf0c, this could in theory still be
racy as described in that commit (because we die() on write failures
before pumping the connection for any ERR packets).
In practice this should be OK for this case. The server will not
actually check reachability until it has received our whole group of
"want" lines. And since we have no objects in the repository, we won't
send any "have" lines, meaning we're always waiting to read the server
response.
Note also that this case cannot happen in the v2 protocol, since it
allows any available object to be requested. However, we don't have to
take any steps to protect against the upcoming GIT_TEST_PROTOCOL_VERSION
in our tests:
- the tests in t5516 would already need to be skipped under v2, and
that is covered by ab0c5f5096 (tests: always test fetch of
unreachable with v0, 2019-02-25)
- the tests in t5530 invoke upload-pack directly, which will continue
to default to v0. Eventually we may have a test setting which uses
v2 even for bare upload-pack calls, but we can't override it here
until we know what the setting looks like.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
"upload-pack: not our ref %s",
|
|
|
|
oid_to_hex(&o->oid));
|
|
|
|
die("git upload-pack: not our ref %s",
|
|
|
|
oid_to_hex(&o->oid));
|
upload-pack: send ERR packet for non-tip objects
Commit bdb31eada7 (upload-pack: report "not our ref" to client,
2017-02-23) catches the case where a client asks for an object we don't
have, and issues a message that the client can show to the user (in
addition to dying and writing to stderr).
There's a similar case (with the same message) when the client asks for
an object which we _do_ have, but which isn't a ref tip (or isn't
reachable, when uploadpack.allowReachableSHA1InWant is true). Let's give
that one the same treatment, for the same reason (namely that it's more
informative to the client than just hanging up, since they won't see our
stderr over some protocols).
There are two tests here. We cover it most directly in t5530 by invoking
upload-pack, which matches the existing "not our ref" test.
But a more end-to-end check is that "git fetch" actually shows the
message to the client. We're already checking in t5516 that this case
fails, so we can just check stderr there, too. Note that even after we
started ignoring SIGPIPE in 8bf4becf0c, this could in theory still be
racy as described in that commit (because we die() on write failures
before pumping the connection for any ERR packets).
In practice this should be OK for this case. The server will not
actually check reachability until it has received our whole group of
"want" lines. And since we have no objects in the repository, we won't
send any "have" lines, meaning we're always waiting to read the server
response.
Note also that this case cannot happen in the v2 protocol, since it
allows any available object to be requested. However, we don't have to
take any steps to protect against the upcoming GIT_TEST_PROTOCOL_VERSION
in our tests:
- the tests in t5516 would already need to be skipped under v2, and
that is covered by ab0c5f5096 (tests: always test fetch of
unreachable with v0, 2019-02-25)
- the tests in t5530 invoke upload-pack directly, which will continue
to default to v0. Eventually we may have a test setting which uses
v2 even for bare upload-pack calls, but we can't override it here
until we know what the setting looks like.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void send_shallow(struct upload_pack_data *data,
|
|
|
|
struct commit_list *result)
|
|
|
|
{
|
|
|
|
while (result) {
|
|
|
|
struct object *object = &result->item->object;
|
|
|
|
if (!(object->flags & (CLIENT_SHALLOW|NOT_SHALLOW))) {
|
|
|
|
packet_writer_write(&data->writer, "shallow %s",
|
|
|
|
oid_to_hex(&object->oid));
|
|
|
|
register_shallow(the_repository, &object->oid);
|
|
|
|
data->shallow_nr++;
|
|
|
|
}
|
|
|
|
result = result->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void send_unshallow(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < data->shallows.nr; i++) {
|
|
|
|
struct object *object = data->shallows.objects[i].item;
|
|
|
|
if (object->flags & NOT_SHALLOW) {
|
|
|
|
struct commit_list *parents;
|
|
|
|
packet_writer_write(&data->writer, "unshallow %s",
|
|
|
|
oid_to_hex(&object->oid));
|
|
|
|
object->flags &= ~CLIENT_SHALLOW;
|
|
|
|
/*
|
|
|
|
* We want to _register_ "object" as shallow, but we
|
|
|
|
* also need to traverse object's parents to deepen a
|
|
|
|
* shallow clone. Unregister it for now so we can
|
|
|
|
* parse and add the parents to the want list, then
|
|
|
|
* re-register it.
|
|
|
|
*/
|
|
|
|
unregister_shallow(&object->oid);
|
|
|
|
object->parsed = 0;
|
|
|
|
parse_commit_or_die((struct commit *)object);
|
|
|
|
parents = ((struct commit *)object)->parents;
|
|
|
|
while (parents) {
|
|
|
|
add_object_array(&parents->item->object,
|
|
|
|
NULL, &data->want_obj);
|
|
|
|
parents = parents->next;
|
|
|
|
}
|
|
|
|
add_object_array(object, NULL, &data->extra_edge_obj);
|
|
|
|
}
|
|
|
|
/* make sure commit traversal conforms to client */
|
|
|
|
register_shallow(the_repository, &object->oid);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int check_ref(const char *refname_full, const struct object_id *oid,
|
|
|
|
int flag, void *cb_data);
|
|
|
|
static void deepen(struct upload_pack_data *data, int depth)
|
|
|
|
{
|
|
|
|
if (depth == INFINITE_DEPTH && !is_repository_shallow(the_repository)) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < data->shallows.nr; i++) {
|
|
|
|
struct object *object = data->shallows.objects[i].item;
|
|
|
|
object->flags |= NOT_SHALLOW;
|
|
|
|
}
|
|
|
|
} else if (data->deepen_relative) {
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
struct object_array reachable_shallows = OBJECT_ARRAY_INIT;
|
|
|
|
struct commit_list *result;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Checking for reachable shallows requires that our refs be
|
|
|
|
* marked with OUR_REF.
|
|
|
|
*/
|
|
|
|
head_ref_namespaced(check_ref, data);
|
|
|
|
for_each_namespaced_ref(check_ref, data);
|
|
|
|
|
|
|
|
get_reachable_list(data, &reachable_shallows);
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
result = get_shallow_commits(&reachable_shallows,
|
|
|
|
depth + 1,
|
|
|
|
SHALLOW, NOT_SHALLOW);
|
|
|
|
send_shallow(data, result);
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
free_commit_list(result);
|
|
|
|
object_array_clear(&reachable_shallows);
|
|
|
|
} else {
|
|
|
|
struct commit_list *result;
|
|
|
|
|
|
|
|
result = get_shallow_commits(&data->want_obj, depth,
|
|
|
|
SHALLOW, NOT_SHALLOW);
|
|
|
|
send_shallow(data, result);
|
|
|
|
free_commit_list(result);
|
|
|
|
}
|
|
|
|
|
|
|
|
send_unshallow(data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void deepen_by_rev_list(struct upload_pack_data *data,
|
|
|
|
int ac,
|
|
|
|
const char **av)
|
|
|
|
{
|
|
|
|
struct commit_list *result;
|
|
|
|
|
upload-pack: disable commit graph more gently for shallow traversal
When the client has asked for certain shallow options like
"deepen-since", we do a custom rev-list walk that pretends to be
shallow. Before doing so, we have to disable the commit-graph, since it
is not compatible with the shallow view of the repository. That's
handled by 829a321569 (commit-graph: close_commit_graph before shallow
walk, 2018-08-20). That commit literally closes and frees our
repo->objects->commit_graph struct.
That creates an interesting problem for commits that have _already_ been
parsed using the commit graph. Their commit->object.parsed flag is set,
their commit->graph_pos is set, but their commit->maybe_tree may still
be NULL. When somebody later calls repo_get_commit_tree(), we see that
we haven't loaded the tree oid yet and try to get it from the commit
graph. But since it has been freed, we segfault!
So the root of the issue is a data dependency between the commit's
lazy-load of the tree oid and the fact that the commit graph can go
away mid-process. How can we resolve it?
There are a couple of general approaches:
1. The obvious answer is to avoid loading the tree from the graph when
we see that it's NULL. But then what do we return for the tree oid?
If we return NULL, our caller in do_traverse() will rightly
complain that we have no tree. We'd have to fallback to loading the
actual commit object and re-parsing it. That requires teaching
parse_commit_buffer() to understand re-parsing (i.e., not starting
from a clean slate and not leaking any allocated bits like parent
list pointers).
2. When we close the commit graph, walk through the set of in-memory
objects and clear any graph_pos pointers. But this means we also
have to "unparse" any such commits so that we know they still need
to open the commit object to fill in their trees. So it's no less
complicated than (1), and is more expensive (since we clear objects
we might not later need).
3. Stop freeing the commit-graph struct. Continue to let it be used
for lazy-loads of tree oids, but let upload-pack specify that it
shouldn't be used for further commit parsing.
4. Push the whole shallow rev-list out to its own sub-process, with
the commit-graph disabled from the start, giving it a clean memory
space to work from.
I've chosen (3) here. Options (1) and (2) would work, but are
non-trivial to implement. Option (4) is more expensive, and I'm not sure
how complicated it is (shelling out for the actual rev-list part is
easy, but we do then parse the resulting commits internally, and I'm not
clear which parts need to be handling shallow-ness).
The new test in t5500 triggers this segfault, but see the comments there
for how horribly intimate it has to be with how both upload-pack and
commit graphs work.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
disable_commit_graph(the_repository);
|
|
|
|
result = get_shallow_commits_by_rev_list(ac, av, SHALLOW, NOT_SHALLOW);
|
|
|
|
send_shallow(data, result);
|
|
|
|
free_commit_list(result);
|
|
|
|
send_unshallow(data);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns 1 if a shallow list is sent or 0 otherwise */
|
|
|
|
static int send_shallow_list(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (data->depth > 0 && data->deepen_rev_list)
|
|
|
|
die("git upload-pack: deepen and deepen-since (or deepen-not) cannot be used together");
|
|
|
|
if (data->depth > 0) {
|
|
|
|
deepen(data, data->depth);
|
|
|
|
ret = 1;
|
|
|
|
} else if (data->deepen_rev_list) {
|
|
|
|
struct strvec av = STRVEC_INIT;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
strvec_push(&av, "rev-list");
|
|
|
|
if (data->deepen_since)
|
|
|
|
strvec_pushf(&av, "--max-age=%"PRItime, data->deepen_since);
|
|
|
|
if (data->deepen_not.nr) {
|
|
|
|
strvec_push(&av, "--not");
|
|
|
|
for (i = 0; i < data->deepen_not.nr; i++) {
|
|
|
|
struct string_list_item *s = data->deepen_not.items + i;
|
|
|
|
strvec_push(&av, s->string);
|
|
|
|
}
|
|
|
|
strvec_push(&av, "--not");
|
|
|
|
}
|
|
|
|
for (i = 0; i < data->want_obj.nr; i++) {
|
|
|
|
struct object *o = data->want_obj.objects[i].item;
|
|
|
|
strvec_push(&av, oid_to_hex(&o->oid));
|
|
|
|
}
|
|
|
|
deepen_by_rev_list(data, av.nr, av.v);
|
|
|
|
strvec_clear(&av);
|
|
|
|
ret = 1;
|
|
|
|
} else {
|
|
|
|
if (data->shallows.nr > 0) {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < data->shallows.nr; i++)
|
|
|
|
register_shallow(the_repository,
|
|
|
|
&data->shallows.objects[i].item->oid);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
data->shallow_nr += data->shallows.nr;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int process_shallow(const char *line, struct object_array *shallows)
|
|
|
|
{
|
|
|
|
const char *arg;
|
|
|
|
if (skip_prefix(line, "shallow ", &arg)) {
|
|
|
|
struct object_id oid;
|
|
|
|
struct object *object;
|
|
|
|
if (get_oid_hex(arg, &oid))
|
|
|
|
die("invalid shallow line: %s", line);
|
|
|
|
object = parse_object(the_repository, &oid);
|
|
|
|
if (!object)
|
|
|
|
return 1;
|
|
|
|
if (object->type != OBJ_COMMIT)
|
|
|
|
die("invalid shallow object %s", oid_to_hex(&oid));
|
|
|
|
if (!(object->flags & CLIENT_SHALLOW)) {
|
|
|
|
object->flags |= CLIENT_SHALLOW;
|
|
|
|
add_object_array(object, NULL, shallows);
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Try to handle "line" as a "deepen <n>" request line.
 *
 * Returns 0 if the line does not start with "deepen " and 1 if it was
 * consumed, in which case "*depth" holds the requested depth.  The
 * number must be a complete, strictly positive integer that fits in an
 * int; anything else is fatal.
 */
static int process_deepen(const char *line, int *depth)
{
	const char *arg;
	char *end = NULL;
	long value;

	if (!skip_prefix(line, "deepen ", &arg))
		return 0;

	/*
	 * Validate the value as a long before narrowing it: casting first
	 * would let a number larger than INT_MAX be truncated into what
	 * looks like a valid small depth.
	 */
	value = strtol(arg, &end, 0);
	if (!end || end == arg || *end || value <= 0 || value > INT_MAX)
		die("Invalid deepen: %s", line);

	*depth = (int)value;
	return 1;
}
|
|
|
|
|
|
|
|
/*
 * Try to handle "line" as a "deepen-since <timestamp>" request line.
 *
 * Returns 0 if the line does not start with "deepen-since " and 1 if it
 * was consumed, in which case "*deepen_since" holds the timestamp and
 * "*deepen_rev_list" is set to 1.  Trailing garbage, a zero timestamp
 * and the value -1 are all fatal.
 */
static int process_deepen_since(const char *line, timestamp_t *deepen_since, int *deepen_rev_list)
{
	const char *arg;
	char *end = NULL;

	if (!skip_prefix(line, "deepen-since ", &arg))
		return 0;

	*deepen_since = parse_timestamp(arg, &end, 0);
	/*
	 * Test the parsed value, not the out-pointer: the pointer is never
	 * NULL, so checking it would let a zero timestamp through.
	 */
	if (!end || *end || !*deepen_since ||
	    /* revisions.c's max_age -1 is special */
	    *deepen_since == -1)
		die("Invalid deepen-since: %s", line);

	*deepen_rev_list = 1;
	return 1;
}
|
|
|
|
|
|
|
|
static int process_deepen_not(const char *line, struct string_list *deepen_not, int *deepen_rev_list)
|
|
|
|
{
|
|
|
|
const char *arg;
|
|
|
|
if (skip_prefix(line, "deepen-not ", &arg)) {
|
|
|
|
char *ref = NULL;
|
|
|
|
struct object_id oid;
|
|
|
|
if (expand_ref(the_repository, arg, strlen(arg), &oid, &ref) != 1)
|
|
|
|
die("git upload-pack: ambiguous deepen-not: %s", line);
|
|
|
|
string_list_append(deepen_not, ref);
|
|
|
|
free(ref);
|
|
|
|
*deepen_rev_list = 1;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
NORETURN __attribute__((format(printf,2,3)))
|
|
|
|
static void send_err_and_die(struct upload_pack_data *data,
|
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
va_start(ap, fmt);
|
|
|
|
strbuf_vaddf(&buf, fmt, ap);
|
|
|
|
va_end(ap);
|
|
|
|
|
|
|
|
packet_writer_error(&data->writer, "%s", buf.buf);
|
|
|
|
die("%s", buf.buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void check_one_filter(struct upload_pack_data *data,
|
|
|
|
struct list_objects_filter_options *opts)
|
|
|
|
{
|
|
|
|
const char *key = list_object_filter_config_name(opts->choice);
|
|
|
|
struct string_list_item *item = string_list_lookup(&data->allowed_filters,
|
|
|
|
key);
|
|
|
|
int allowed;
|
|
|
|
|
|
|
|
if (item)
|
|
|
|
allowed = (intptr_t)item->util;
|
|
|
|
else
|
|
|
|
allowed = data->allow_filter_fallback;
|
|
|
|
|
|
|
|
if (!allowed)
|
|
|
|
send_err_and_die(data, "filter '%s' not supported", key);
|
upload-pack.c: introduce 'uploadpackfilter.tree.maxDepth'
In b79cf959b2 (upload-pack.c: allow banning certain object filter(s),
2020-02-26), we introduced functionality to disallow certain object
filters from being chosen from within 'git upload-pack'. Traditionally,
administrators use this functionality to disallow filters that are known
to perform slowly, for e.g., those that do not have bitmap-level
filtering.
In the past, the '--filter=tree:<n>' was one such filter that does not
have bitmap-level filtering support, and so was likely to be banned by
administrators.
However, in the previous couple of commits, we introduced bitmap-level
filtering for the case when 'n' is equal to '0', i.e., as if we had a
'--filter=tree:none' choice.
While it would be sufficient to simply write
$ git config uploadpackfilter.tree.allow true
(since it would allow all values of 'n'), we would like to be able to
allow this filter for certain values of 'n', i.e., those no greater than
some pre-specified maximum.
In order to do this, introduce a new configuration key, as follows:
$ git config uploadpackfilter.tree.maxDepth <m>
where '<m>' specifies the maximum allowed value of 'n' in the filter
'tree:n'. Administrators who wish to allow for only the value '0' can
write:
$ git config uploadpackfilter.tree.allow true
$ git config uploadpackfilter.tree.maxDepth 0
which allows '--filter=tree:0', but no other values.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Acked-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
|
|
|
|
if (opts->choice == LOFC_TREE_DEPTH &&
|
|
|
|
opts->tree_exclude_depth > data->tree_filter_max_depth)
|
|
|
|
send_err_and_die(data,
|
|
|
|
"tree filter allows max depth %lu, but got %lu",
|
|
|
|
data->tree_filter_max_depth,
|
|
|
|
opts->tree_exclude_depth);
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
}
|
|
|
|
|
|
|
|
static void check_filter_recurse(struct upload_pack_data *data,
|
|
|
|
struct list_objects_filter_options *opts)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
check_one_filter(data, opts);
|
|
|
|
if (opts->choice != LOFC_COMBINE)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < opts->sub_nr; i++)
|
|
|
|
check_filter_recurse(data, &opts->sub[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void die_if_using_banned_filter(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
check_filter_recurse(data, &data->filter_options);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void receive_needs(struct upload_pack_data *data,
|
|
|
|
struct packet_reader *reader)
|
|
|
|
{
|
|
|
|
int has_non_tip = 0;
|
|
|
|
|
|
|
|
data->shallow_nr = 0;
|
|
|
|
for (;;) {
|
|
|
|
struct object *o;
|
|
|
|
const char *features;
|
|
|
|
struct object_id oid_buf;
|
|
|
|
const char *arg;
|
|
|
|
size_t feature_len;
|
|
|
|
|
|
|
|
reset_timeout(data->timeout);
|
|
|
|
if (packet_reader_read(reader) != PACKET_READ_NORMAL)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (process_shallow(reader->line, &data->shallows))
|
|
|
|
continue;
|
|
|
|
if (process_deepen(reader->line, &data->depth))
|
allow cloning a repository "shallowly"
By specifying a depth, you can now clone a repository such that
all fetched ancestor-chains' length is at most "depth". For example,
if the upstream repository has only 2 branches ("A" and "B"), which
are linear, and you specify depth 3, you will get A, A~1, A~2, A~3,
B, B~1, B~2, and B~3. The ends are automatically made shallow
commits.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <junkio@cox.net>
18 years ago
|
|
|
continue;
|
|
|
|
if (process_deepen_since(reader->line, &data->deepen_since, &data->deepen_rev_list))
|
|
|
|
continue;
|
|
|
|
if (process_deepen_not(reader->line, &data->deepen_not, &data->deepen_rev_list))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (skip_prefix(reader->line, "filter ", &arg)) {
|
|
|
|
if (!data->filter_capability_requested)
|
|
|
|
die("git upload-pack: filtering capability not negotiated");
|
|
|
|
list_objects_filter_die_if_populated(&data->filter_options);
|
|
|
|
parse_list_objects_filter(&data->filter_options, arg);
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
die_if_using_banned_filter(data);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!skip_prefix(reader->line, "want ", &arg) ||
|
|
|
|
parse_oid_hex(arg, &oid_buf, &features))
|
|
|
|
die("git upload-pack: protocol error, "
|
|
|
|
"expected to get object ID, not '%s'", reader->line);
|
|
|
|
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
if (parse_feature_request(features, "deepen-relative"))
|
|
|
|
data->deepen_relative = 1;
|
|
|
|
if (parse_feature_request(features, "multi_ack_detailed"))
|
|
|
|
data->multi_ack = MULTI_ACK_DETAILED;
|
|
|
|
else if (parse_feature_request(features, "multi_ack"))
|
|
|
|
data->multi_ack = MULTI_ACK;
|
|
|
|
if (parse_feature_request(features, "no-done"))
|
|
|
|
data->no_done = 1;
|
|
|
|
if (parse_feature_request(features, "thin-pack"))
|
|
|
|
data->use_thin_pack = 1;
|
|
|
|
if (parse_feature_request(features, "ofs-delta"))
|
|
|
|
data->use_ofs_delta = 1;
|
|
|
|
if (parse_feature_request(features, "side-band-64k"))
|
|
|
|
data->use_sideband = LARGE_PACKET_MAX;
|
|
|
|
else if (parse_feature_request(features, "side-band"))
|
|
|
|
data->use_sideband = DEFAULT_PACKET_MAX;
|
|
|
|
if (parse_feature_request(features, "no-progress"))
|
|
|
|
data->no_progress = 1;
|
|
|
|
if (parse_feature_request(features, "include-tag"))
|
|
|
|
data->use_include_tag = 1;
|
|
|
|
if (data->allow_filter &&
|
|
|
|
parse_feature_request(features, "filter"))
|
|
|
|
data->filter_capability_requested = 1;
|
|
|
|
|
|
|
|
arg = parse_feature_value(features, "session-id", &feature_len, NULL);
|
|
|
|
if (arg) {
|
|
|
|
char *client_sid = xstrndup(arg, feature_len);
|
|
|
|
trace2_data_string("transfer", NULL, "client-sid", client_sid);
|
|
|
|
free(client_sid);
|
|
|
|
}
|
|
|
|
|
|
|
|
o = parse_object(the_repository, &oid_buf);
|
|
|
|
if (!o) {
|
|
|
|
packet_writer_error(&data->writer,
|
|
|
|
"upload-pack: not our ref %s",
|
|
|
|
oid_to_hex(&oid_buf));
|
|
|
|
die("git upload-pack: not our ref %s",
|
|
|
|
oid_to_hex(&oid_buf));
|
|
|
|
}
|
|
|
|
if (!(o->flags & WANTED)) {
|
|
|
|
o->flags |= WANTED;
|
|
|
|
if (!((data->allow_uor & ALLOW_ANY_SHA1) == ALLOW_ANY_SHA1
|
|
|
|
|| is_our_ref(o, data->allow_uor)))
|
|
|
|
has_non_tip = 1;
|
|
|
|
add_object_array(o, NULL, &data->want_obj);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We have sent all our refs already, and the other end
|
|
|
|
* should have chosen out of them. When we are operating
|
|
|
|
* in the stateless RPC mode, however, their choice may
|
|
|
|
* have been based on the set of older refs advertised
|
|
|
|
* by another process that handled the initial request.
|
|
|
|
*/
|
|
|
|
if (has_non_tip)
|
|
|
|
check_non_tip(data);
|
|
|
|
|
|
|
|
if (!data->use_sideband && data->daemon_mode)
|
|
|
|
data->no_progress = 1;
|
|
|
|
|
|
|
|
if (data->depth == 0 && !data->deepen_rev_list && data->shallows.nr == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (send_shallow_list(data))
|
|
|
|
packet_flush(1);
|
|
|
|
}
|
|
|
|
|
upload/receive-pack: allow hiding ref hierarchies
A repository may have refs that are only used for its internal
bookkeeping purposes that should not be exposed to the others that
come over the network.
Teach upload-pack to omit some refs from its initial advertisement
by paying attention to the uploadpack.hiderefs multi-valued
configuration variable. Do the same to receive-pack via the
receive.hiderefs variable. As a convenient short-hand, allow using
transfer.hiderefs to set the value to both of these variables.
Any ref that is under the hierarchies listed in the values of these
variables is excluded from responses to requests made by "ls-remote",
"fetch", etc. (for upload-pack) and "push" (for receive-pack).
Because these hidden refs do not count as OUR_REF, an attempt to
fetch objects at the tip of them will be rejected, and because these
refs do not get advertised, "git push :" will not see local branches
that have the same name as them as "matching" ones to be sent.
An attempt to update/delete these hidden refs with an explicit
refspec, e.g. "git push origin :refs/hidden/22", is rejected. This
is not a new restriction. To the pusher, it would appear that there
is no such ref, so its push request will conclude with "Now that I
sent you all the data, it is time for you to update the refs. I saw
that the ref did not exist when I started pushing, and I want the
result to point at this commit". The receiving end will apply the
compare-and-swap rule to this request and rejects the push with
"Well, your update request conflicts with somebody else; I see there
is such a ref.", which is the right thing to do. Otherwise a push to
a hidden ref will always be "the last one wins", which is not a good
default.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
/* return non-zero if the ref is hidden, otherwise 0 */
|
|
|
|
static int mark_our_ref(const char *refname, const char *refname_full,
|
|
|
|
const struct object_id *oid, const struct string_list *hidden_refs)
|
|
|
|
{
|
|
|
|
struct object *o = lookup_unknown_object(the_repository, oid);
|
upload/receive-pack: allow hiding ref hierarchies
A repository may have refs that are only used for its internal
bookkeeping purposes that should not be exposed to the others that
come over the network.
Teach upload-pack to omit some refs from its initial advertisement
by paying attention to the uploadpack.hiderefs multi-valued
configuration variable. Do the same to receive-pack via the
receive.hiderefs variable. As a convenient short-hand, allow using
transfer.hiderefs to set the value to both of these variables.
Any ref that is under the hierarchies listed on the value of these
variable is excluded from responses to requests made by "ls-remote",
"fetch", etc. (for upload-pack) and "push" (for receive-pack).
Because these hidden refs do not count as OUR_REF, an attempt to
fetch objects at the tip of them will be rejected, and because these
refs do not get advertised, "git push :" will not see local branches
that have the same name as them as "matching" ones to be sent.
An attempt to update/delete these hidden refs with an explicit
refspec, e.g. "git push origin :refs/hidden/22", is rejected. This
is not a new restriction. To the pusher, it would appear that there
is no such ref, so its push request will conclude with "Now that I
sent you all the data, it is time for you to update the refs. I saw
that the ref did not exist when I started pushing, and I want the
result to point at this commit". The receiving end will apply the
compare-and-swap rule to this request and rejects the push with
"Well, your update request conflicts with somebody else; I see there
is such a ref.", which is the right thing to do. Otherwise a push to
a hidden ref will always be "the last one wins", which is not a good
default.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
|
|
|
|
if (ref_is_hidden(refname, refname_full, hidden_refs)) {
|
|
|
|
o->flags |= HIDDEN_REF;
|
upload/receive-pack: allow hiding ref hierarchies
A repository may have refs that are only used for its internal
bookkeeping purposes that should not be exposed to the others that
come over the network.
Teach upload-pack to omit some refs from its initial advertisement
by paying attention to the uploadpack.hiderefs multi-valued
configuration variable. Do the same to receive-pack via the
receive.hiderefs variable. As a convenient short-hand, allow using
transfer.hiderefs to set the value to both of these variables.
Any ref that is under the hierarchies listed on the value of these
variable is excluded from responses to requests made by "ls-remote",
"fetch", etc. (for upload-pack) and "push" (for receive-pack).
Because these hidden refs do not count as OUR_REF, an attempt to
fetch objects at the tip of them will be rejected, and because these
refs do not get advertised, "git push :" will not see local branches
that have the same name as them as "matching" ones to be sent.
An attempt to update/delete these hidden refs with an explicit
refspec, e.g. "git push origin :refs/hidden/22", is rejected. This
is not a new restriction. To the pusher, it would appear that there
is no such ref, so its push request will conclude with "Now that I
sent you all the data, it is time for you to update the refs. I saw
that the ref did not exist when I started pushing, and I want the
result to point at this commit". The receiving end will apply the
compare-and-swap rule to this request and rejects the push with
"Well, your update request conflicts with somebody else; I see there
is such a ref.", which is the right thing to do. Otherwise a push to
a hidden ref will always be "the last one wins", which is not a good
default.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
o->flags |= OUR_REF;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int check_ref(const char *refname_full, const struct object_id *oid,
|
|
|
|
int flag UNUSED, void *cb_data)
|
upload-pack: fix transfer.hiderefs over smart-http
When upload-pack advertises the refs (either for a normal,
non-stateless request, or for the initial contact in a
stateless one), we call for_each_ref with the send_ref
function as its callback. send_ref, in turn, calls
mark_our_ref, which checks whether the ref is hidden, and
sets OUR_REF or HIDDEN_REF on the object as appropriate. If
it is hidden, mark_our_ref also returns "1" to signal
send_ref that the ref should not be advertised.
If we are not advertising refs, (i.e., the follow-up
invocation by an http client to send its "want" lines), we
use mark_our_ref directly as a callback to for_each_ref. Its
marking does the right thing, but when it then returns "1"
to for_each_ref, the latter interprets this as an error and
stops iterating. As a result, we skip marking all of the
refs that come lexicographically after it. Any "want" lines
from the client asking for those objects will fail, as they
were not properly marked with OUR_REF.
To solve this, we introduce a wrapper callback around
mark_our_ref which always returns 0 (even if the ref is
hidden, we want to keep iterating). We also tweak the
signature of mark_our_ref to exclude unnecessary parameters
that were present only to conform to the callback interface.
This should make it less likely for somebody to accidentally
use it as a callback in the future.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
{
|
|
|
|
const char *refname = strip_namespace(refname_full);
|
|
|
|
struct upload_pack_data *data = cb_data;
|
|
|
|
|
|
|
|
mark_our_ref(refname, refname_full, oid, &data->hidden_refs);
|
upload-pack: fix transfer.hiderefs over smart-http
When upload-pack advertises the refs (either for a normal,
non-stateless request, or for the initial contact in a
stateless one), we call for_each_ref with the send_ref
function as its callback. send_ref, in turn, calls
mark_our_ref, which checks whether the ref is hidden, and
sets OUR_REF or HIDDEN_REF on the object as appropriate. If
it is hidden, mark_our_ref also returns "1" to signal
send_ref that the ref should not be advertised.
If we are not advertising refs, (i.e., the follow-up
invocation by an http client to send its "want" lines), we
use mark_our_ref directly as a callback to for_each_ref. Its
marking does the right thing, but when it then returns "1"
to for_each_ref, the latter interprets this as an error and
stops iterating. As a result, we skip marking all of the
refs that come lexicographically after it. Any "want" lines
from the client asking for those objects will fail, as they
were not properly marked with OUR_REF.
To solve this, we introduce a wrapper callback around
mark_our_ref which always returns 0 (even if the ref is
hidden, we want to keep iterating). We also tweak the
signature of mark_our_ref to exclude unnecessary parameters
that were present only to conform to the callback interface.
This should make it less likely for somebody to accidentally
use it as a callback in the future.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void format_symref_info(struct strbuf *buf, struct string_list *symref)
|
|
|
|
{
|
|
|
|
struct string_list_item *item;
|
|
|
|
|
|
|
|
if (!symref->nr)
|
|
|
|
return;
|
|
|
|
for_each_string_list_item(item, symref)
|
|
|
|
strbuf_addf(buf, " symref=%s:%s", item->string, (char *)item->util);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void format_session_id(struct strbuf *buf, struct upload_pack_data *d) {
|
|
|
|
if (d->advertise_sid)
|
|
|
|
strbuf_addf(buf, " session-id=%s", trace2_session_id());
|
|
|
|
}
|
|
|
|
|
upload-pack: advertise capabilities when cloning empty repos
When cloning an empty repository, protocol versions 0 and 1 currently
offer nothing but the header and flush packets for the /info/refs
endpoint. This means that no capabilities are provided, so the client
side doesn't know what capabilities are present.
However, this does pose a problem when working with SHA-256
repositories, since we use the capabilities to know the remote side's
object format (hash algorithm). As of 8b214c2e9d ("clone: propagate
object-format when cloning from void", 2023-04-05), this has been fixed
for protocol v2, since there we always read the hash algorithm from the
remote.
Fortunately, the push version of the protocol already indicates a clue
for how to solve this. When the /info/refs endpoint is accessed for a
push and the remote is empty, we include a dummy "capabilities^{}" ref
pointing to the all-zeros object ID. The protocol documentation already
indicates this should _always_ be sent, even for fetches and clones, so
let's just do that, which means we'll properly announce the hash
algorithm as part of the capabilities. This just works with the
existing code because we share the same ref code for fetches and clones,
and libgit2, JGit, and dulwich do as well.
There is one minor issue to fix, though. If we called send_ref with
namespaces, we would return NULL with the capabilities entry, which
would cause a crash. Instead, let's refactor out a function to print
just the ref itself without stripping the namespace and use it for our
special capabilities entry.
Add several sets of tests for HTTP as well as for local clones. The
behavior can be slightly different for HTTP versus a local or SSH clone
because of the stateless-rpc functionality, so it's worth testing both.
Signed-off-by: brian m. carlson <bk2204@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 year ago
|
|
|
static void write_v0_ref(struct upload_pack_data *data,
|
|
|
|
const char *refname, const char *refname_nons,
|
|
|
|
const struct object_id *oid)
|
|
|
|
{
|
|
|
|
static const char *capabilities = "multi_ack thin-pack side-band"
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
" side-band-64k ofs-delta shallow deepen-since deepen-not"
|
|
|
|
" deepen-relative no-progress include-tag multi_ack_detailed";
|
|
|
|
struct object_id peeled;
|
|
|
|
|
|
|
|
if (mark_our_ref(refname_nons, refname, oid, &data->hidden_refs))
|
upload-pack: advertise capabilities when cloning empty repos
When cloning an empty repository, protocol versions 0 and 1 currently
offer nothing but the header and flush packets for the /info/refs
endpoint. This means that no capabilities are provided, so the client
side doesn't know what capabilities are present.
However, this does pose a problem when working with SHA-256
repositories, since we use the capabilities to know the remote side's
object format (hash algorithm). As of 8b214c2e9d ("clone: propagate
object-format when cloning from void", 2023-04-05), this has been fixed
for protocol v2, since there we always read the hash algorithm from the
remote.
Fortunately, the push version of the protocol already indicates a clue
for how to solve this. When the /info/refs endpoint is accessed for a
push and the remote is empty, we include a dummy "capabilities^{}" ref
pointing to the all-zeros object ID. The protocol documentation already
indicates this should _always_ be sent, even for fetches and clones, so
let's just do that, which means we'll properly announce the hash
algorithm as part of the capabilities. This just works with the
existing code because we share the same ref code for fetches and clones,
and libgit2, JGit, and dulwich do as well.
There is one minor issue to fix, though. If we called send_ref with
namespaces, we would return NULL with the capabilities entry, which
would cause a crash. Instead, let's refactor out a function to print
just the ref itself without stripping the namespace and use it for our
special capabilities entry.
Add several sets of tests for HTTP as well as for local clones. The
behavior can be slightly different for HTTP versus a local or SSH clone
because of the stateless-rpc functionality, so it's worth testing both.
Signed-off-by: brian m. carlson <bk2204@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 year ago
|
|
|
return;
|
|
|
|
|
|
|
|
if (capabilities) {
|
|
|
|
struct strbuf symref_info = STRBUF_INIT;
|
|
|
|
struct strbuf session_id = STRBUF_INIT;
|
|
|
|
|
|
|
|
format_symref_info(&symref_info, &data->symref);
|
|
|
|
format_session_id(&session_id, data);
|
|
|
|
packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
|
|
|
|
oid_to_hex(oid), refname_nons,
|
|
|
|
0, capabilities,
|
|
|
|
(data->allow_uor & ALLOW_TIP_SHA1) ?
|
|
|
|
" allow-tip-sha1-in-want" : "",
|
|
|
|
(data->allow_uor & ALLOW_REACHABLE_SHA1) ?
|
|
|
|
" allow-reachable-sha1-in-want" : "",
|
serve.[ch]: remove "serve_options", split up --advertise-refs code
The "advertise capabilities" mode of serve.c added in
ed10cb952d3 (serve: introduce git-serve, 2018-03-15) is only used by
the http-backend.c to call {upload,receive}-pack with the
--advertise-refs parameter. See 42526b478e3 (Add stateless RPC options
to upload-pack, receive-pack, 2009-10-30).
Let's just make cmd_upload_pack() take the two (v2) or three (v2)
parameters the the v2/v1 servicing functions need directly, and pass
those in via the function signature. The logic of whether daemon mode
is implied by the timeout belongs in the v1 function (only used
there).
Once we split up the "advertise v2 refs" from "serve v2 request" it
becomes clear that v2 never cared about those in combination. The only
time it mattered was for v1 to emit its ref advertisement, in that
case we wanted to emit the smart-http-only "no-done" capability.
Since we only do that in the --advertise-refs codepath let's just have
it set "do_done" itself in v1's upload_pack() just before send_ref(),
at that point --advertise-refs and --stateless-rpc in combination are
redundant (the only user is get_info_refs() in http-backend.c), so we
can just pass in --advertise-refs only.
Since we need to touch all the serve() and advertise_capabilities()
codepaths let's rename them to less clever and obvious names, it's
been suggested numerous times, the latest of which is [1]'s suggestion
for protocol_v2_serve_loop(). Let's go with that.
1. https://lore.kernel.org/git/CAFQ2z_NyGb8rju5CKzmo6KhZXD0Dp21u-BbyCb2aNxLEoSPRJw@mail.gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
data->no_done ? " no-done" : "",
|
|
|
|
symref_info.buf,
|
|
|
|
data->allow_filter ? " filter" : "",
|
|
|
|
session_id.buf,
|
|
|
|
the_hash_algo->name,
|
|
|
|
git_user_agent_sanitized());
|
|
|
|
strbuf_release(&symref_info);
|
|
|
|
strbuf_release(&session_id);
|
upload-pack: advertise capabilities when cloning empty repos
When cloning an empty repository, protocol versions 0 and 1 currently
offer nothing but the header and flush packets for the /info/refs
endpoint. This means that no capabilities are provided, so the client
side doesn't know what capabilities are present.
However, this does pose a problem when working with SHA-256
repositories, since we use the capabilities to know the remote side's
object format (hash algorithm). As of 8b214c2e9d ("clone: propagate
object-format when cloning from void", 2023-04-05), this has been fixed
for protocol v2, since there we always read the hash algorithm from the
remote.
Fortunately, the push version of the protocol already indicates a clue
for how to solve this. When the /info/refs endpoint is accessed for a
push and the remote is empty, we include a dummy "capabilities^{}" ref
pointing to the all-zeros object ID. The protocol documentation already
indicates this should _always_ be sent, even for fetches and clones, so
let's just do that, which means we'll properly announce the hash
algorithm as part of the capabilities. This just works with the
existing code because we share the same ref code for fetches and clones,
and libgit2, JGit, and dulwich do as well.
There is one minor issue to fix, though. If we called send_ref with
namespaces, we would return NULL with the capabilities entry, which
would cause a crash. Instead, let's refactor out a function to print
just the ref itself without stripping the namespace and use it for our
special capabilities entry.
Add several sets of tests for HTTP as well as for local clones. The
behavior can be slightly different for HTTP versus a local or SSH clone
because of the stateless-rpc functionality, so it's worth testing both.
Signed-off-by: brian m. carlson <bk2204@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 year ago
|
|
|
data->sent_capabilities = 1;
|
|
|
|
} else {
|
|
|
|
packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(oid), refname_nons);
|
|
|
|
}
|
|
|
|
capabilities = NULL;
|
refs: switch peel_ref() to peel_iterated_oid()
The peel_ref() interface is confusing and error-prone:
- it's typically used by ref iteration callbacks that have both a
refname and oid. But since they pass only the refname, we may load
the ref value from the filesystem again. This is inefficient, but
also means we are open to a race if somebody simultaneously updates
the ref. E.g., this:
int some_ref_cb(const char *refname, const struct object_id *oid, ...)
{
if (!peel_ref(refname, &peeled))
printf("%s peels to %s",
oid_to_hex(oid), oid_to_hex(&peeled);
}
could print nonsense. It is correct to say "refname peels to..."
(you may see the "before" value or the "after" value, either of
which is consistent), but mentioning both oids may be mixing
before/after values.
Worse, whether this is possible depends on whether the optimization
to read from the current iterator value kicks in. So it is actually
not possible with:
for_each_ref(some_ref_cb);
but it _is_ possible with:
head_ref(some_ref_cb);
which does not use the iterator mechanism (though in practice, HEAD
should never peel to anything, so this may not be triggerable).
- it must take a fully-qualified refname for the read_ref_full() code
path to work. Yet we routinely pass it partial refnames from
callbacks to for_each_tag_ref(), etc. This happens to work when
iterating because there we do not call read_ref_full() at all, and
only use the passed refname to check if it is the same as the
iterator. But the requirements for the function parameters are quite
unclear.
Instead of taking a refname, let's instead take an oid. That fixes both
problems. It's a little funny for a "ref" function not to involve refs
at all. The key thing is that it's optimizing under the hood based on
having access to the ref iterator. So let's change the name to make it
clear why you'd want this function versus just peel_object().
There are two other directions I considered but rejected:
- we could pass the peel information into the each_ref_fn callback.
However, we don't know if the caller actually wants it or not. For
packed-refs, providing it is essentially free. But for loose refs,
we actually have to peel the object, which would be wasteful in most
cases. We could likewise pass in a flag to the callback indicating
whether the peeled information is known, but that complicates those
callbacks, as they then have to decide whether to manually peel
themselves. Plus it requires changing the interface of every
callback, whether they care about peeling or not, and there are many
of them.
- we could make a function to return the peeled value of the current
iterated ref (computing it if necessary), and BUG() otherwise. I.e.:
int peel_current_iterated_ref(struct object_id *out);
Each of the current callers is an each_ref_fn callback, so they'd
mostly be happy. But:
- we use those callbacks with functions like head_ref(), which do
not use the iteration code. So we'd need to handle the fallback
case there, anyway.
- it's possible that a caller would want to call into generic code
that sometimes is used during iteration and sometimes not. This
encapsulates the logic to do the fast thing when possible, and
fallback when necessary.
The implementation is mostly obvious, but I want to call out a few
things in the patch:
- the test-tool coverage for peel_ref() is now meaningless, as it all
collapses to a single peel_object() call (arguably they were pretty
uninteresting before; the tricky part of that function is the
fast-path we see during iteration, but these calls didn't trigger
that). I've just dropped it entirely, though note that some other
tests relied on the tags we created; I've moved that creation to the
tests where it matters.
- we no longer need to take a ref_store parameter, since we'd never
look up a ref now. We do still rely on a global "current iterator"
variable which _could_ be kept per-ref-store. But in practice this
is only useful if there are multiple recursive iterations, at which
point the more appropriate solution is probably a stack of
iterators. No caller used the actual ref-store parameter anyway
(they all call the wrapper that passes the_repository).
- the original only kicked in the optimization when the "refname"
pointer matched (i.e., not string comparison). We do likewise with
the "oid" parameter here, but fall back to doing an actual oideq()
call. This in theory lets us kick in the optimization more often,
though in practice no current caller cares. It should never be
wrong, though (peeling is a property of an object, so two refs
pointing to the same object would peel identically).
- the original took care not to touch the peeled out-parameter unless
we found something to put in it. But no caller cares about this, and
anyway, it is enforced by peel_object() itself (and even in the
optimized iterator case, that's where we eventually end up). We can
shorten the code and avoid an extra copy by just passing the
out-parameter through the stack.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
if (!peel_iterated_oid(oid, &peeled))
|
|
|
|
packet_fwrite_fmt(stdout, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
|
upload-pack: advertise capabilities when cloning empty repos
When cloning an empty repository, protocol versions 0 and 1 currently
offer nothing but the header and flush packets for the /info/refs
endpoint. This means that no capabilities are provided, so the client
side doesn't know what capabilities are present.
However, this does pose a problem when working with SHA-256
repositories, since we use the capabilities to know the remote side's
object format (hash algorithm). As of 8b214c2e9d ("clone: propagate
object-format when cloning from void", 2023-04-05), this has been fixed
for protocol v2, since there we always read the hash algorithm from the
remote.
Fortunately, the push version of the protocol already indicates a clue
for how to solve this. When the /info/refs endpoint is accessed for a
push and the remote is empty, we include a dummy "capabilities^{}" ref
pointing to the all-zeros object ID. The protocol documentation already
indicates this should _always_ be sent, even for fetches and clones, so
let's just do that, which means we'll properly announce the hash
algorithm as part of the capabilities. This just works with the
existing code because we share the same ref code for fetches and clones,
and libgit2, JGit, and dulwich do as well.
There is one minor issue to fix, though. If we called send_ref with
namespaces, we would return NULL with the capabilities entry, which
would cause a crash. Instead, let's refactor out a function to print
just the ref itself without stripping the namespace and use it for our
special capabilities entry.
Add several sets of tests for HTTP as well as for local clones. The
behavior can be slightly different for HTTP versus a local or SSH clone
because of the stateless-rpc functionality, so it's worth testing both.
Signed-off-by: brian m. carlson <bk2204@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 year ago
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int send_ref(const char *refname, const struct object_id *oid,
|
|
|
|
int flag UNUSED, void *cb_data)
|
|
|
|
{
|
|
|
|
write_v0_ref(cb_data, refname, strip_namespace(refname), oid);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int find_symref(const char *refname,
|
|
|
|
const struct object_id *oid UNUSED,
|
|
|
|
int flag, void *cb_data)
|
|
|
|
{
|
|
|
|
const char *symref_target;
|
|
|
|
struct string_list_item *item;
|
|
|
|
|
|
|
|
if ((flag & REF_ISSYMREF) == 0)
|
|
|
|
return 0;
|
|
|
|
symref_target = resolve_ref_unsafe(refname, 0, NULL, &flag);
|
|
|
|
if (!symref_target || (flag & REF_ISSYMREF) == 0)
|
|
|
|
die("'%s' is a symref but it is not?", refname);
|
upload-pack: strip namespace from symref data
Since 7171d8c15f (upload-pack: send symbolic ref information as
capability, 2013-09-17), we've sent cloning and fetching clients special
information about which branch HEAD is pointing to, so that they don't
have to guess based on matching up commit ids.
However, this feature has never worked properly with the GIT_NAMESPACE
feature. Because upload-pack uses head_ref_namespaced(find_symref), we
do find and report on refs/namespaces/foo/HEAD instead of the actual
HEAD of the repo. This makes sense, since the branch pointed to by the
top-level HEAD may not be advertised at all. But we do two things wrong:
1. We report the full name refs/namespaces/foo/HEAD, instead of just
HEAD. Meaning no client is going to bother doing anything with that
symref, since we're not otherwise advertising it.
2. We report the symref destination using its full name (e.g.,
refs/namespaces/foo/refs/heads/master). That's similarly useless to
the client, who only saw "refs/heads/master" in the advertisement.
We should be stripping the namespace prefix off of both places (which
this patch fixes).
Likely nobody noticed because we tend to do the right thing anyway. Bug
(1) means that we said nothing about HEAD (just refs/namespace/foo/HEAD).
And so the client half of the code, from a45b5f0552 (connect: annotate
refs with their symref information in get_remote_head(), 2013-09-17),
does not annotate HEAD, and we use the fallback in guess_remote_head(),
matching refs by object id. Which is usually right. It only falls down
in ambiguous cases, like the one laid out in the included test.
This also means that we don't have to worry about breaking anybody who
was putting pre-stripped names into their namespace symrefs when we fix
bug (2). Because of bug (1), nobody would have been using the symref we
advertised in the first place (not to mention that those symrefs would
have appeared broken for any non-namespaced access).
Note that we have separate fixes here for the v0 and v2 protocols. The
symref advertisement moved in v2 to be a part of the ls-refs command.
This actually gets part (1) right, since the symref annotation
piggy-backs on the existing ref advertisement, which is properly
stripped. But it still needs a fix for part (2). The included tests
cover both protocols.
Reported-by: Bryan Turner <bturner@atlassian.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
item = string_list_append(cb_data, strip_namespace(refname));
|
|
|
|
item->util = xstrdup(strip_namespace(symref_target));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
static int parse_object_filter_config(const char *var, const char *value,
|
|
|
|
struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
const char *sub, *key;
|
|
|
|
size_t sub_len;
|
|
|
|
|
|
|
|
if (parse_config_key(var, "uploadpackfilter", &sub, &sub_len, &key))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!sub) {
|
|
|
|
if (!strcmp(key, "allow"))
|
|
|
|
data->allow_filter_fallback = git_config_bool(var, value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_add(&buf, sub, sub_len);
|
|
|
|
|
|
|
|
if (!strcmp(key, "allow"))
|
|
|
|
string_list_insert(&data->allowed_filters, buf.buf)->util =
|
|
|
|
(void *)(intptr_t)git_config_bool(var, value);
|
upload-pack.c: introduce 'uploadpackfilter.tree.maxDepth'
In b79cf959b2 (upload-pack.c: allow banning certain object filter(s),
2020-02-26), we introduced functionality to disallow certain object
filters from being chosen from within 'git upload-pack'. Traditionally,
administrators use this functionality to disallow filters that are known
to perform slowly, for e.g., those that do not have bitmap-level
filtering.
In the past, the '--filter=tree:<n>' was one such filter that does not
have bitmap-level filtering support, and so was likely to be banned by
administrators.
However, in the previous couple of commits, we introduced bitmap-level
filtering for the case when 'n' is equal to '0', i.e., as if we had a
'--filter=tree:none' choice.
While it would be sufficient to simply write
$ git config uploadpackfilter.tree.allow true
(since it would allow all values of 'n'), we would like to be able to
allow this filter for certain values of 'n', i.e., those no greater than
some pre-specified maximum.
In order to do this, introduce a new configuration key, as follows:
$ git config uploadpackfilter.tree.maxDepth <m>
where '<m>' specifies the maximum allowed value of 'n' in the filter
'tree:n'. Administrators who wish to allow for only the value '0' can
write:
$ git config uploadpackfilter.tree.allow true
$ git config uploadpackfilter.tree.maxDepth 0
which allows '--filter=tree:0', but no other values.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Acked-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
else if (!strcmp(buf.buf, "tree") && !strcmp(key, "maxdepth")) {
|
|
|
|
if (!value) {
|
|
|
|
strbuf_release(&buf);
|
|
|
|
return config_error_nonbool(var);
|
|
|
|
}
|
|
|
|
string_list_insert(&data->allowed_filters, buf.buf)->util =
|
|
|
|
(void *)(intptr_t)1;
|
|
|
|
data->tree_filter_max_depth = git_config_ulong(var, value);
|
|
|
|
}
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
|
|
|
|
strbuf_release(&buf);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int upload_pack_config(const char *var, const char *value, void *cb_data)
|
upload/receive-pack: allow hiding ref hierarchies
A repository may have refs that are only used for its internal
bookkeeping purposes that should not be exposed to the others that
come over the network.
Teach upload-pack to omit some refs from its initial advertisement
by paying attention to the uploadpack.hiderefs multi-valued
configuration variable. Do the same to receive-pack via the
receive.hiderefs variable. As a convenient short-hand, allow using
transfer.hiderefs to set the value to both of these variables.
Any ref that is under the hierarchies listed in the values of these
variables is excluded from responses to requests made by "ls-remote",
"fetch", etc. (for upload-pack) and "push" (for receive-pack).
Because these hidden refs do not count as OUR_REF, an attempt to
fetch objects at the tip of them will be rejected, and because these
refs do not get advertised, "git push :" will not see local branches
that have the same name as them as "matching" ones to be sent.
An attempt to update/delete these hidden refs with an explicit
refspec, e.g. "git push origin :refs/hidden/22", is rejected. This
is not a new restriction. To the pusher, it would appear that there
is no such ref, so its push request will conclude with "Now that I
sent you all the data, it is time for you to update the refs. I saw
that the ref did not exist when I started pushing, and I want the
result to point at this commit". The receiving end will apply the
compare-and-swap rule to this request and rejects the push with
"Well, your update request conflicts with somebody else; I see there
is such a ref.", which is the right thing to do. Otherwise a push to
a hidden ref will always be "the last one wins", which is not a good
default.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
{
|
|
|
|
struct upload_pack_data *data = cb_data;
|
|
|
|
|
|
|
|
if (!strcmp("uploadpack.allowtipsha1inwant", var)) {
|
|
|
|
if (git_config_bool(var, value))
|
|
|
|
data->allow_uor |= ALLOW_TIP_SHA1;
|
|
|
|
else
|
|
|
|
data->allow_uor &= ~ALLOW_TIP_SHA1;
|
|
|
|
} else if (!strcmp("uploadpack.allowreachablesha1inwant", var)) {
|
|
|
|
if (git_config_bool(var, value))
|
|
|
|
data->allow_uor |= ALLOW_REACHABLE_SHA1;
|
|
|
|
else
|
|
|
|
data->allow_uor &= ~ALLOW_REACHABLE_SHA1;
|
|
|
|
} else if (!strcmp("uploadpack.allowanysha1inwant", var)) {
|
|
|
|
if (git_config_bool(var, value))
|
|
|
|
data->allow_uor |= ALLOW_ANY_SHA1;
|
|
|
|
else
|
|
|
|
data->allow_uor &= ~ALLOW_ANY_SHA1;
|
|
|
|
} else if (!strcmp("uploadpack.keepalive", var)) {
|
|
|
|
data->keepalive = git_config_int(var, value);
|
|
|
|
if (!data->keepalive)
|
|
|
|
data->keepalive = -1;
|
|
|
|
} else if (!strcmp("uploadpack.allowfilter", var)) {
|
|
|
|
data->allow_filter = git_config_bool(var, value);
|
|
|
|
} else if (!strcmp("uploadpack.allowrefinwant", var)) {
|
|
|
|
data->allow_ref_in_want = git_config_bool(var, value);
|
|
|
|
} else if (!strcmp("uploadpack.allowsidebandall", var)) {
|
|
|
|
data->allow_sideband_all = git_config_bool(var, value);
|
Honor core.precomposeUnicode in more places
On Mac's HFS where git sets core.precomposeUnicode to true automatically
by git init/clone, when a user creates a simple unicode refname (in NFC
format) such as españa:
$ git branch españa
different commands would display the branch name differently. For
example, git branch, git log --decorate, and git fast-export all used
65 73 70 61 c3 b1 61 (or "espa\xc3\xb1a")
(NFC form) while show-ref would use
65 73 70 61 6e cc 83 61 (or "espan\xcc\x83a")
(NFD form). A stress test for git filter-repo was tripped up by this
inconsistency, though digging in I found that the problems could
compound; for example, if the user ran
$ git pack-refs --all
and then tried to check out the branch, they would be met with:
$ git checkout españa
error: pathspec 'españa' did not match any file(s) known to git
$ git checkout españa --
fatal: invalid reference: españa
$ git branch
españa
* master
Note that the user could run the `git branch` command first and copy and
paste the `españa` portion of the output and still see the same two
errors. Also, if the user added --no-prune to the pack-refs command,
then they would see three branches: master, españa, and españa (those
last two are NFC vs. NFD forms, even if they render the same).
Further, if the user had the `españa` branch checked out before
running `git pack-refs --all`, the user would be greeted with (note
that I'm trimming trailing output with an ellipsis):
$ git rev-parse HEAD
fatal: ambiguous argument 'HEAD': unknown revision or path...
$ git status
On branch españa
No commits yet...
Or worse, if the user didn't check this stuff first, running `git
commit` will create a new commit with all changes of all of history
being squashed into it.
In addition to pack-refs, one could also get into this state with
upload-pack or anything that calls either pack-refs or upload-pack (e.g.
gc or clone).
Add code in a few places (pack-refs, show-ref, upload-pack) to check and
honor the setting of core.precomposeUnicode to avoid these bugs.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
} else if (!strcmp("core.precomposeunicode", var)) {
|
|
|
|
precomposed_unicode = git_config_bool(var, value);
|
|
|
|
} else if (!strcmp("transfer.advertisesid", var)) {
|
|
|
|
data->advertise_sid = git_config_bool(var, value);
|
|
|
|
}
|
upload-pack: fix broken if/else chain in config callback
The upload_pack_config() callback uses an if/else chain
like:
if (!strcmp(var, "a"))
...
else if (!strcmp(var, "b"))
...
etc
This works as long as the conditions are mutually exclusive,
but one of them is not. 20b20a22f8 (upload-pack: provide a
hook for running pack-objects, 2016-05-18) added:
else if (current_config_scope() != CONFIG_SCOPE_REPO) {
... check some more options ...
}
That was fine in that commit, because it came at the end of
the chain. But later, 10ac85c785 (upload-pack: add object
filtering for partial clone, 2017-12-08) did this:
else if (current_config_scope() != CONFIG_SCOPE_REPO) {
... check some more options ...
} else if (!strcmp("uploadpack.allowfilter", var))
...
We'd always check the scope condition first, meaning we'd
_only_ respect allowfilter when it's in the repo config. You
can see this with:
git -c uploadpack.allowfilter=true upload-pack . | head -1
which will not advertise the filter capability (but will
after this patch). We never noticed because:
- our tests always set it in the repo config
- in protocol v2, we use a different code path that
actually calls repo_config_get_bool() separately, so
that _does_ work. Real-world people experimenting with
this may be using v2.
The more recent uploadpack.allowrefinwant option is in the
same boat.
There are a few possible fixes:
1. Bump the scope conditional back to the bottom of the
chain. But that just means somebody else is likely to
make the same mistake later.
2. Make the conditional more like the others. I.e.:
else if (current_config_scope() != CONFIG_SCOPE_REPO &&
!strcmp(var, "uploadpack.notallowedinrepo"))
This works, but the idea of the original structure was
that we may grow multiple sensitive options like this.
3. Pull it out of the chain entirely. The chain mostly
serves to avoid extra strcmp() calls after we've found
a match. But it's not worth caring about those. In the
worst case, when there isn't a match, we're already
hitting every strcmp (and this happens regularly for
stuff like "core.bare", etc).
This patch does (3).
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
|
|
|
|
if (parse_object_filter_config(var, value, data) < 0)
|
|
|
|
return -1;
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
|
|
|
|
return parse_hide_refs_config(var, value, "uploadpack", &data->hidden_refs);
|
upload/receive-pack: allow hiding ref hierarchies
A repository may have refs that are only used for its internal
bookkeeping purposes that should not be exposed to the others that
come over the network.
Teach upload-pack to omit some refs from its initial advertisement
by paying attention to the uploadpack.hiderefs multi-valued
configuration variable. Do the same to receive-pack via the
receive.hiderefs variable. As a convenient short-hand, allow using
transfer.hiderefs to set the value to both of these variables.
Any ref that is under the hierarchies listed on the value of these
variables is excluded from responses to requests made by "ls-remote",
"fetch", etc. (for upload-pack) and "push" (for receive-pack).
Because these hidden refs do not count as OUR_REF, an attempt to
fetch objects at the tip of them will be rejected, and because these
refs do not get advertised, "git push :" will not see local branches
that have the same name as them as "matching" ones to be sent.
An attempt to update/delete these hidden refs with an explicit
refspec, e.g. "git push origin :refs/hidden/22", is rejected. This
is not a new restriction. To the pusher, it would appear that there
is no such ref, so its push request will conclude with "Now that I
sent you all the data, it is time for you to update the refs. I saw
that the ref did not exist when I started pushing, and I want the
result to point at this commit". The receiving end will apply the
compare-and-swap rule to this request and reject the push with
"Well, your update request conflicts with somebody else; I see there
is such a ref.", which is the right thing to do. Otherwise a push to
a hidden ref will always be "the last one wins", which is not a good
default.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
}
|
|
|
|
|
config: learn `git_protected_config()`
`uploadpack.packObjectsHook` is the only 'protected configuration only'
variable today, but we've noted that `safe.directory` and the upcoming
`safe.bareRepository` should also be 'protected configuration only'. So,
for consistency, we'd like to have a single implementation for protected
configuration.
The primary constraints are:
1. Reading from protected configuration should be fast. Nearly all "git"
commands inside a bare repository will read both `safe.directory` and
`safe.bareRepository`, so we cannot afford to be slow.
2. Protected configuration must be readable when the gitdir is not
known. `safe.directory` and `safe.bareRepository` both affect
repository discovery and the gitdir is not known at that point [1].
The chosen implementation in this commit is to read protected
configuration and cache the values in a global configset. This is
similar to the caching behavior we get with the_repository->config.
Introduce git_protected_config(), which reads protected configuration
and caches them in the global configset protected_config. Then, refactor
`uploadpack.packObjectsHook` to use git_protected_config().
The protected configuration functions are named similarly to their
non-protected counterparts, e.g. git_protected_config_check_init() vs
git_config_check_init().
In light of constraint 1, this implementation can still be improved.
git_protected_config() iterates through every variable in
protected_config, which is wasteful, but it makes the conversion simple
because it matches existing patterns. We will likely implement constant
time lookup functions for protected configuration in a future series
(such functions already exist for non-protected configuration, i.e.
repo_config_get_*()).
An alternative that avoids introducing another configset is to continue
to read all config using git_config(), but only accept values that have
the correct config scope [2]. This technically fulfills constraint 2,
because git_config() simply ignores the local and worktree config when
the gitdir is not known. However, this would read incomplete config into
the_repository->config, which would need to be reset when the gitdir is
known and git_config() needs to read the local and worktree config.
Resetting the_repository->config might be reasonable while we only have
these 'protected configuration only' variables, but it's not clear
whether this extends well to future variables.
[1] In this case, we do have a candidate gitdir though, so with a little
refactoring, it might be possible to provide a gitdir.
[2] This is how `uploadpack.packObjectsHook` was implemented prior to
this commit.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
|
|
|
static int upload_pack_protected_config(const char *var, const char *value, void *cb_data)
|
|
|
|
{
|
|
|
|
struct upload_pack_data *data = cb_data;
|
|
|
|
|
|
|
|
if (!strcmp("uploadpack.packobjectshook", var))
|
|
|
|
return git_config_string(&data->pack_objects_hook, var, value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void get_upload_pack_config(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
git_config(upload_pack_config, data);
|
|
|
|
git_protected_config(upload_pack_protected_config, data);
|
|
|
|
}
|
|
|
|
|
serve.[ch]: remove "serve_options", split up --advertise-refs code
The "advertise capabilities" mode of serve.c added in
ed10cb952d3 (serve: introduce git-serve, 2018-03-15) is only used by
the http-backend.c to call {upload,receive}-pack with the
--advertise-refs parameter. See 42526b478e3 (Add stateless RPC options
to upload-pack, receive-pack, 2009-10-30).
Let's just make cmd_upload_pack() take the two (v2) or three (v1)
parameters that the v2/v1 servicing functions need directly, and pass
those in via the function signature. The logic of whether daemon mode
is implied by the timeout belongs in the v1 function (only used
there).
Once we split up the "advertise v2 refs" from "serve v2 request" it
becomes clear that v2 never cared about those in combination. The only
time it mattered was for v1 to emit its ref advertisement, in that
case we wanted to emit the smart-http-only "no-done" capability.
Since we only do that in the --advertise-refs codepath let's just have
it set "no_done" itself in v1's upload_pack() just before send_ref(),
at that point --advertise-refs and --stateless-rpc in combination are
redundant (the only user is get_info_refs() in http-backend.c), so we
can just pass in --advertise-refs only.
Since we need to touch all the serve() and advertise_capabilities()
codepaths let's rename them to less clever and obvious names, it's
been suggested numerous times, the latest of which is [1]'s suggestion
for protocol_v2_serve_loop(). Let's go with that.
1. https://lore.kernel.org/git/CAFQ2z_NyGb8rju5CKzmo6KhZXD0Dp21u-BbyCb2aNxLEoSPRJw@mail.gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
void upload_pack(const int advertise_refs, const int stateless_rpc,
|
|
|
|
const int timeout)
|
|
|
|
{
|
|
|
|
struct packet_reader reader;
|
|
|
|
struct upload_pack_data data;
|
|
|
|
|
|
|
|
upload_pack_data_init(&data);
|
config: learn `git_protected_config()`
`uploadpack.packObjectsHook` is the only 'protected configuration only'
variable today, but we've noted that `safe.directory` and the upcoming
`safe.bareRepository` should also be 'protected configuration only'. So,
for consistency, we'd like to have a single implementation for protected
configuration.
The primary constraints are:
1. Reading from protected configuration should be fast. Nearly all "git"
commands inside a bare repository will read both `safe.directory` and
`safe.bareRepository`, so we cannot afford to be slow.
2. Protected configuration must be readable when the gitdir is not
known. `safe.directory` and `safe.bareRepository` both affect
repository discovery and the gitdir is not known at that point [1].
The chosen implementation in this commit is to read protected
configuration and cache the values in a global configset. This is
similar to the caching behavior we get with the_repository->config.
Introduce git_protected_config(), which reads protected configuration
and caches them in the global configset protected_config. Then, refactor
`uploadpack.packObjectsHook` to use git_protected_config().
The protected configuration functions are named similarly to their
non-protected counterparts, e.g. git_protected_config_check_init() vs
git_config_check_init().
In light of constraint 1, this implementation can still be improved.
git_protected_config() iterates through every variable in
protected_config, which is wasteful, but it makes the conversion simple
because it matches existing patterns. We will likely implement constant
time lookup functions for protected configuration in a future series
(such functions already exist for non-protected configuration, i.e.
repo_config_get_*()).
An alternative that avoids introducing another configset is to continue
to read all config using git_config(), but only accept values that have
the correct config scope [2]. This technically fulfills constraint 2,
because git_config() simply ignores the local and worktree config when
the gitdir is not known. However, this would read incomplete config into
the_repository->config, which would need to be reset when the gitdir is
known and git_config() needs to read the local and worktree config.
Resetting the_repository->config might be reasonable while we only have
these 'protected configuration only' variables, but it's not clear
whether this extends well to future variables.
[1] In this case, we do have a candidate gitdir though, so with a little
refactoring, it might be possible to provide a gitdir.
[2] This is how `uploadpack.packObjectsHook` was implemented prior to
this commit.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
|
|
|
get_upload_pack_config(&data);
|
|
|
|
|
serve.[ch]: remove "serve_options", split up --advertise-refs code
The "advertise capabilities" mode of serve.c added in
ed10cb952d3 (serve: introduce git-serve, 2018-03-15) is only used by
the http-backend.c to call {upload,receive}-pack with the
--advertise-refs parameter. See 42526b478e3 (Add stateless RPC options
to upload-pack, receive-pack, 2009-10-30).
Let's just make cmd_upload_pack() take the two (v2) or three (v1)
parameters that the v2/v1 servicing functions need directly, and pass
those in via the function signature. The logic of whether daemon mode
is implied by the timeout belongs in the v1 function (only used
there).
Once we split up the "advertise v2 refs" from "serve v2 request" it
becomes clear that v2 never cared about those in combination. The only
time it mattered was for v1 to emit its ref advertisement, in that
case we wanted to emit the smart-http-only "no-done" capability.
Since we only do that in the --advertise-refs codepath let's just have
it set "no_done" itself in v1's upload_pack() just before send_ref(),
at that point --advertise-refs and --stateless-rpc in combination are
redundant (the only user is get_info_refs() in http-backend.c), so we
can just pass in --advertise-refs only.
Since we need to touch all the serve() and advertise_capabilities()
codepaths let's rename them to less clever and obvious names, it's
been suggested numerous times, the latest of which is [1]'s suggestion
for protocol_v2_serve_loop(). Let's go with that.
1. https://lore.kernel.org/git/CAFQ2z_NyGb8rju5CKzmo6KhZXD0Dp21u-BbyCb2aNxLEoSPRJw@mail.gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
data.stateless_rpc = stateless_rpc;
|
|
|
|
data.timeout = timeout;
|
|
|
|
if (data.timeout)
|
|
|
|
data.daemon_mode = 1;
|
|
|
|
|
|
|
|
head_ref_namespaced(find_symref, &data.symref);
|
|
|
|
|
serve.[ch]: remove "serve_options", split up --advertise-refs code
The "advertise capabilities" mode of serve.c added in
ed10cb952d3 (serve: introduce git-serve, 2018-03-15) is only used by
the http-backend.c to call {upload,receive}-pack with the
--advertise-refs parameter. See 42526b478e3 (Add stateless RPC options
to upload-pack, receive-pack, 2009-10-30).
Let's just make cmd_upload_pack() take the two (v2) or three (v1)
parameters that the v2/v1 servicing functions need directly, and pass
those in via the function signature. The logic of whether daemon mode
is implied by the timeout belongs in the v1 function (only used
there).
Once we split up the "advertise v2 refs" from "serve v2 request" it
becomes clear that v2 never cared about those in combination. The only
time it mattered was for v1 to emit its ref advertisement, in that
case we wanted to emit the smart-http-only "no-done" capability.
Since we only do that in the --advertise-refs codepath let's just have
it set "no_done" itself in v1's upload_pack() just before send_ref(),
at that point --advertise-refs and --stateless-rpc in combination are
redundant (the only user is get_info_refs() in http-backend.c), so we
can just pass in --advertise-refs only.
Since we need to touch all the serve() and advertise_capabilities()
codepaths let's rename them to less clever and obvious names, it's
been suggested numerous times, the latest of which is [1]'s suggestion
for protocol_v2_serve_loop(). Let's go with that.
1. https://lore.kernel.org/git/CAFQ2z_NyGb8rju5CKzmo6KhZXD0Dp21u-BbyCb2aNxLEoSPRJw@mail.gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
if (advertise_refs || !data.stateless_rpc) {
|
|
|
|
reset_timeout(data.timeout);
|
serve.[ch]: remove "serve_options", split up --advertise-refs code
The "advertise capabilities" mode of serve.c added in
ed10cb952d3 (serve: introduce git-serve, 2018-03-15) is only used by
the http-backend.c to call {upload,receive}-pack with the
--advertise-refs parameter. See 42526b478e3 (Add stateless RPC options
to upload-pack, receive-pack, 2009-10-30).
Let's just make cmd_upload_pack() take the two (v2) or three (v1)
parameters that the v2/v1 servicing functions need directly, and pass
those in via the function signature. The logic of whether daemon mode
is implied by the timeout belongs in the v1 function (only used
there).
Once we split up the "advertise v2 refs" from "serve v2 request" it
becomes clear that v2 never cared about those in combination. The only
time it mattered was for v1 to emit its ref advertisement, in that
case we wanted to emit the smart-http-only "no-done" capability.
Since we only do that in the --advertise-refs codepath let's just have
it set "no_done" itself in v1's upload_pack() just before send_ref(),
at that point --advertise-refs and --stateless-rpc in combination are
redundant (the only user is get_info_refs() in http-backend.c), so we
can just pass in --advertise-refs only.
Since we need to touch all the serve() and advertise_capabilities()
codepaths let's rename them to less clever and obvious names, it's
been suggested numerous times, the latest of which is [1]'s suggestion
for protocol_v2_serve_loop(). Let's go with that.
1. https://lore.kernel.org/git/CAFQ2z_NyGb8rju5CKzmo6KhZXD0Dp21u-BbyCb2aNxLEoSPRJw@mail.gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
if (advertise_refs)
|
|
|
|
data.no_done = 1;
|
|
|
|
head_ref_namespaced(send_ref, &data);
|
|
|
|
for_each_namespaced_ref(send_ref, &data);
|
upload-pack: advertise capabilities when cloning empty repos
When cloning an empty repository, protocol versions 0 and 1 currently
offer nothing but the header and flush packets for the /info/refs
endpoint. This means that no capabilities are provided, so the client
side doesn't know what capabilities are present.
However, this does pose a problem when working with SHA-256
repositories, since we use the capabilities to know the remote side's
object format (hash algorithm). As of 8b214c2e9d ("clone: propagate
object-format when cloning from void", 2023-04-05), this has been fixed
for protocol v2, since there we always read the hash algorithm from the
remote.
Fortunately, the push version of the protocol already indicates a clue
for how to solve this. When the /info/refs endpoint is accessed for a
push and the remote is empty, we include a dummy "capabilities^{}" ref
pointing to the all-zeros object ID. The protocol documentation already
indicates this should _always_ be sent, even for fetches and clones, so
let's just do that, which means we'll properly announce the hash
algorithm as part of the capabilities. This just works with the
existing code because we share the same ref code for fetches and clones,
and libgit2, JGit, and dulwich do as well.
There is one minor issue to fix, though. If we called send_ref with
namespaces, we would return NULL with the capabilities entry, which
would cause a crash. Instead, let's refactor out a function to print
just the ref itself without stripping the namespace and use it for our
special capabilities entry.
Add several sets of tests for HTTP as well as for local clones. The
behavior can be slightly different for HTTP versus a local or SSH clone
because of the stateless-rpc functionality, so it's worth testing both.
Signed-off-by: brian m. carlson <bk2204@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 year ago
|
|
|
if (!data.sent_capabilities) {
|
|
|
|
const char *refname = "capabilities^{}";
|
|
|
|
write_v0_ref(&data, refname, refname, null_oid());
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* fflush stdout before calling advertise_shallow_grafts because send_ref
|
|
|
|
* uses stdio.
|
|
|
|
*/
|
|
|
|
fflush_or_die(stdout);
|
|
|
|
advertise_shallow_grafts(1);
|
|
|
|
packet_flush(1);
|
|
|
|
} else {
|
|
|
|
head_ref_namespaced(check_ref, &data);
|
|
|
|
for_each_namespaced_ref(check_ref, &data);
|
|
|
|
}
|
|
|
|
|
serve.[ch]: remove "serve_options", split up --advertise-refs code
The "advertise capabilities" mode of serve.c added in
ed10cb952d3 (serve: introduce git-serve, 2018-03-15) is only used by
the http-backend.c to call {upload,receive}-pack with the
--advertise-refs parameter. See 42526b478e3 (Add stateless RPC options
to upload-pack, receive-pack, 2009-10-30).
Let's just make cmd_upload_pack() take the two (v2) or three (v1)
parameters that the v2/v1 servicing functions need directly, and pass
those in via the function signature. The logic of whether daemon mode
is implied by the timeout belongs in the v1 function (only used
there).
Once we split up the "advertise v2 refs" from "serve v2 request" it
becomes clear that v2 never cared about those in combination. The only
time it mattered was for v1 to emit its ref advertisement, in that
case we wanted to emit the smart-http-only "no-done" capability.
Since we only do that in the --advertise-refs codepath let's just have
it set "no_done" itself in v1's upload_pack() just before send_ref(),
at that point --advertise-refs and --stateless-rpc in combination are
redundant (the only user is get_info_refs() in http-backend.c), so we
can just pass in --advertise-refs only.
Since we need to touch all the serve() and advertise_capabilities()
codepaths let's rename them to less clever and obvious names, it's
been suggested numerous times, the latest of which is [1]'s suggestion
for protocol_v2_serve_loop(). Let's go with that.
1. https://lore.kernel.org/git/CAFQ2z_NyGb8rju5CKzmo6KhZXD0Dp21u-BbyCb2aNxLEoSPRJw@mail.gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
if (!advertise_refs) {
|
|
|
|
packet_reader_init(&reader, 0, NULL, 0,
|
|
|
|
PACKET_READ_CHOMP_NEWLINE |
|
|
|
|
PACKET_READ_DIE_ON_ERR_PACKET);
|
|
|
|
|
|
|
|
receive_needs(&data, &reader);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* An EOF at this exact point in negotiation should be
|
|
|
|
* acceptable from stateless clients as they will consume the
|
|
|
|
* shallow list before doing subsequent rpc with haves/etc.
|
|
|
|
*/
|
|
|
|
if (data.stateless_rpc)
|
|
|
|
reader.options |= PACKET_READ_GENTLE_ON_EOF;
|
|
|
|
|
|
|
|
if (data.want_obj.nr &&
|
|
|
|
packet_reader_peek(&reader) != PACKET_READ_EOF) {
|
|
|
|
reader.options &= ~PACKET_READ_GENTLE_ON_EOF;
|
|
|
|
get_common_commits(&data, &reader);
|
|
|
|
create_pack_file(&data, NULL);
|
|
|
|
}
|
|
|
|
}
|
upload-pack: clear filter_options for each v2 fetch command
Because of the request/response model of protocol v2, the
upload_pack_v2() function is sometimes called twice in the same
process, while 'struct list_objects_filter_options filter_options'
was declared as static at the beginning of 'upload-pack.c'.
This made the check in list_objects_filter_die_if_populated(), which
is called by process_args(), fail the second time upload_pack_v2() is
called, as filter_options had already been populated the first time.
To fix that, filter_options is not static any more. It's now owned
directly by upload_pack(). It's now also part of 'struct
upload_pack_data', so that it's owned indirectly by upload_pack_v2().
In the long term, the goal is to also have upload_pack() use
'struct upload_pack_data', so adding filter_options to this struct
makes more sense than to have it owned directly by upload_pack_v2().
This fixes the first of the 2 bugs documented by d0badf8797
(partial-clone: demonstrate bugs in partial fetch, 2020-02-21).
Helped-by: Derrick Stolee <dstolee@microsoft.com>
Helped-by: Jeff King <peff@peff.net>
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
|
|
|
|
upload_pack_data_clear(&data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_want(struct packet_writer *writer, const char *line,
|
|
|
|
struct object_array *want_obj)
|
|
|
|
{
|
|
|
|
const char *arg;
|
|
|
|
if (skip_prefix(line, "want ", &arg)) {
|
|
|
|
struct object_id oid;
|
|
|
|
struct object *o;
|
|
|
|
|
|
|
|
if (get_oid_hex(arg, &oid))
|
|
|
|
die("git upload-pack: protocol error, "
|
|
|
|
"expected to get oid, not '%s'", line);
|
|
|
|
|
|
|
|
o = parse_object_with_flags(the_repository, &oid,
|
|
|
|
PARSE_OBJECT_SKIP_HASH_CHECK);
|
|
|
|
|
|
|
|
if (!o) {
|
|
|
|
packet_writer_error(writer,
|
|
|
|
"upload-pack: not our ref %s",
|
|
|
|
oid_to_hex(&oid));
|
|
|
|
die("git upload-pack: not our ref %s",
|
|
|
|
oid_to_hex(&oid));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(o->flags & WANTED)) {
|
|
|
|
o->flags |= WANTED;
|
|
|
|
add_object_array(o, NULL, want_obj);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_want_ref(struct packet_writer *writer, const char *line,
|
|
|
|
struct string_list *wanted_refs,
|
|
|
|
struct string_list *hidden_refs,
|
|
|
|
struct object_array *want_obj)
|
|
|
|
{
|
|
|
|
const char *refname_nons;
|
|
|
|
if (skip_prefix(line, "want-ref ", &refname_nons)) {
|
|
|
|
struct object_id oid;
|
|
|
|
struct string_list_item *item;
|
|
|
|
struct object *o = NULL;
|
|
|
|
struct strbuf refname = STRBUF_INIT;
|
|
|
|
|
|
|
|
strbuf_addf(&refname, "%s%s", get_git_namespace(), refname_nons);
|
|
|
|
if (ref_is_hidden(refname_nons, refname.buf, hidden_refs) ||
|
|
|
|
read_ref(refname.buf, &oid)) {
|
|
|
|
packet_writer_error(writer, "unknown ref %s", refname_nons);
|
|
|
|
die("unknown ref %s", refname_nons);
|
|
|
|
}
|
|
|
|
strbuf_release(&refname);
|
|
|
|
|
|
|
|
item = string_list_append(wanted_refs, refname_nons);
|
|
|
|
item->util = oiddup(&oid);
|
|
|
|
|
|
|
|
if (!starts_with(refname_nons, "refs/tags/")) {
|
|
|
|
struct commit *commit = lookup_commit_in_graph(the_repository, &oid);
|
|
|
|
if (commit)
|
|
|
|
o = &commit->object;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!o)
|
|
|
|
o = parse_object_or_die(&oid, refname_nons);
|
|
|
|
|
|
|
|
if (!(o->flags & WANTED)) {
|
|
|
|
o->flags |= WANTED;
|
|
|
|
add_object_array(o, NULL, want_obj);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_have(const char *line, struct oid_array *haves)
|
|
|
|
{
|
|
|
|
const char *arg;
|
|
|
|
if (skip_prefix(line, "have ", &arg)) {
|
|
|
|
struct object_id oid;
|
|
|
|
|
|
|
|
if (get_oid_hex(arg, &oid))
|
|
|
|
die("git upload-pack: expected SHA1 object, got '%s'", arg);
|
|
|
|
oid_array_append(haves, &oid);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void process_args(struct packet_reader *request,
|
|
|
|
struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
while (packet_reader_read(request) == PACKET_READ_NORMAL) {
|
|
|
|
const char *arg = request->line;
|
|
|
|
const char *p;
|
|
|
|
|
|
|
|
/* process want */
|
|
|
|
if (parse_want(&data->writer, arg, &data->want_obj))
|
|
|
|
continue;
|
|
|
|
if (data->allow_ref_in_want &&
|
|
|
|
parse_want_ref(&data->writer, arg, &data->wanted_refs,
|
|
|
|
&data->hidden_refs, &data->want_obj))
|
|
|
|
continue;
|
|
|
|
/* process have line */
|
|
|
|
if (parse_have(arg, &data->haves))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* process args like thin-pack */
|
|
|
|
if (!strcmp(arg, "thin-pack")) {
|
|
|
|
data->use_thin_pack = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "ofs-delta")) {
|
|
|
|
data->use_ofs_delta = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "no-progress")) {
|
|
|
|
data->no_progress = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "include-tag")) {
|
|
|
|
data->use_include_tag = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "done")) {
|
|
|
|
data->done = 1;
|
|
|
|
continue;
|
|
|
|
}
|
fetch: teach independent negotiation (no packfile)
Currently, the packfile negotiation step within a Git fetch cannot be
done independent of sending the packfile, even though there is at least
one application wherein this is useful. Therefore, make it possible for
this negotiation step to be done independently. A subsequent commit will
use this for one such application - push negotiation.
This feature is for protocol v2 only. (An implementation for protocol v0
would require a separate implementation in the fetch, transport, and
transport helper code.)
In the protocol, the main hindrance towards independent negotiation is
that the server can unilaterally decide to send the packfile. This is
solved by a "wait-for-done" argument: the server will then wait for the
client to say "done". In practice, the client will never say it; instead
it will cease requests once it is satisfied.
In the client, the main change lies in the transport and transport
helper code. fetch_refs_via_pack() performs everything needed - protocol
version and capability checks, and the negotiation itself.
There are 2 code paths that do not go through fetch_refs_via_pack() that
needed to be individually excluded: the bundle transport (excluded
through requiring smart_options, which the bundle transport doesn't
support) and transport helpers that do not support takeover. If or when
we support independent negotiation for protocol v0, we will need to
modify these 2 code paths to support it. But for now, report failure if
independent negotiation is requested in these cases.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
if (!strcmp(arg, "wait-for-done")) {
|
|
|
|
data->wait_for_done = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Shallow related arguments */
|
|
|
|
if (process_shallow(arg, &data->shallows))
|
|
|
|
continue;
|
|
|
|
if (process_deepen(arg, &data->depth))
|
|
|
|
continue;
|
|
|
|
if (process_deepen_since(arg, &data->deepen_since,
|
|
|
|
&data->deepen_rev_list))
|
|
|
|
continue;
|
|
|
|
if (process_deepen_not(arg, &data->deepen_not,
|
|
|
|
&data->deepen_rev_list))
|
|
|
|
continue;
|
|
|
|
if (!strcmp(arg, "deepen-relative")) {
|
|
|
|
data->deepen_relative = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (data->allow_filter && skip_prefix(arg, "filter ", &p)) {
|
upload-pack: clear filter_options for each v2 fetch command
Because of the request/response model of protocol v2, the
upload_pack_v2() function is sometimes called twice in the same
process, while 'struct list_objects_filter_options filter_options'
was declared as static at the beginning of 'upload-pack.c'.
This made the check in list_objects_filter_die_if_populated(), which
is called by process_args(), fail the second time upload_pack_v2() is
called, as filter_options had already been populated the first time.
To fix that, filter_options is not static any more. It's now owned
directly by upload_pack(). It's now also part of 'struct
upload_pack_data', so that it's owned indirectly by upload_pack_v2().
In the long term, the goal is to also have upload_pack() use
'struct upload_pack_data', so adding filter_options to this struct
makes more sense than to have it owned directly by upload_pack_v2().
This fixes the first of the 2 bugs documented by d0badf8797
(partial-clone: demonstrate bugs in partial fetch, 2020-02-21).
Helped-by: Derrick Stolee <dstolee@microsoft.com>
Helped-by: Jeff King <peff@peff.net>
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
list_objects_filter_die_if_populated(&data->filter_options);
|
|
|
|
parse_list_objects_filter(&data->filter_options, p);
|
upload-pack.c: allow banning certain object filter(s)
Git clients may ask the server for a partial set of objects, where the
set of objects being requested is refined by one or more object filters.
Server administrators can configure 'git upload-pack' to allow or ban
these filters by setting the 'uploadpack.allowFilter' variable to
'true' or 'false', respectively.
However, administrators using bitmaps may wish to allow certain kinds of
object filters, but ban others. Specifically, they may wish to allow
object filters that can be optimized by the use of bitmaps, while
rejecting other object filters which aren't and represent a perceived
performance degradation (as well as an increased load factor on the
server).
Allow configuring 'git upload-pack' to support object filters on a
case-by-case basis by introducing two new configuration variables:
- 'uploadpackfilter.allow'
- 'uploadpackfilter.<kind>.allow'
where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on.
Setting the second configuration variable for any valid value of
'<kind>' explicitly allows or disallows restricting that kind of object
filter.
If a client requests the object filter <kind> and the respective
configuration value is not set, 'git upload-pack' will default to the
value of 'uploadpackfilter.allow', which itself defaults to 'true' to
maintain backwards compatibility. Note that this differs from
'uploadpack.allowfilter', which controls whether or not the 'filter'
capability is advertised.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
die_if_using_banned_filter(data);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((git_env_bool("GIT_TEST_SIDEBAND_ALL", 0) ||
|
|
|
|
data->allow_sideband_all) &&
|
|
|
|
!strcmp(arg, "sideband-all")) {
|
|
|
|
data->writer.use_sideband = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (skip_prefix(arg, "packfile-uris ", &p)) {
|
|
|
|
string_list_split(&data->uri_protocols, p, ',', -1);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ignore unknown lines maybe? */
|
|
|
|
die("unexpected line: '%s'", arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (data->uri_protocols.nr && !data->writer.use_sideband)
|
|
|
|
string_list_clear(&data->uri_protocols, 0);
|
|
|
|
|
|
|
|
if (request->status != PACKET_READ_FLUSH)
|
|
|
|
die(_("expected flush after fetch arguments"));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int process_haves(struct upload_pack_data *data, struct oid_array *common)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Process haves */
|
|
|
|
for (i = 0; i < data->haves.nr; i++) {
|
|
|
|
const struct object_id *oid = &data->haves.oid[i];
|
|
|
|
|
|
|
|
if (!repo_has_object_file_with_flags(the_repository, oid,
|
|
|
|
OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
oid_array_append(common, oid);
|
|
|
|
|
|
|
|
do_got_oid(data, oid);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int send_acks(struct upload_pack_data *data, struct oid_array *acks)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
packet_writer_write(&data->writer, "acknowledgments\n");
|
|
|
|
|
|
|
|
/* Send Acks */
|
|
|
|
if (!acks->nr)
|
|
|
|
packet_writer_write(&data->writer, "NAK\n");
|
|
|
|
|
|
|
|
for (i = 0; i < acks->nr; i++) {
|
|
|
|
packet_writer_write(&data->writer, "ACK %s\n",
|
|
|
|
oid_to_hex(&acks->oid[i]));
|
|
|
|
}
|
|
|
|
|
fetch: teach independent negotiation (no packfile)
Currently, the packfile negotiation step within a Git fetch cannot be
done independent of sending the packfile, even though there is at least
one application wherein this is useful. Therefore, make it possible for
this negotiation step to be done independently. A subsequent commit will
use this for one such application - push negotiation.
This feature is for protocol v2 only. (An implementation for protocol v0
would require a separate implementation in the fetch, transport, and
transport helper code.)
In the protocol, the main hindrance towards independent negotiation is
that the server can unilaterally decide to send the packfile. This is
solved by a "wait-for-done" argument: the server will then wait for the
client to say "done". In practice, the client will never say it; instead
it will cease requests once it is satisfied.
In the client, the main change lies in the transport and transport
helper code. fetch_refs_via_pack() performs everything needed - protocol
version and capability checks, and the negotiation itself.
There are 2 code paths that do not go through fetch_refs_via_pack() that
needed to be individually excluded: the bundle transport (excluded
through requiring smart_options, which the bundle transport doesn't
support) and transport helpers that do not support takeover. If or when
we support independent negotiation for protocol v0, we will need to
modify these 2 code paths to support it. But for now, report failure if
independent negotiation is requested in these cases.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
if (!data->wait_for_done && ok_to_give_up(data)) {
|
|
|
|
/* Send Ready */
|
|
|
|
packet_writer_write(&data->writer, "ready\n");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int process_haves_and_send_acks(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
struct oid_array common = OID_ARRAY_INIT;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
process_haves(data, &common);
|
|
|
|
if (data->done) {
|
|
|
|
ret = 1;
|
|
|
|
} else if (send_acks(data, &common)) {
|
|
|
|
packet_writer_delim(&data->writer);
|
|
|
|
ret = 1;
|
|
|
|
} else {
|
|
|
|
/* Add Flush */
|
|
|
|
packet_writer_flush(&data->writer);
|
|
|
|
ret = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
oid_array_clear(&data->haves);
|
|
|
|
oid_array_clear(&common);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void send_wanted_ref_info(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
const struct string_list_item *item;
|
|
|
|
|
|
|
|
if (!data->wanted_refs.nr)
|
|
|
|
return;
|
|
|
|
|
|
|
|
packet_writer_write(&data->writer, "wanted-refs\n");
|
|
|
|
|
|
|
|
for_each_string_list_item(item, &data->wanted_refs) {
|
|
|
|
packet_writer_write(&data->writer, "%s %s\n",
|
|
|
|
oid_to_hex(item->util),
|
|
|
|
item->string);
|
|
|
|
}
|
|
|
|
|
|
|
|
packet_writer_delim(&data->writer);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void send_shallow_info(struct upload_pack_data *data)
|
|
|
|
{
|
|
|
|
/* No shallow info needs to be sent */
|
|
|
|
if (!data->depth && !data->deepen_rev_list && !data->shallows.nr &&
|
|
|
|
!is_repository_shallow(the_repository))
|
|
|
|
return;
|
|
|
|
|
|
|
|
packet_writer_write(&data->writer, "shallow-info\n");
|
|
|
|
|
|
|
|
if (!send_shallow_list(data) &&
|
|
|
|
is_repository_shallow(the_repository))
|
|
|
|
deepen(data, INFINITE_DEPTH);
|
|
|
|
|
|
|
|
packet_delim(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* States of the request-handling loop in upload_pack_v2(). */
enum fetch_state {
	FETCH_PROCESS_ARGS = 0,	/* read the request's argument lines */
	FETCH_SEND_ACKS,	/* answer the client's "have" lines */
	FETCH_SEND_PACK,	/* send section headers and the packfile */
	FETCH_DONE,		/* nothing more to do for this request */
};
|
|
|
|
|
|
|
|
/*
 * Serve one protocol v2 "fetch" request read from `request`.
 *
 * The request is driven through a small state machine (enum fetch_state):
 * arguments are parsed, then either nothing is sent, "have" lines are
 * acknowledged, or a pack is generated.  Always returns 0.
 */
int upload_pack_v2(struct repository *r UNUSED, struct packet_reader *request)
{
	enum fetch_state state = FETCH_PROCESS_ARGS;
	struct upload_pack_data data;

	/*
	 * This function can run several times within one session; object
	 * flags left behind by an earlier request (e.g. CLIENT_SHALLOW)
	 * must not leak into this one.
	 */
	clear_object_flags(ALL_FLAGS);

	upload_pack_data_init(&data);
	data.use_sideband = LARGE_PACKET_MAX;
	get_upload_pack_config(&data);

	while (state != FETCH_DONE) {
		switch (state) {
		case FETCH_PROCESS_ARGS:
			process_args(request, &data);

			if (data.want_obj.nr || data.wait_for_done) {
				/*
				 * 'have' lines get ACKed first; with 'want's
				 * but no 'have's we can immediately go on to
				 * construct and send a pack.
				 */
				state = data.haves.nr ? FETCH_SEND_ACKS
						      : FETCH_SEND_PACK;
			} else {
				/*
				 * No 'want' lines, and no "wait-for-done"
				 * (with which sending only 'have's is
				 * reasonable); guess they didn't want
				 * anything.
				 */
				state = FETCH_DONE;
			}
			break;
		case FETCH_SEND_ACKS:
			state = process_haves_and_send_acks(&data)
				? FETCH_SEND_PACK : FETCH_DONE;
			break;
		case FETCH_SEND_PACK:
			send_wanted_ref_info(&data);
			send_shallow_info(&data);

			if (!data.uri_protocols.nr) {
				packet_writer_write(&data.writer, "packfile\n");
				create_pack_file(&data, NULL);
			} else {
				/* No "packfile" header is written here on this path. */
				create_pack_file(&data, &data.uri_protocols);
			}
			state = FETCH_DONE;
			break;
		case FETCH_DONE:
			continue;
		}
	}

	upload_pack_data_clear(&data);
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Fill in the feature list for the fetch capability advertisement.
 *
 * When `value` is non-NULL, "shallow wait-for-done" is appended to it,
 * followed by " filter", " ref-in-want", " sideband-all" and
 * " packfile-uris" for each feature that the configuration of `r` (or,
 * for sideband-all, the GIT_TEST_SIDEBAND_ALL environment variable)
 * turns on.  Returns 1 unconditionally.
 */
int upload_pack_advertise(struct repository *r,
			  struct strbuf *value)
{
	int filter_ok;
	int ref_in_want_ok;
	int sideband_all_ok;
	char *uri_config = NULL;

	if (!value)
		return 1;

	strbuf_addstr(value, "shallow wait-for-done");

	/* A config getter returning 0 means the variable was found. */
	if (!repo_config_get_bool(r, "uploadpack.allowfilter", &filter_ok) &&
	    filter_ok)
		strbuf_addstr(value, " filter");

	if (!repo_config_get_bool(r, "uploadpack.allowrefinwant",
				  &ref_in_want_ok) &&
	    ref_in_want_ok)
		strbuf_addstr(value, " ref-in-want");

	if (git_env_bool("GIT_TEST_SIDEBAND_ALL", 0) ||
	    (!repo_config_get_bool(r, "uploadpack.allowsidebandall",
				   &sideband_all_ok) &&
	     sideband_all_ok))
		strbuf_addstr(value, " sideband-all");

	/* Only the presence of the setting matters; its value is discarded. */
	if (!repo_config_get_string(r, "uploadpack.blobpackfileuri",
				    &uri_config) &&
	    uri_config) {
		strbuf_addstr(value, " packfile-uris");
		free(uri_config);
	}

	return 1;
}
|