|
|
|
/*
|
|
|
|
* Builtin "git clone"
|
|
|
|
*
|
|
|
|
* Copyright (c) 2007 Kristian Høgsberg <krh@redhat.com>,
|
|
|
|
* 2008 Daniel Barkalow <barkalow@iabervon.org>
|
|
|
|
* Based on git-commit.sh by Junio C Hamano and Linus Torvalds
|
|
|
|
*
|
|
|
|
* Clone a repository into a different directory that does not yet exist.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define USE_THE_INDEX_COMPATIBILITY_MACROS
|
Fix sparse warnings
Fix warnings from 'make check'.
- These files don't include 'builtin.h' causing sparse to complain that
cmd_* isn't declared:
builtin/clone.c:364, builtin/fetch-pack.c:797,
builtin/fmt-merge-msg.c:34, builtin/hash-object.c:78,
builtin/merge-index.c:69, builtin/merge-recursive.c:22
builtin/merge-tree.c:341, builtin/mktag.c:156, builtin/notes.c:426
builtin/notes.c:822, builtin/pack-redundant.c:596,
builtin/pack-refs.c:10, builtin/patch-id.c:60, builtin/patch-id.c:149,
builtin/remote.c:1512, builtin/remote-ext.c:240,
builtin/remote-fd.c:53, builtin/reset.c:236, builtin/send-pack.c:384,
builtin/unpack-file.c:25, builtin/var.c:75
- These files have symbols which should be marked static since they're
only file scope:
submodule.c:12, diff.c:631, replace_object.c:92, submodule.c:13,
submodule.c:14, trace.c:78, transport.c:195, transport-helper.c:79,
unpack-trees.c:19, url.c:3, url.c:18, url.c:104, url.c:117, url.c:123,
url.c:129, url.c:136, thread-utils.c:21, thread-utils.c:48
- These files redeclare symbols to be different types:
builtin/index-pack.c:210, parse-options.c:564, parse-options.c:571,
usage.c:49, usage.c:58, usage.c:63, usage.c:72
- These files use a literal integer 0 when they really should use a NULL
pointer:
daemon.c:663, fast-import.c:2942, imap-send.c:1072, notes-merge.c:362
While we're in the area, clean up some unused #includes in builtin files
(mostly exec_cmd.h).
Signed-off-by: Stephen Boyd <bebarino@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
#include "builtin.h"
|
|
|
|
#include "config.h"
|
|
|
|
#include "lockfile.h"
|
|
|
|
#include "parse-options.h"
|
|
|
|
#include "fetch-pack.h"
|
|
|
|
#include "refs.h"
|
|
|
|
#include "refspec.h"
|
|
|
|
#include "object-store.h"
|
|
|
|
#include "tree.h"
|
|
|
|
#include "tree-walk.h"
|
|
|
|
#include "unpack-trees.h"
|
|
|
|
#include "transport.h"
|
|
|
|
#include "strbuf.h"
|
|
|
|
#include "dir.h"
|
|
|
|
#include "dir-iterator.h"
|
|
|
|
#include "iterator.h"
|
chain kill signals for cleanup functions
If a piece of code wanted to do some cleanup before exiting
(e.g., cleaning up a lockfile or a tempfile), our usual
strategy was to install a signal handler that did something
like this:
do_cleanup(); /* actual work */
signal(signo, SIG_DFL); /* restore previous behavior */
raise(signo); /* deliver signal, killing ourselves */
For a single handler, this works fine. However, if we want
to clean up two _different_ things, we run into a problem.
The most recently installed handler will run, but when it
removes itself as a handler, it doesn't put back the first
handler.
This patch introduces sigchain, a tiny library for handling
a stack of signal handlers. You sigchain_push each handler,
and use sigchain_pop to restore whoever was before you in
the stack.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
#include "sigchain.h"
|
|
|
|
#include "branch.h"
|
|
|
|
#include "remote.h"
|
|
|
|
#include "run-command.h"
|
|
|
|
#include "connected.h"
|
|
|
|
#include "packfile.h"
|
|
|
|
#include "list-objects-filter-options.h"
|
|
|
|
#include "hook.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Overall FIXMEs:
|
|
|
|
* - respect DB_ENVIRONMENT for .git/objects.
|
|
|
|
*
|
|
|
|
* Implementation notes:
|
|
|
|
* - dropping use-separate-remote and no-separate-remote compatibility
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static const char * const builtin_clone_usage[] = {
|
|
|
|
N_("git clone [<options>] [--] <repo> [<dir>]"),
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static int option_no_checkout, option_bare, option_mirror, option_single_branch = -1;
|
|
|
|
static int option_local = -1, option_no_hardlinks, option_shared;
|
|
|
|
static int option_no_tags;
|
|
|
|
static int option_shallow_submodules;
|
|
|
|
static int option_reject_shallow = -1; /* unspecified */
|
|
|
|
static int config_reject_shallow = -1; /* unspecified */
|
|
|
|
static int deepen;
|
|
|
|
static char *option_template, *option_depth, *option_since;
|
|
|
|
static char *option_origin = NULL;
|
|
|
|
static char *remote_name = NULL;
|
|
|
|
static char *option_branch = NULL;
|
|
|
|
static struct string_list option_not = STRING_LIST_INIT_NODUP;
|
|
|
|
static const char *real_git_dir;
|
|
|
|
static char *option_upload_pack = "git-upload-pack";
|
|
|
|
static int option_verbosity;
|
|
|
|
static int option_progress = -1;
|
|
|
|
static int option_sparse_checkout;
|
|
|
|
static enum transport_family family;
|
|
|
|
static struct string_list option_config = STRING_LIST_INIT_NODUP;
|
|
|
|
static struct string_list option_required_reference = STRING_LIST_INIT_NODUP;
|
|
|
|
static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP;
|
|
|
|
static int option_dissociate;
|
|
|
|
static int max_jobs = -1;
|
|
|
|
static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP;
|
|
|
|
static struct list_objects_filter_options filter_options;
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
static int option_filter_submodules = -1; /* unspecified */
|
|
|
|
static int config_filter_submodules = -1; /* unspecified */
|
|
|
|
static struct string_list server_options = STRING_LIST_INIT_NODUP;
|
|
|
|
static int option_remote_submodules;
|
|
|
|
|
|
|
|
static int recurse_submodules_cb(const struct option *opt,
|
|
|
|
const char *arg, int unset)
|
|
|
|
{
|
|
|
|
if (unset)
|
|
|
|
string_list_clear((struct string_list *)opt->value, 0);
|
|
|
|
else if (arg)
|
|
|
|
string_list_append((struct string_list *)opt->value, arg);
|
|
|
|
else
|
|
|
|
string_list_append((struct string_list *)opt->value,
|
|
|
|
(const char *)opt->defval);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct option builtin_clone_options[] = {
|
|
|
|
OPT__VERBOSITY(&option_verbosity),
|
|
|
|
OPT_BOOL(0, "progress", &option_progress,
|
|
|
|
N_("force progress reporting")),
|
|
|
|
OPT_BOOL(0, "reject-shallow", &option_reject_shallow,
|
|
|
|
N_("don't clone shallow repository")),
|
|
|
|
OPT_BOOL('n', "no-checkout", &option_no_checkout,
|
|
|
|
N_("don't create a checkout")),
|
|
|
|
OPT_BOOL(0, "bare", &option_bare, N_("create a bare repository")),
|
|
|
|
OPT_HIDDEN_BOOL(0, "naked", &option_bare,
|
|
|
|
N_("create a bare repository")),
|
|
|
|
OPT_BOOL(0, "mirror", &option_mirror,
|
|
|
|
N_("create a mirror repository (implies bare)")),
|
|
|
|
OPT_BOOL('l', "local", &option_local,
|
|
|
|
N_("to clone from a local repository")),
|
|
|
|
OPT_BOOL(0, "no-hardlinks", &option_no_hardlinks,
|
|
|
|
N_("don't use local hardlinks, always copy")),
|
|
|
|
OPT_BOOL('s', "shared", &option_shared,
|
|
|
|
N_("setup as shared repository")),
|
|
|
|
{ OPTION_CALLBACK, 0, "recurse-submodules", &option_recurse_submodules,
|
|
|
|
N_("pathspec"), N_("initialize submodules in the clone"),
|
|
|
|
PARSE_OPT_OPTARG, recurse_submodules_cb, (intptr_t)"." },
|
|
|
|
OPT_ALIAS(0, "recursive", "recurse-submodules"),
|
|
|
|
OPT_INTEGER('j', "jobs", &max_jobs,
|
|
|
|
N_("number of submodules cloned in parallel")),
|
|
|
|
OPT_STRING(0, "template", &option_template, N_("template-directory"),
|
|
|
|
N_("directory from which templates will be used")),
|
|
|
|
OPT_STRING_LIST(0, "reference", &option_required_reference, N_("repo"),
|
|
|
|
N_("reference repository")),
|
|
|
|
OPT_STRING_LIST(0, "reference-if-able", &option_optional_reference,
|
|
|
|
N_("repo"), N_("reference repository")),
|
|
|
|
OPT_BOOL(0, "dissociate", &option_dissociate,
|
|
|
|
N_("use --reference only while cloning")),
|
|
|
|
OPT_STRING('o', "origin", &option_origin, N_("name"),
|
|
|
|
N_("use <name> instead of 'origin' to track upstream")),
|
|
|
|
OPT_STRING('b', "branch", &option_branch, N_("branch"),
|
|
|
|
N_("checkout <branch> instead of the remote's HEAD")),
|
|
|
|
OPT_STRING('u', "upload-pack", &option_upload_pack, N_("path"),
|
|
|
|
N_("path to git-upload-pack on the remote")),
|
|
|
|
OPT_STRING(0, "depth", &option_depth, N_("depth"),
|
|
|
|
N_("create a shallow clone of that depth")),
|
|
|
|
OPT_STRING(0, "shallow-since", &option_since, N_("time"),
|
|
|
|
N_("create a shallow clone since a specific time")),
|
|
|
|
OPT_STRING_LIST(0, "shallow-exclude", &option_not, N_("revision"),
|
|
|
|
N_("deepen history of shallow clone, excluding rev")),
|
|
|
|
OPT_BOOL(0, "single-branch", &option_single_branch,
|
|
|
|
N_("clone only one branch, HEAD or --branch")),
|
|
|
|
OPT_BOOL(0, "no-tags", &option_no_tags,
|
|
|
|
N_("don't clone any tags, and make later fetches not to follow them")),
|
|
|
|
OPT_BOOL(0, "shallow-submodules", &option_shallow_submodules,
|
|
|
|
N_("any cloned submodules will be shallow")),
|
|
|
|
OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"),
|
|
|
|
N_("separate git dir from working tree")),
|
|
|
|
OPT_STRING_LIST('c', "config", &option_config, N_("key=value"),
|
|
|
|
N_("set config inside the new repository")),
|
|
|
|
OPT_STRING_LIST(0, "server-option", &server_options,
|
|
|
|
N_("server-specific"), N_("option to transmit")),
|
|
|
|
OPT_SET_INT('4', "ipv4", &family, N_("use IPv4 addresses only"),
|
|
|
|
TRANSPORT_FAMILY_IPV4),
|
|
|
|
OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
|
|
|
|
TRANSPORT_FAMILY_IPV6),
|
|
|
|
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
OPT_BOOL(0, "also-filter-submodules", &option_filter_submodules,
|
|
|
|
N_("apply partial clone filters to submodules")),
|
|
|
|
OPT_BOOL(0, "remote-submodules", &option_remote_submodules,
|
|
|
|
N_("any cloned submodules will use their remote-tracking branch")),
|
|
|
|
OPT_BOOL(0, "sparse", &option_sparse_checkout,
|
|
|
|
N_("initialize sparse-checkout file to include only files at root")),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char *get_repo_path_1(struct strbuf *path, int *is_bundle)
|
|
|
|
{
|
standardize and improve lookup rules for external local repos
When you specify a local repository on the command line of
clone, ls-remote, upload-pack, receive-pack, or upload-archive,
or in a request to git-daemon, we perform a little bit of
lookup magic, doing things like looking in working trees for
.git directories and appending ".git" for bare repos.
For clone, this magic happens in get_repo_path. For
everything else, it happens in enter_repo. In both cases,
there are some ambiguous or confusing cases that aren't
handled well, and there is one case that is not handled the
same by both methods.
This patch tries to provide (and test!) standard, sensible
lookup rules for both code paths. The intended changes are:
1. When looking up "foo", we have always preferred
a working tree "foo" (containing "foo/.git" over the
bare "foo.git". But we did not prefer a bare "foo" over
"foo.git". With this patch, we do so.
2. We would select directories that existed but didn't
actually look like git repositories. With this patch,
we make sure a selected directory looks like a git
repo. Not only is this more sensible in general, but it
will help anybody who is negatively affected by change
(1) negatively (e.g., if they had "foo.git" next to its
separate work tree "foo", and expect to keep finding
"foo.git" when they reference "foo").
3. The enter_repo code path would, given "foo", look for
"foo.git/.git" (i.e., do the ".git" append magic even
for a repo with working tree). The clone code path did
not; with this patch, they now behave the same.
In the unlikely case of a working tree overlaying a bare
repo (i.e., a ".git" directory _inside_ a bare repo), we
continue to treat it as a working tree (prefering the
"inner" .git over the bare repo). This is mainly because the
combination seems nonsensical, and I'd rather stick with
existing behavior on the off chance that somebody is relying
on it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
static char *suffix[] = { "/.git", "", ".git/.git", ".git" };
|
|
|
|
static char *bundle_suffix[] = { ".bundle", "" };
|
|
|
|
size_t baselen = path->len;
|
|
|
|
struct stat st;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(suffix); i++) {
|
|
|
|
strbuf_setlen(path, baselen);
|
|
|
|
strbuf_addstr(path, suffix[i]);
|
|
|
|
if (stat(path->buf, &st))
|
|
|
|
continue;
|
|
|
|
if (S_ISDIR(st.st_mode) && is_git_directory(path->buf)) {
|
|
|
|
*is_bundle = 0;
|
|
|
|
return path->buf;
|
|
|
|
} else if (S_ISREG(st.st_mode) && st.st_size > 8) {
|
|
|
|
/* Is it a "gitfile"? */
|
|
|
|
char signature[8];
|
|
|
|
const char *dst;
|
|
|
|
int len, fd = open(path->buf, O_RDONLY);
|
|
|
|
if (fd < 0)
|
|
|
|
continue;
|
|
|
|
len = read_in_full(fd, signature, 8);
|
|
|
|
close(fd);
|
|
|
|
if (len != 8 || strncmp(signature, "gitdir: ", 8))
|
|
|
|
continue;
|
|
|
|
dst = read_gitfile(path->buf);
|
|
|
|
if (dst) {
|
|
|
|
*is_bundle = 0;
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(bundle_suffix); i++) {
|
|
|
|
strbuf_setlen(path, baselen);
|
|
|
|
strbuf_addstr(path, bundle_suffix[i]);
|
|
|
|
if (!stat(path->buf, &st) && S_ISREG(st.st_mode)) {
|
|
|
|
*is_bundle = 1;
|
|
|
|
return path->buf;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static char *get_repo_path(const char *repo, int *is_bundle)
|
|
|
|
{
|
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
const char *raw;
|
|
|
|
char *canon;
|
|
|
|
|
|
|
|
strbuf_addstr(&path, repo);
|
|
|
|
raw = get_repo_path_1(&path, is_bundle);
|
|
|
|
canon = raw ? absolute_pathdup(raw) : NULL;
|
|
|
|
strbuf_release(&path);
|
|
|
|
return canon;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int add_one_reference(struct string_list_item *item, void *cb_data)
|
|
|
|
{
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
int *required = cb_data;
|
|
|
|
char *ref_git = compute_alternate_path(item->string, &err);
|
|
|
|
|
|
|
|
if (!ref_git) {
|
|
|
|
if (*required)
|
|
|
|
die("%s", err.buf);
|
|
|
|
else
|
|
|
|
fprintf(stderr,
|
|
|
|
_("info: Could not add alternate for '%s': %s\n"),
|
|
|
|
item->string, err.buf);
|
|
|
|
} else {
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
strbuf_addf(&sb, "%s/objects", ref_git);
|
|
|
|
add_to_alternates_file(sb.buf);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&err);
|
|
|
|
free(ref_git);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void setup_reference(void)
|
|
|
|
{
|
|
|
|
int required = 1;
|
|
|
|
for_each_string_list(&option_required_reference,
|
|
|
|
add_one_reference, &required);
|
|
|
|
required = 0;
|
|
|
|
for_each_string_list(&option_optional_reference,
|
|
|
|
add_one_reference, &required);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void copy_alternates(struct strbuf *src, const char *src_repo)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Read from the source objects/info/alternates file
|
|
|
|
* and copy the entries to corresponding file in the
|
|
|
|
* destination repository with add_to_alternates_file().
|
|
|
|
* Both src and dst have "$path/objects/info/alternates".
|
|
|
|
*
|
|
|
|
* Instead of copying bit-for-bit from the original,
|
|
|
|
* we need to append to existing one so that the already
|
|
|
|
* created entry via "clone -s" is not lost, and also
|
|
|
|
* to turn entries with paths relative to the original
|
|
|
|
* absolute, so that they can be used in the new repository.
|
|
|
|
*/
|
|
|
|
FILE *in = xfopen(src->buf, "r");
|
|
|
|
struct strbuf line = STRBUF_INIT;
|
|
|
|
|
|
|
|
while (strbuf_getline(&line, in) != EOF) {
|
|
|
|
char *abs_path;
|
|
|
|
if (!line.len || line.buf[0] == '#')
|
|
|
|
continue;
|
|
|
|
if (is_absolute_path(line.buf)) {
|
|
|
|
add_to_alternates_file(line.buf);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
abs_path = mkpathdup("%s/objects/%s", src_repo, line.buf);
|
clone: detect errors in normalize_path_copy
When we are copying the alternates from the source
repository, if we find a relative path that is too deep for
the source (e.g., "../../../objects" from "/repo.git/objects"),
then normalize_path_copy will report an error and leave
trash in the buffer, which we will add to our new alternates
file. Instead, let's detect the error, print a warning, and
skip copying that alternate.
There's no need to die. The relative path is probably just
broken cruft in the source repo. If it turns out to have
been important for accessing some objects, we rely on other
parts of the clone to detect that, just as they would with a
missing object in the source repo itself (though note that
clones with "-s" are inherently local, which may do fewer
object-quality checks in the first place).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
if (!normalize_path_copy(abs_path, abs_path))
|
|
|
|
add_to_alternates_file(abs_path);
|
|
|
|
else
|
|
|
|
warning("skipping invalid relative alternate: %s/%s",
|
|
|
|
src_repo, line.buf);
|
|
|
|
free(abs_path);
|
|
|
|
}
|
|
|
|
strbuf_release(&line);
|
|
|
|
fclose(in);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Create directory "pathname" with "mode".  An already existing
 * directory is accepted silently; any other failure, or an existing
 * path that is not a directory, is reported through die()/die_errno().
 */
static void mkdir_if_missing(const char *pathname, mode_t mode)
{
	struct stat st;

	if (mkdir(pathname, mode) == 0)
		return;

	if (errno == EEXIST) {
		/* Something is already there; make sure it is a directory. */
		if (stat(pathname, &st))
			die_errno(_("failed to stat '%s'"), pathname);
		else if (!S_ISDIR(st.st_mode))
			die(_("%s exists and is not a directory"), pathname);
	} else {
		die_errno(_("failed to create directory '%s'"), pathname);
	}
}
|
|
|
|
|
|
|
|
static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
|
|
|
|
const char *src_repo)
|
|
|
|
{
|
|
|
|
int src_len, dest_len;
|
|
|
|
struct dir_iterator *iter;
|
|
|
|
int iter_status;
|
|
|
|
unsigned int flags;
|
|
|
|
struct strbuf realpath = STRBUF_INIT;
|
|
|
|
|
|
|
|
mkdir_if_missing(dest->buf, 0777);
|
|
|
|
|
|
|
|
flags = DIR_ITERATOR_PEDANTIC | DIR_ITERATOR_FOLLOW_SYMLINKS;
|
|
|
|
iter = dir_iterator_begin(src->buf, flags);
|
|
|
|
|
|
|
|
if (!iter)
|
|
|
|
die_errno(_("failed to start iterator over '%s'"), src->buf);
|
|
|
|
|
|
|
|
strbuf_addch(src, '/');
|
|
|
|
src_len = src->len;
|
|
|
|
strbuf_addch(dest, '/');
|
|
|
|
dest_len = dest->len;
|
|
|
|
|
|
|
|
while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
|
|
|
|
strbuf_setlen(src, src_len);
|
|
|
|
strbuf_addstr(src, iter->relative_path);
|
|
|
|
strbuf_setlen(dest, dest_len);
|
|
|
|
strbuf_addstr(dest, iter->relative_path);
|
|
|
|
|
|
|
|
if (S_ISDIR(iter->st.st_mode)) {
|
|
|
|
mkdir_if_missing(dest->buf, 0777);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Files that cannot be copied bit-for-bit... */
|
|
|
|
if (!fspathcmp(iter->relative_path, "info/alternates")) {
|
|
|
|
copy_alternates(src, src_repo);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unlink(dest->buf) && errno != ENOENT)
|
|
|
|
die_errno(_("failed to unlink '%s'"), dest->buf);
|
|
|
|
if (!option_no_hardlinks) {
|
|
|
|
strbuf_realpath(&realpath, src->buf, 1);
|
|
|
|
if (!link(realpath.buf, dest->buf))
|
|
|
|
continue;
|
|
|
|
if (option_local > 0)
|
|
|
|
die_errno(_("failed to create link '%s'"), dest->buf);
|
|
|
|
option_no_hardlinks = 1;
|
|
|
|
}
|
|
|
|
if (copy_file_with_time(dest->buf, src->buf, 0666))
|
|
|
|
die_errno(_("failed to copy file to '%s'"), dest->buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (iter_status != ITER_DONE) {
|
|
|
|
strbuf_setlen(src, src_len);
|
|
|
|
die(_("failed to iterate over '%s'"), src->buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&realpath);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void clone_local(const char *src_repo, const char *dest_repo)
|
|
|
|
{
|
|
|
|
if (option_shared) {
|
|
|
|
struct strbuf alt = STRBUF_INIT;
|
|
|
|
get_common_dir(&alt, src_repo);
|
|
|
|
strbuf_addstr(&alt, "/objects");
|
|
|
|
add_to_alternates_file(alt.buf);
|
|
|
|
strbuf_release(&alt);
|
|
|
|
} else {
|
|
|
|
struct strbuf src = STRBUF_INIT;
|
|
|
|
struct strbuf dest = STRBUF_INIT;
|
|
|
|
get_common_dir(&src, src_repo);
|
|
|
|
get_common_dir(&dest, dest_repo);
|
|
|
|
strbuf_addstr(&src, "/objects");
|
|
|
|
strbuf_addstr(&dest, "/objects");
|
|
|
|
copy_or_link_directory(&src, &dest, src_repo);
|
|
|
|
strbuf_release(&src);
|
|
|
|
strbuf_release(&dest);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (0 <= option_verbosity)
|
|
|
|
fprintf(stderr, _("done.\n"));
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *junk_work_tree;
|
|
|
|
static int junk_work_tree_flags;
|
|
|
|
static const char *junk_git_dir;
|
|
|
|
static int junk_git_dir_flags;
|
|
|
|
static enum {
|
|
|
|
JUNK_LEAVE_NONE,
|
|
|
|
JUNK_LEAVE_REPO,
|
|
|
|
JUNK_LEAVE_ALL
|
|
|
|
} junk_mode = JUNK_LEAVE_NONE;
|
|
|
|
|
|
|
|
static const char junk_leave_repo_msg[] =
|
|
|
|
N_("Clone succeeded, but checkout failed.\n"
|
|
|
|
"You can inspect what was checked out with 'git status'\n"
|
|
|
|
"and retry with 'git restore --source=HEAD :/'\n");
|
|
|
|
|
|
|
|
static void remove_junk(void)
|
|
|
|
{
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
|
|
|
|
switch (junk_mode) {
|
|
|
|
case JUNK_LEAVE_REPO:
|
|
|
|
warning("%s", _(junk_leave_repo_msg));
|
|
|
|
/* fall-through */
|
|
|
|
case JUNK_LEAVE_ALL:
|
|
|
|
return;
|
|
|
|
default:
|
|
|
|
/* proceed to removal */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (junk_git_dir) {
|
|
|
|
strbuf_addstr(&sb, junk_git_dir);
|
|
|
|
remove_dir_recursively(&sb, junk_git_dir_flags);
|
|
|
|
strbuf_reset(&sb);
|
|
|
|
}
|
|
|
|
if (junk_work_tree) {
|
|
|
|
strbuf_addstr(&sb, junk_work_tree);
|
|
|
|
remove_dir_recursively(&sb, junk_work_tree_flags);
|
|
|
|
}
|
|
|
|
strbuf_release(&sb);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Signal handler: run the junk cleanup, then pop this handler off the
 * sigchain stack for "signo" and re-raise the signal.
 */
static void remove_junk_on_signal(int signo)
{
	remove_junk();

	/* Pop ourselves first, so the re-raised signal is not handled here again. */
	sigchain_pop(signo);
	raise(signo);
}
|
|
|
|
|
|
|
|
static struct ref *find_remote_branch(const struct ref *refs, const char *branch)
|
|
|
|
{
|
|
|
|
struct ref *ref;
|
|
|
|
struct strbuf head = STRBUF_INIT;
|
|
|
|
strbuf_addstr(&head, "refs/heads/");
|
|
|
|
strbuf_addstr(&head, branch);
|
|
|
|
ref = find_ref_by_name(refs, head.buf);
|
|
|
|
strbuf_release(&head);
|
|
|
|
|
|
|
|
if (ref)
|
|
|
|
return ref;
|
|
|
|
|
|
|
|
strbuf_addstr(&head, "refs/tags/");
|
|
|
|
strbuf_addstr(&head, branch);
|
|
|
|
ref = find_ref_by_name(refs, head.buf);
|
|
|
|
strbuf_release(&head);
|
|
|
|
|
|
|
|
return ref;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ref *wanted_peer_refs(const struct ref *refs,
|
clone: respect additional configured fetch refspecs during initial fetch
The initial fetch during a clone doesn't transfer refs matching
additional fetch refspecs given on the command line as configuration
variables, e.g. '-c remote.origin.fetch=<refspec>'. This contradicts
the documentation stating that configuration variables specified via
'git clone -c <key>=<value> ...' "take effect immediately after the
repository is initialized, but before the remote history is fetched"
and the given example specifically mentions "adding additional fetch
refspecs to the origin remote". Furthermore, one-shot configuration
variables specified via 'git -c <key>=<value> clone ...', though not
written to the newly created repository's config file, live during the
lifetime of the 'clone' command, including the initial fetch. All
this implies that any fetch refspecs specified this way should already
be taken into account during the initial fetch.
The reason for this is that the initial fetch is not a fully fledged
'git fetch' but a bunch of direct calls into the fetch/transport
machinery with clone's own refs-to-refspec matching logic, which
bypasses parts of 'git fetch' processing configured fetch refspecs.
This logic only considers a single default refspec, potentially
influenced by options like '--single-branch' and '--mirror'. The
configured refspecs are, however, already read and parsed properly
when clone calls remote.c:remote_get(), but it never looks at the
parsed refspecs in the resulting 'struct remote'.
Modify clone to take the remote's configured fetch refspecs into
account to retrieve all matching refs during the initial fetch. Note
that we have to explicitly add the default fetch refspec to the
remote's refspecs, because at that point the remote only includes the
fetch refspecs specified on the command line.
Add tests to check that refspecs given both via 'git clone -c ...' and
'git -c ... clone' retrieve all refs matching either the default or
the additional refspecs, and that it works even when the user
specifies an alternative remote name via '--origin=<name>'.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
struct refspec *refspec)
|
|
|
|
{
|
clone: always fetch remote HEAD
In most cases, fetching the remote HEAD explicitly is
unnecessary. It's just a symref pointing to a branch which
we are already fetching, so we will already ask for its sha1.
However, if the remote has a detached HEAD, things are less
certain. We do not ask for HEAD's sha1, but we do try to
write it into a local detached HEAD. In most cases this is
fine, as the remote HEAD is pointing to some part of the
history graph that we will fetch via the refs.
But if the remote HEAD points to an "orphan" commit (one
which is not an ancestor of any refs), then we will not
have the object, and update_ref will complain when we try to
write the detached HEAD, aborting the whole clone.
This patch makes clone always explicitly ask the remote for
the sha1 of its HEAD commit. In the non-detached case, this
is a no-op, as we were going to ask for that sha1 anyway. In
the regular detached case, this will add an extra "want" to
the protocol negotiation, but will not change the history
that gets sent. And in the detached orphan case, we will
fetch the orphaned history so that we can write it into our
local detached HEAD.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
struct ref *head = copy_ref(find_ref_by_name(refs, "HEAD"));
|
|
|
|
struct ref *local_refs = head;
|
|
|
|
struct ref **tail = head ? &head->next : &local_refs;
|
|
|
|
|
|
|
|
if (option_single_branch) {
|
|
|
|
struct ref *remote_head = NULL;
|
|
|
|
|
|
|
|
if (!option_branch)
|
|
|
|
remote_head = guess_remote_head(head, refs, 0);
|
|
|
|
else {
|
|
|
|
local_refs = NULL;
|
|
|
|
tail = &local_refs;
|
|
|
|
remote_head = copy_ref(find_remote_branch(refs, option_branch));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!remote_head && option_branch)
|
|
|
|
warning(_("Could not find remote branch %s to clone."),
|
|
|
|
option_branch);
|
|
|
|
else {
|
clone: respect additional configured fetch refspecs during initial fetch
The initial fetch during a clone doesn't transfer refs matching
additional fetch refspecs given on the command line as configuration
variables, e.g. '-c remote.origin.fetch=<refspec>'. This contradicts
the documentation stating that configuration variables specified via
'git clone -c <key>=<value> ...' "take effect immediately after the
repository is initialized, but before the remote history is fetched"
and the given example specifically mentions "adding additional fetch
refspecs to the origin remote". Furthermore, one-shot configuration
variables specified via 'git -c <key>=<value> clone ...', though not
written to the newly created repository's config file, live during the
lifetime of the 'clone' command, including the initial fetch. All
this implies that any fetch refspecs specified this way should already
be taken into account during the initial fetch.
The reason for this is that the initial fetch is not a fully fledged
'git fetch' but a bunch of direct calls into the fetch/transport
machinery with clone's own refs-to-refspec matching logic, which
bypasses parts of 'git fetch' processing configured fetch refspecs.
This logic only considers a single default refspec, potentially
influenced by options like '--single-branch' and '--mirror'. The
configured refspecs are, however, already read and parsed properly
when clone calls remote.c:remote_get(), but it never looks at the
parsed refspecs in the resulting 'struct remote'.
Modify clone to take the remote's configured fetch refspecs into
account to retrieve all matching refs during the initial fetch. Note
that we have to explicitly add the default fetch refspec to the
remote's refspecs, because at that point the remote only includes the
fetch refspecs specified on the command line.
Add tests to check that refspecs given both via 'git clone -c ...' and
'git -c ... clone' retrieve all refs matching either the default or
the additional refspecs, and that it works even when the user
specifies an alternative remote name via '--origin=<name>'.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
int i;
|
|
|
|
for (i = 0; i < refspec->nr; i++)
|
|
|
|
get_fetch_map(remote_head, &refspec->items[i],
|
|
|
|
&tail, 0);
|
|
|
|
|
|
|
|
/* if --branch=tag, pull the requested tag explicitly */
|
|
|
|
get_fetch_map(remote_head, tag_refspec, &tail, 0);
|
|
|
|
}
|
clone: respect additional configured fetch refspecs during initial fetch
The initial fetch during a clone doesn't transfer refs matching
additional fetch refspecs given on the command line as configuration
variables, e.g. '-c remote.origin.fetch=<refspec>'. This contradicts
the documentation stating that configuration variables specified via
'git clone -c <key>=<value> ...' "take effect immediately after the
repository is initialized, but before the remote history is fetched"
and the given example specifically mentions "adding additional fetch
refspecs to the origin remote". Furthermore, one-shot configuration
variables specified via 'git -c <key>=<value> clone ...', though not
written to the newly created repository's config file, live during the
lifetime of the 'clone' command, including the initial fetch. All
this implies that any fetch refspecs specified this way should already
be taken into account during the initial fetch.
The reason for this is that the initial fetch is not a fully fledged
'git fetch' but a bunch of direct calls into the fetch/transport
machinery with clone's own refs-to-refspec matching logic, which
bypasses parts of 'git fetch' processing configured fetch refspecs.
This logic only considers a single default refspec, potentially
influenced by options like '--single-branch' and '--mirror'. The
configured refspecs are, however, already read and parsed properly
when clone calls remote.c:remote_get(), but it never looks at the
parsed refspecs in the resulting 'struct remote'.
Modify clone to take the remote's configured fetch refspecs into
account to retrieve all matching refs during the initial fetch. Note
that we have to explicitly add the default fetch refspec to the
remote's refspecs, because at that point the remote only includes the
fetch refspecs specified on the command line.
Add tests to check that refspecs given both via 'git clone -c ...' and
'git -c ... clone' retrieve all refs matching either the default or
the additional refspecs, and that it works even when the user
specifies an alternative remote name via '--origin=<name>'.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
} else {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < refspec->nr; i++)
|
|
|
|
get_fetch_map(refs, &refspec->items[i], &tail, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!option_mirror && !option_single_branch && !option_no_tags)
|
|
|
|
get_fetch_map(refs, tag_refspec, &tail, 0);
|
|
|
|
|
|
|
|
return local_refs;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void write_remote_refs(const struct ref *local_refs)
|
|
|
|
{
|
|
|
|
const struct ref *r;
|
|
|
|
|
|
|
|
struct ref_transaction *t;
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
|
|
|
|
t = ref_transaction_begin(&err);
|
|
|
|
if (!t)
|
|
|
|
die("%s", err.buf);
|
|
|
|
|
clone: always fetch remote HEAD
In most cases, fetching the remote HEAD explicitly is
unnecessary. It's just a symref pointing to a branch which
we are already fetching, so we will already ask for its sha1.
However, if the remote has a detached HEAD, things are less
certain. We do not ask for HEAD's sha1, but we do try to
write it into a local detached HEAD. In most cases this is
fine, as the remote HEAD is pointing to some part of the
history graph that we will fetch via the refs.
But if the remote HEAD points to an "orphan" commit (one
which was is not an ancestor of any refs), then we will not
have the object, and update_ref will complain when we try to
write the detached HEAD, aborting the whole clone.
This patch makes clone always explicitly ask the remote for
the sha1 of its HEAD commit. In the non-detached case, this
is a no-op, as we were going to ask for that sha1 anyway. In
the regular detached case, this will add an extra "want" to
the protocol negotiation, but will not change the history
that gets sent. And in the detached orphan case, we will
fetch the orphaned history so that we can write it into our
local detached HEAD.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
for (r = local_refs; r; r = r->next) {
|
|
|
|
if (!r->peer_ref)
|
|
|
|
continue;
|
|
|
|
if (ref_transaction_create(t, r->peer_ref->name, &r->old_oid,
|
|
|
|
0, NULL, &err))
|
|
|
|
die("%s", err.buf);
|
clone: always fetch remote HEAD
In most cases, fetching the remote HEAD explicitly is
unnecessary. It's just a symref pointing to a branch which
we are already fetching, so we will already ask for its sha1.
However, if the remote has a detached HEAD, things are less
certain. We do not ask for HEAD's sha1, but we do try to
write it into a local detached HEAD. In most cases this is
fine, as the remote HEAD is pointing to some part of the
history graph that we will fetch via the refs.
But if the remote HEAD points to an "orphan" commit (one
which was is not an ancestor of any refs), then we will not
have the object, and update_ref will complain when we try to
write the detached HEAD, aborting the whole clone.
This patch makes clone always explicitly ask the remote for
the sha1 of its HEAD commit. In the non-detached case, this
is a no-op, as we were going to ask for that sha1 anyway. In
the regular detached case, this will add an extra "want" to
the protocol negotiation, but will not change the history
that gets sent. And in the detached orphan case, we will
fetch the orphaned history so that we can write it into our
local detached HEAD.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
}
|
|
|
|
|
|
|
|
if (initial_ref_transaction_commit(t, &err))
|
|
|
|
die("%s", err.buf);
|
|
|
|
|
|
|
|
strbuf_release(&err);
|
|
|
|
ref_transaction_free(t);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void write_followtags(const struct ref *refs, const char *msg)
|
|
|
|
{
|
|
|
|
const struct ref *ref;
|
|
|
|
for (ref = refs; ref; ref = ref->next) {
|
|
|
|
if (!starts_with(ref->name, "refs/tags/"))
|
|
|
|
continue;
|
|
|
|
if (ends_with(ref->name, "^{}"))
|
|
|
|
continue;
|
clone: use "quick" lookup while following tags
When cloning with --single-branch, we implement git-fetch's usual
tag-following behavior, grabbing any tag objects that point to objects
we have locally.
When we're a partial clone, though, our has_object_file() check will
actually lazy-fetch each tag. That not only defeats the purpose of
--single-branch, but it does it incredibly slowly, potentially kicking
off a new fetch for each tag. This is even worse for a shallow clone,
which implies --single-branch, because even tags which are supersets of
each other will be fetched individually.
We can fix this by passing OBJECT_INFO_SKIP_FETCH_OBJECT to the call,
which is what git-fetch does in this case.
Likewise, let's include OBJECT_INFO_QUICK, as that's what git-fetch
does. The rationale is discussed in 5827a03545 (fetch: use "quick"
has_sha1_file for tag following, 2016-10-13), but here the tradeoff
would apply even more so because clone is very unlikely to be racing
with another process repacking our newly-created repository.
This may provide a very small speedup even in the non-partial case case,
as we'd avoid calling reprepare_packed_git() for each tag (though in
practice, we'd only have a single packfile, so that reprepare should be
quite cheap).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
if (!has_object_file_with_flags(&ref->old_oid,
|
|
|
|
OBJECT_INFO_QUICK |
|
|
|
|
OBJECT_INFO_SKIP_FETCH_OBJECT))
|
|
|
|
continue;
|
|
|
|
update_ref(msg, ref->name, &ref->old_oid, NULL, 0,
|
|
|
|
UPDATE_REFS_DIE_ON_ERR);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct object_id *iterate_ref_map(void *cb_data)
|
|
|
|
{
|
|
|
|
struct ref **rm = cb_data;
|
|
|
|
struct ref *ref = *rm;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Skip anything missing a peer_ref, which we are not
|
|
|
|
* actually going to write a ref for.
|
|
|
|
*/
|
|
|
|
while (ref && !ref->peer_ref)
|
|
|
|
ref = ref->next;
|
|
|
|
if (!ref)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
*rm = ref->next;
|
|
|
|
return &ref->old_oid;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void update_remote_refs(const struct ref *refs,
|
|
|
|
const struct ref *mapped_refs,
|
|
|
|
const struct ref *remote_head_points_at,
|
|
|
|
const char *branch_top,
|
|
|
|
const char *msg,
|
|
|
|
struct transport *transport,
|
connected: always use partial clone optimization
With 50033772d5 ("connected: verify promisor-ness of partial clone",
2020-01-30), the fast path (checking promisor packs) in
check_connected() now passes a subset of the slow path (rev-list) - if
all objects to be checked are found in promisor packs, both the fast
path and the slow path will pass; otherwise, the fast path will
definitely not pass. This means that we can always attempt the fast path
whenever we need to do the slow path.
The fast path is currently guarded by a flag; therefore, remove that
flag. Also, make the fast path fallback to the slow path - if the fast
path fails, the failing OID and all remaining OIDs will be passed to
rev-list.
The main user-visible benefit is the performance of fetch from a partial
clone - specifically, the speedup of the connectivity check done before
the fetch. In particular, a no-op fetch into a partial clone on my
computer was sped up from 7 seconds to 0.01 seconds. This is a
complement to the work in 2df1aa239c ("fetch: forgo full
connectivity check if --filter", 2020-01-30), which is the child of the
aforementioned 50033772d5. In that commit, the connectivity check
*after* the fetch was sped up.
The addition of the fast path might cause performance reductions in
these cases:
- If a partial clone or a fetch into a partial clone fails, Git will
fruitlessly run rev-list (it is expected that everything fetched
would go into promisor packs, so if that didn't happen, it is most
likely that rev-list will fail too).
- Any connectivity checks done by receive-pack, in the (in my opinion,
unlikely) event that a partial clone serves receive-pack.
I think that these cases are rare enough, and the performance reduction
in this case minor enough (additional object DB access), that the
benefit of avoiding a flag outweighs these.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Reviewed-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
int check_connectivity)
|
|
|
|
{
|
|
|
|
const struct ref *rm = mapped_refs;
|
|
|
|
|
clone: drop connectivity check for local clones
Commit 0433ad1 (clone: run check_everything_connected,
2013-03-25) added the same connectivity check to clone that
we use for fetching. The intent was to provide enough safety
checks that "git clone git://..." could be counted on to
detect bit errors and other repo corruption, and not
silently propagate them to the clone.
For local clones, this turns out to be a bad idea, for two
reasons:
1. Local clones use hard linking (or even shared object
stores), and so complete far more quickly. The time
spent on the connectivity check is therefore
proportionally much more painful.
2. Local clones do not actually meet our safety guarantee
anyway. The connectivity check makes sure we have all
of the objects we claim to, but it does not check for
bit errors. We will notice bit errors in commits and
trees, but we do not load blob objects at all. Whereas
over the pack transport, we actually recompute the sha1
of each object in the incoming packfile; bit errors
change the sha1 of the object, which is then caught by
the connectivity check.
This patch drops the connectivity check in the local case.
Note that we have to revert the changes from 0433ad1 to
t5710, as we no longer notice the corruption during clone.
We could go a step further and provide a "verify even local
clones" option, but it is probably not worthwhile. You can
already spell that as "cd foo.git && git fsck && git clone ."
or as "git clone --no-local foo.git".
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (check_connectivity) {
|
check_everything_connected: use a struct with named options
The number of variants of check_everything_connected has
grown over the years, so that the "real" function takes
several possibly-zero, possibly-NULL arguments. We hid the
complexity behind some wrapper functions, but this doesn't
scale well when we want to add new options.
If we add more wrapper variants to handle the new options,
then we can get a combinatorial explosion when those options
might be used together (right now nobody wants to use both
"shallow" and "transport" together, so we get by with just a
few wrappers).
If instead we add new parameters to each function, each of
which can have a default value, then callers who want the
defaults end up with confusing invocations like:
check_everything_connected(fn, 0, data, -1, 0, NULL);
where it is unclear which parameter is which (and every
caller needs updated when we add new options).
Instead, let's add a struct to hold all of the optional
parameters. This is a little more verbose for the callers
(who have to declare the struct and fill it in), but it
makes their code much easier to follow, because every option
is named as it is set (and unused options do not have to be
mentioned at all).
Note that we could also stick the iteration function and its
callback data into the option struct, too. But since those
are required for each call, by avoiding doing so, we can let
very simple callers just pass "NULL" for the options and not
worry about the struct at all.
While we're touching each site, let's also rename the
function to check_connected(). The existing name was quite
long, and not all of the wrappers even used the full name.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
struct check_connected_options opt = CHECK_CONNECTED_INIT;
|
|
|
|
|
|
|
|
opt.transport = transport;
|
|
|
|
opt.progress = transport->progress;
|
check_everything_connected: use a struct with named options
The number of variants of check_everything_connected has
grown over the years, so that the "real" function takes
several possibly-zero, possibly-NULL arguments. We hid the
complexity behind some wrapper functions, but this doesn't
scale well when we want to add new options.
If we add more wrapper variants to handle the new options,
then we can get a combinatorial explosion when those options
might be used together (right now nobody wants to use both
"shallow" and "transport" together, so we get by with just a
few wrappers).
If instead we add new parameters to each function, each of
which can have a default value, then callers who want the
defaults end up with confusing invocations like:
check_everything_connected(fn, 0, data, -1, 0, NULL);
where it is unclear which parameter is which (and every
caller needs updated when we add new options).
Instead, let's add a struct to hold all of the optional
parameters. This is a little more verbose for the callers
(who have to declare the struct and fill it in), but it
makes their code much easier to follow, because every option
is named as it is set (and unused options do not have to be
mentioned at all).
Note that we could also stick the iteration function and its
callback data into the option struct, too. But since those
are required for each call, by avoiding doing so, we can let
very simple callers just pass "NULL" for the options and not
worry about the struct at all.
While we're touching each site, let's also rename the
function to check_connected(). The existing name was quite
long, and not all of the wrappers even used the full name.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
|
|
|
|
if (check_connected(iterate_ref_map, &rm, &opt))
|
clone: drop connectivity check for local clones
Commit 0433ad1 (clone: run check_everything_connected,
2013-03-25) added the same connectivity check to clone that
we use for fetching. The intent was to provide enough safety
checks that "git clone git://..." could be counted on to
detect bit errors and other repo corruption, and not
silently propagate them to the clone.
For local clones, this turns out to be a bad idea, for two
reasons:
1. Local clones use hard linking (or even shared object
stores), and so complete far more quickly. The time
spent on the connectivity check is therefore
proportionally much more painful.
2. Local clones do not actually meet our safety guarantee
anyway. The connectivity check makes sure we have all
of the objects we claim to, but it does not check for
bit errors. We will notice bit errors in commits and
trees, but we do not load blob objects at all. Whereas
over the pack transport, we actually recompute the sha1
of each object in the incoming packfile; bit errors
change the sha1 of the object, which is then caught by
the connectivity check.
This patch drops the connectivity check in the local case.
Note that we have to revert the changes from 0433ad1 to
t5710, as we no longer notice the corruption during clone.
We could go a step further and provide a "verify even local
clones" option, but it is probably not worthwhile. You can
already spell that as "cd foo.git && git fsck && git clone ."
or as "git clone --no-local foo.git".
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
die(_("remote did not send all necessary objects"));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (refs) {
|
|
|
|
write_remote_refs(mapped_refs);
|
|
|
|
if (option_single_branch && !option_no_tags)
|
|
|
|
write_followtags(refs, msg);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (remote_head_points_at && !option_bare) {
|
|
|
|
struct strbuf head_ref = STRBUF_INIT;
|
|
|
|
strbuf_addstr(&head_ref, branch_top);
|
|
|
|
strbuf_addstr(&head_ref, "HEAD");
|
|
|
|
if (create_symref(head_ref.buf,
|
|
|
|
remote_head_points_at->peer_ref->name,
|
|
|
|
msg) < 0)
|
|
|
|
die(_("unable to update %s"), head_ref.buf);
|
|
|
|
strbuf_release(&head_ref);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void update_head(const struct ref *our, const struct ref *remote,
|
|
|
|
const char *msg)
|
|
|
|
{
|
refactor skip_prefix to return a boolean
The skip_prefix() function returns a pointer to the content
past the prefix, or NULL if the prefix was not found. While
this is nice and simple, in practice it makes it hard to use
for two reasons:
1. When you want to conditionally skip or keep the string
as-is, you have to introduce a temporary variable.
For example:
tmp = skip_prefix(buf, "foo");
if (tmp)
buf = tmp;
2. It is verbose to check the outcome in a conditional, as
you need extra parentheses to silence compiler
warnings. For example:
if ((cp = skip_prefix(buf, "foo"))
/* do something with cp */
Both of these make it harder to use for long if-chains, and
we tend to use starts_with() instead. However, the first line
of "do something" is often to then skip forward in buf past
the prefix, either using a magic constant or with an extra
strlen(3) (which is generally computed at compile time, but
means we are repeating ourselves).
This patch refactors skip_prefix() to return a simple boolean,
and to provide the pointer value as an out-parameter. If the
prefix is not found, the out-parameter is untouched. This
lets you write:
if (skip_prefix(arg, "foo ", &arg))
do_foo(arg);
else if (skip_prefix(arg, "bar ", &arg))
do_bar(arg);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
const char *head;
|
|
|
|
if (our && skip_prefix(our->name, "refs/heads/", &head)) {
|
|
|
|
/* Local default branch link */
|
|
|
|
if (create_symref("HEAD", our->name, NULL) < 0)
|
|
|
|
die(_("unable to update HEAD"));
|
|
|
|
if (!option_bare) {
|
|
|
|
update_ref(msg, "HEAD", &our->old_oid, NULL, 0,
|
|
|
|
UPDATE_REFS_DIE_ON_ERR);
|
|
|
|
install_branch_config(0, head, remote_name, our->name);
|
|
|
|
}
|
|
|
|
} else if (our) {
|
|
|
|
struct commit *c = lookup_commit_reference(the_repository,
|
|
|
|
&our->old_oid);
|
|
|
|
/* --branch specifies a non-branch (i.e. tags), detach HEAD */
|
|
|
|
update_ref(msg, "HEAD", &c->object.oid, NULL, REF_NO_DEREF,
|
|
|
|
UPDATE_REFS_DIE_ON_ERR);
|
|
|
|
} else if (remote) {
|
|
|
|
/*
|
|
|
|
* We know remote HEAD points to a non-branch, or
|
|
|
|
* HEAD points to a branch but we don't know which one.
|
|
|
|
* Detach HEAD in all these cases.
|
|
|
|
*/
|
|
|
|
update_ref(msg, "HEAD", &remote->old_oid, NULL, REF_NO_DEREF,
|
|
|
|
UPDATE_REFS_DIE_ON_ERR);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int git_sparse_checkout_init(const char *repo)
|
|
|
|
{
|
|
|
|
struct strvec argv = STRVEC_INIT;
|
|
|
|
int result = 0;
|
|
|
|
strvec_pushl(&argv, "-C", repo, "sparse-checkout", "set", NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We must apply the setting in the current process
|
|
|
|
* for the later checkout to use the sparse-checkout file.
|
|
|
|
*/
|
|
|
|
core_apply_sparse_checkout = 1;
|
|
|
|
|
|
|
|
if (run_command_v_opt(argv.v, RUN_GIT_CMD)) {
|
|
|
|
error(_("failed to initialize sparse-checkout"));
|
|
|
|
result = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
strvec_clear(&argv);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
static int checkout(int submodule_progress, int filter_submodules)
|
|
|
|
{
|
|
|
|
struct object_id oid;
|
|
|
|
char *head;
|
|
|
|
struct lock_file lock_file = LOCK_INIT;
|
|
|
|
struct unpack_trees_options opts;
|
|
|
|
struct tree *tree;
|
|
|
|
struct tree_desc t;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
if (option_no_checkout)
|
|
|
|
return 0;
|
|
|
|
|
refs: convert resolve_refdup and refs_resolve_refdup to struct object_id
All of the callers already pass the hash member of struct object_id, so
update them to pass a pointer to the struct directly,
This transformation was done with an update to declaration and
definition and the following semantic patch:
@@
expression E1, E2, E3, E4;
@@
- resolve_refdup(E1, E2, E3.hash, E4)
+ resolve_refdup(E1, E2, &E3, E4)
@@
expression E1, E2, E3, E4;
@@
- resolve_refdup(E1, E2, E3->hash, E4)
+ resolve_refdup(E1, E2, E3, E4)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
head = resolve_refdup("HEAD", RESOLVE_REF_READING, &oid, NULL);
|
|
|
|
if (!head) {
|
|
|
|
warning(_("remote HEAD refers to nonexistent ref, "
|
|
|
|
"unable to checkout.\n"));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(head, "HEAD")) {
|
|
|
|
if (advice_enabled(ADVICE_DETACHED_HEAD))
|
|
|
|
detach_advice(oid_to_hex(&oid));
|
|
|
|
FREE_AND_NULL(head);
|
|
|
|
} else {
|
|
|
|
if (!starts_with(head, "refs/heads/"))
|
|
|
|
die(_("HEAD not found below refs/heads!"));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We need to be in the new work tree for the checkout */
|
|
|
|
setup_work_tree();
|
|
|
|
|
|
|
|
hold_locked_index(&lock_file, LOCK_DIE_ON_ERROR);
|
|
|
|
|
|
|
|
memset(&opts, 0, sizeof opts);
|
|
|
|
opts.update = 1;
|
|
|
|
opts.merge = 1;
|
|
|
|
opts.clone = 1;
|
|
|
|
opts.preserve_ignored = 0;
|
|
|
|
opts.fn = oneway_merge;
|
|
|
|
opts.verbose_update = (option_verbosity >= 0);
|
|
|
|
opts.src_index = &the_index;
|
|
|
|
opts.dst_index = &the_index;
|
|
|
|
init_checkout_metadata(&opts.meta, head, &oid, NULL);
|
|
|
|
|
|
|
|
tree = parse_tree_indirect(&oid);
|
|
|
|
parse_tree(tree);
|
|
|
|
init_tree_desc(&t, tree->buffer, tree->size);
|
clone: die on errors from unpack_trees
When clone is populating the working tree, it ignores the
return status from unpack_trees; this means we may report a
successful clone, even when the checkout fails.
When checkout fails, we may want to leave the $GIT_DIR in
place, as it might be possible to recover the data through
further use of "git checkout" (e.g., if the checkout failed
due to a transient error, disk full, etc). However, we
already die on a number of other checkout-related errors, so
this patch follows that pattern.
In addition to marking a now-passing test, we need to adjust
t5710, which blindly assumed it could make bogus clones of
very deep alternates hierarchies. By using "--bare", we can
avoid it actually touching any objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
if (unpack_trees(1, &t, &opts) < 0)
|
|
|
|
die(_("unable to checkout working tree"));
|
|
|
|
|
|
|
|
free(head);
|
|
|
|
|
|
|
|
if (write_locked_index(&the_index, &lock_file, COMMIT_LOCK))
|
|
|
|
die(_("unable to write new index file"));
|
|
|
|
|
|
|
|
err |= run_hooks_l("post-checkout", oid_to_hex(null_oid()),
|
|
|
|
oid_to_hex(&oid), "1", NULL);
|
|
|
|
|
|
|
|
if (!err && (option_recurse_submodules.nr > 0)) {
|
|
|
|
struct strvec args = STRVEC_INIT;
|
|
|
|
strvec_pushl(&args, "submodule", "update", "--require-init", "--recursive", NULL);
|
|
|
|
|
|
|
|
if (option_shallow_submodules == 1)
|
|
|
|
strvec_push(&args, "--depth=1");
|
|
|
|
|
|
|
|
if (max_jobs != -1)
|
|
|
|
strvec_pushf(&args, "--jobs=%d", max_jobs);
|
|
|
|
|
clone: pass --progress decision to recursive submodules
When cloning with "--recursive", we'd generally expect
submodules to show progress reports if the main clone did,
too.
In older versions of git, this mostly worked out of the
box. Since we show progress by default when stderr is a tty,
and since the child clones inherit the parent stderr, then
both processes would come to the same decision by default.
If the parent clone was asked for "--quiet", we passed down
"--quiet" to the child. However, if stderr was not a tty and
the user specified "--progress", we did not propagate this
to the child.
That's a minor bug, but things got much worse when we
switched recently to submodule--helper's update_clone
command. With that change, the stderr of the child clones
are always connected to a pipe, and we never output
progress at all.
This patch teaches git-submodule and git-submodule--helper
how to pass down an explicit "--progress" flag when cloning.
The clone command then decides to propagate that flag based
on the cloning decision made earlier (which takes into
account isatty(2) of the parent process, existing --progress
or --quiet flags, etc). Since the child processes always run
without a tty on stderr, we don't have to worry about
passing an explicit "--no-progress"; it's the default for
them.
This fixes the recent loss of progress during recursive
clones. And as a bonus, it makes:
git clone --recursive --progress ... 2>&1 | cat
work by triggering progress explicitly in the children.
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
if (submodule_progress)
|
|
|
|
strvec_push(&args, "--progress");
|
clone: pass --progress decision to recursive submodules
When cloning with "--recursive", we'd generally expect
submodules to show progress reports if the main clone did,
too.
In older versions of git, this mostly worked out of the
box. Since we show progress by default when stderr is a tty,
and since the child clones inherit the parent stderr, then
both processes would come to the same decision by default.
If the parent clone was asked for "--quiet", we passed down
"--quiet" to the child. However, if stderr was not a tty and
the user specified "--progress", we did not propagate this
to the child.
That's a minor bug, but things got much worse when we
switched recently to submodule--helper's update_clone
command. With that change, the stderr of the child clones
are always connected to a pipe, and we never output
progress at all.
This patch teaches git-submodule and git-submodule--helper
how to pass down an explicit "--progress" flag when cloning.
The clone command then decides to propagate that flag based
on the cloning decision made earlier (which takes into
account isatty(2) of the parent process, existing --progress
or --quiet flags, etc). Since the child processes always run
without a tty on stderr, we don't have to worry about
passing an explicit "--no-progress"; it's the default for
them.
This fixes the recent loss of progress during recursive
clones. And as a bonus, it makes:
git clone --recursive --progress ... 2>&1 | cat
work by triggering progress explicitly in the children.
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
|
|
|
|
if (option_verbosity < 0)
|
|
|
|
strvec_push(&args, "--quiet");
|
|
|
|
|
|
|
|
if (option_remote_submodules) {
|
|
|
|
strvec_push(&args, "--remote");
|
|
|
|
strvec_push(&args, "--no-fetch");
|
|
|
|
}
|
|
|
|
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
if (filter_submodules && filter_options.choice)
|
|
|
|
strvec_pushf(&args, "--filter=%s",
|
|
|
|
expand_list_objects_filter_spec(&filter_options));
|
|
|
|
|
|
|
|
if (option_single_branch >= 0)
|
|
|
|
strvec_push(&args, option_single_branch ?
|
|
|
|
"--single-branch" :
|
|
|
|
"--no-single-branch");
|
|
|
|
|
|
|
|
err = run_command_v_opt(args.v, RUN_GIT_CMD);
|
|
|
|
strvec_clear(&args);
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int git_clone_config(const char *k, const char *v, void *cb)
|
|
|
|
{
|
|
|
|
if (!strcmp(k, "clone.defaultremotename")) {
|
|
|
|
free(remote_name);
|
|
|
|
remote_name = xstrdup(v);
|
|
|
|
}
|
|
|
|
if (!strcmp(k, "clone.rejectshallow"))
|
|
|
|
config_reject_shallow = git_config_bool(k, v);
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
if (!strcmp(k, "clone.filtersubmodules"))
|
|
|
|
config_filter_submodules = git_config_bool(k, v);
|
|
|
|
|
|
|
|
return git_default_config(k, v, cb);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int write_one_config(const char *key, const char *value, void *data)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* give git_clone_config a chance to write config values back to the
|
|
|
|
* environment, since git_config_set_multivar_gently only deals with
|
|
|
|
* config-file writes
|
|
|
|
*/
|
|
|
|
int apply_failed = git_clone_config(key, value, data);
|
|
|
|
if (apply_failed)
|
|
|
|
return apply_failed;
|
|
|
|
|
|
|
|
return git_config_set_multivar_gently(key,
|
|
|
|
value ? value : "true",
|
|
|
|
CONFIG_REGEX_NONE, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void write_config(struct string_list *config)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < config->nr; i++) {
|
|
|
|
if (git_config_parse_parameter(config->items[i].string,
|
|
|
|
write_one_config, NULL) < 0)
|
|
|
|
die(_("unable to write parameters to config file"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void write_refspec_config(const char *src_ref_prefix,
|
|
|
|
const struct ref *our_head_points_at,
|
|
|
|
const struct ref *remote_head_points_at,
|
|
|
|
struct strbuf *branch_top)
|
|
|
|
{
|
|
|
|
struct strbuf key = STRBUF_INIT;
|
|
|
|
struct strbuf value = STRBUF_INIT;
|
|
|
|
|
|
|
|
if (option_mirror || !option_bare) {
|
|
|
|
if (option_single_branch && !option_mirror) {
|
|
|
|
if (option_branch) {
|
|
|
|
if (starts_with(our_head_points_at->name, "refs/tags/"))
|
|
|
|
strbuf_addf(&value, "+%s:%s", our_head_points_at->name,
|
|
|
|
our_head_points_at->name);
|
|
|
|
else
|
|
|
|
strbuf_addf(&value, "+%s:%s%s", our_head_points_at->name,
|
|
|
|
branch_top->buf, option_branch);
|
|
|
|
} else if (remote_head_points_at) {
|
refactor skip_prefix to return a boolean
The skip_prefix() function returns a pointer to the content
past the prefix, or NULL if the prefix was not found. While
this is nice and simple, in practice it makes it hard to use
for two reasons:
1. When you want to conditionally skip or keep the string
as-is, you have to introduce a temporary variable.
For example:
tmp = skip_prefix(buf, "foo");
if (tmp)
buf = tmp;
2. It is verbose to check the outcome in a conditional, as
you need extra parentheses to silence compiler
warnings. For example:
if ((cp = skip_prefix(buf, "foo"))
/* do something with cp */
Both of these make it harder to use for long if-chains, and
we tend to use starts_with() instead. However, the first line
of "do something" is often to then skip forward in buf past
the prefix, either using a magic constant or with an extra
strlen(3) (which is generally computed at compile time, but
means we are repeating ourselves).
This patch refactors skip_prefix() to return a simple boolean,
and to provide the pointer value as an out-parameter. If the
prefix is not found, the out-parameter is untouched. This
lets you write:
if (skip_prefix(arg, "foo ", &arg))
do_foo(arg);
else if (skip_prefix(arg, "bar ", &arg))
do_bar(arg);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
const char *head = remote_head_points_at->name;
|
|
|
|
if (!skip_prefix(head, "refs/heads/", &head))
|
|
|
|
BUG("remote HEAD points at non-head?");
|
refactor skip_prefix to return a boolean
The skip_prefix() function returns a pointer to the content
past the prefix, or NULL if the prefix was not found. While
this is nice and simple, in practice it makes it hard to use
for two reasons:
1. When you want to conditionally skip or keep the string
as-is, you have to introduce a temporary variable.
For example:
tmp = skip_prefix(buf, "foo");
if (tmp)
buf = tmp;
2. It is verbose to check the outcome in a conditional, as
you need extra parentheses to silence compiler
warnings. For example:
if ((cp = skip_prefix(buf, "foo"))
/* do something with cp */
Both of these make it harder to use for long if-chains, and
we tend to use starts_with() instead. However, the first line
of "do something" is often to then skip forward in buf past
the prefix, either using a magic constant or with an extra
strlen(3) (which is generally computed at compile time, but
means we are repeating ourselves).
This patch refactors skip_prefix() to return a simple boolean,
and to provide the pointer value as an out-parameter. If the
prefix is not found, the out-parameter is untouched. This
lets you write:
if (skip_prefix(arg, "foo ", &arg))
do_foo(arg);
else if (skip_prefix(arg, "bar ", &arg))
do_bar(arg);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
|
|
|
|
strbuf_addf(&value, "+%s:%s%s", remote_head_points_at->name,
|
refactor skip_prefix to return a boolean
The skip_prefix() function returns a pointer to the content
past the prefix, or NULL if the prefix was not found. While
this is nice and simple, in practice it makes it hard to use
for two reasons:
1. When you want to conditionally skip or keep the string
as-is, you have to introduce a temporary variable.
For example:
tmp = skip_prefix(buf, "foo");
if (tmp)
buf = tmp;
2. It is verbose to check the outcome in a conditional, as
you need extra parentheses to silence compiler
warnings. For example:
if ((cp = skip_prefix(buf, "foo"))
/* do something with cp */
Both of these make it harder to use for long if-chains, and
we tend to use starts_with() instead. However, the first line
of "do something" is often to then skip forward in buf past
the prefix, either using a magic constant or with an extra
strlen(3) (which is generally computed at compile time, but
means we are repeating ourselves).
This patch refactors skip_prefix() to return a simple boolean,
and to provide the pointer value as an out-parameter. If the
prefix is not found, the out-parameter is untouched. This
lets you write:
if (skip_prefix(arg, "foo ", &arg))
do_foo(arg);
else if (skip_prefix(arg, "bar ", &arg))
do_bar(arg);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
branch_top->buf, head);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* otherwise, the next "git fetch" will
|
|
|
|
* simply fetch from HEAD without updating
|
|
|
|
* any remote-tracking branch, which is what
|
|
|
|
* we want.
|
|
|
|
*/
|
|
|
|
} else {
|
|
|
|
strbuf_addf(&value, "+%s*:%s*", src_ref_prefix, branch_top->buf);
|
|
|
|
}
|
|
|
|
/* Configure the remote */
|
|
|
|
if (value.len) {
|
|
|
|
strbuf_addf(&key, "remote.%s.fetch", remote_name);
|
|
|
|
git_config_set_multivar(key.buf, value.buf, "^$", 0);
|
|
|
|
strbuf_reset(&key);
|
|
|
|
|
|
|
|
if (option_mirror) {
|
|
|
|
strbuf_addf(&key, "remote.%s.mirror", remote_name);
|
|
|
|
git_config_set(key.buf, "true");
|
|
|
|
strbuf_reset(&key);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&key);
|
|
|
|
strbuf_release(&value);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void dissociate_from_references(void)
|
|
|
|
{
|
|
|
|
static const char* argv[] = { "repack", "-a", "-d", NULL };
|
|
|
|
char *alternates = git_pathdup("objects/info/alternates");
|
|
|
|
|
|
|
|
if (!access(alternates, F_OK)) {
|
|
|
|
if (run_command_v_opt(argv, RUN_GIT_CMD|RUN_COMMAND_NO_STDIN))
|
|
|
|
die(_("cannot repack to clean up"));
|
|
|
|
if (unlink(alternates) && errno != ENOENT)
|
|
|
|
die_errno(_("cannot unlink temporary alternates file"));
|
|
|
|
}
|
|
|
|
free(alternates);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return 1 if stat() succeeds on the given path, 0 otherwise.
 */
static int path_exists(const char *path)
{
	struct stat st;

	return stat(path, &st) == 0;
}
|
|
|
|
|
|
|
|
int cmd_clone(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
|
|
|
int is_bundle = 0, is_local;
|
|
|
|
int reject_shallow = 0;
|
|
|
|
const char *repo_name, *repo, *work_tree, *git_dir;
|
|
|
|
char *path = NULL, *dir, *display_repo = NULL;
|
|
|
|
int dest_exists, real_dest_exists = 0;
|
|
|
|
const struct ref *refs, *remote_head;
|
|
|
|
struct ref *remote_head_points_at = NULL;
|
|
|
|
const struct ref *our_head_points_at;
|
|
|
|
struct ref *mapped_refs = NULL;
|
|
|
|
const struct ref *ref;
|
|
|
|
struct strbuf key = STRBUF_INIT;
|
|
|
|
struct strbuf branch_top = STRBUF_INIT, reflog_msg = STRBUF_INIT;
|
|
|
|
struct transport *transport = NULL;
|
|
|
|
const char *src_ref_prefix = "refs/heads/";
|
|
|
|
struct remote *remote;
|
|
|
|
int err = 0, complete_refs_before_fetch = 1;
|
clone: pass --progress decision to recursive submodules
When cloning with "--recursive", we'd generally expect
submodules to show progress reports if the main clone did,
too.
In older versions of git, this mostly worked out of the
box. Since we show progress by default when stderr is a tty,
and since the child clones inherit the parent stderr, then
both processes would come to the same decision by default.
If the parent clone was asked for "--quiet", we passed down
"--quiet" to the child. However, if stderr was not a tty and
the user specified "--progress", we did not propagate this
to the child.
That's a minor bug, but things got much worse when we
switched recently to submodule--helper's update_clone
command. With that change, the stderr of the child clones
are always connected to a pipe, and we never output
progress at all.
This patch teaches git-submodule and git-submodule--helper
how to pass down an explicit "--progress" flag when cloning.
The clone command then decides to propagate that flag based
on the cloning decision made earlier (which takes into
account isatty(2) of the parent process, existing --progress
or --quiet flags, etc). Since the child processes always run
without a tty on stderr, we don't have to worry about
passing an explicit "--no-progress"; it's the default for
them.
This fixes the recent loss of progress during recursive
clones. And as a bonus, it makes:
git clone --recursive --progress ... 2>&1 | cat
work by triggering progress explicitly in the children.
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
int submodule_progress;
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
int filter_submodules = 0;
|
|
|
|
|
|
|
|
struct transport_ls_refs_options transport_ls_refs_options =
|
|
|
|
TRANSPORT_LS_REFS_OPTIONS_INIT;
|
|
|
|
|
|
|
|
packet_trace_identity("clone");
|
|
|
|
|
|
|
|
git_config(git_clone_config, NULL);
|
|
|
|
|
|
|
|
argc = parse_options(argc, argv, prefix, builtin_clone_options,
|
|
|
|
builtin_clone_usage, 0);
|
|
|
|
|
|
|
|
if (argc > 2)
|
|
|
|
usage_msg_opt(_("Too many arguments."),
|
|
|
|
builtin_clone_usage, builtin_clone_options);
|
|
|
|
|
|
|
|
if (argc == 0)
|
|
|
|
usage_msg_opt(_("You must specify a repository to clone."),
|
|
|
|
builtin_clone_usage, builtin_clone_options);
|
|
|
|
|
|
|
|
if (option_depth || option_since || option_not.nr)
|
|
|
|
deepen = 1;
|
|
|
|
if (option_single_branch == -1)
|
|
|
|
option_single_branch = deepen ? 1 : 0;
|
|
|
|
|
|
|
|
if (option_mirror)
|
|
|
|
option_bare = 1;
|
|
|
|
|
|
|
|
if (option_bare) {
|
|
|
|
if (option_origin)
|
|
|
|
die(_("options '%s' and '%s %s' cannot be used together"),
|
|
|
|
"--bare", "--origin", option_origin);
|
|
|
|
if (real_git_dir)
|
|
|
|
die(_("options '%s' and '%s' cannot be used together"), "--bare", "--separate-git-dir");
|
|
|
|
option_no_checkout = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
repo_name = argv[0];
|
|
|
|
|
|
|
|
path = get_repo_path(repo_name, &is_bundle);
|
|
|
|
if (path) {
|
|
|
|
FREE_AND_NULL(path);
|
|
|
|
repo = absolute_pathdup(repo_name);
|
|
|
|
} else if (strchr(repo_name, ':')) {
|
|
|
|
repo = repo_name;
|
|
|
|
display_repo = transport_anonymize_url(repo);
|
|
|
|
} else
|
|
|
|
die(_("repository '%s' does not exist"), repo_name);
|
|
|
|
|
|
|
|
/* no need to be strict, transport_set_option() will validate it again */
|
|
|
|
if (option_depth && atoi(option_depth) < 1)
|
|
|
|
die(_("depth %s is not a positive number"), option_depth);
|
|
|
|
|
|
|
|
if (argc == 2)
|
|
|
|
dir = xstrdup(argv[1]);
|
|
|
|
else
|
|
|
|
dir = git_url_basename(repo_name, is_bundle, option_bare);
|
|
|
|
strip_dir_trailing_slashes(dir);
|
|
|
|
|
|
|
|
dest_exists = path_exists(dir);
|
|
|
|
if (dest_exists && !is_empty_dir(dir))
|
|
|
|
die(_("destination path '%s' already exists and is not "
|
|
|
|
"an empty directory."), dir);
|
|
|
|
|
|
|
|
if (real_git_dir) {
|
|
|
|
real_dest_exists = path_exists(real_git_dir);
|
|
|
|
if (real_dest_exists && !is_empty_dir(real_git_dir))
|
|
|
|
die(_("repository path '%s' already exists and is not "
|
|
|
|
"an empty directory."), real_git_dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
strbuf_addf(&reflog_msg, "clone: from %s",
|
|
|
|
display_repo ? display_repo : repo);
|
|
|
|
free(display_repo);
|
|
|
|
|
|
|
|
if (option_bare)
|
|
|
|
work_tree = NULL;
|
|
|
|
else {
|
|
|
|
work_tree = getenv("GIT_WORK_TREE");
|
|
|
|
if (work_tree && path_exists(work_tree))
|
|
|
|
die(_("working tree '%s' already exists."), work_tree);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (option_bare || work_tree)
|
|
|
|
git_dir = xstrdup(dir);
|
|
|
|
else {
|
|
|
|
work_tree = dir;
|
|
|
|
git_dir = mkpathdup("%s/.git", dir);
|
|
|
|
}
|
|
|
|
|
clone: initialize atexit cleanup handler earlier
If clone fails, we generally try to clean up any directories
we've created. We do this by installing an atexit handler,
so that we don't have to manually trigger cleanup. However,
since we install this after touching the filesystem, any
errors between our initial mkdir() and our atexit() call
will result in us leaving a crufty directory around.
We can fix this by moving our atexit() call earlier. It's OK
to do it before the junk_work_tree variable is set, because
remove_junk makes sure the variable is initialized. This
means we "activate" the handler by assigning to the
junk_work_tree variable, which we now bump down to just
after we call mkdir(). We probably do not want to do it
before, because a plausible reason for mkdir() to fail is
EEXIST (i.e., we are racing with another "git init"), and we
would not want to remove their work.
OTOH, this is probably not that big a deal; we will allow
cloning into an empty directory (and skip the mkdir), which
is already racy (i.e., one clone may see the other's empty
dir and start writing into it). Still, it does not hurt to
err on the side of caution here.
Note that writing into junk_work_tree and junk_git_dir after
installing the handler is also technically racy, as we call
our handler on an async signal. Depending on the platform,
we could see a sheared write to the variables. Traditionally
we have not worried about this, and indeed we already do
this later in the function. If we want to address that, it
can come as a separate topic.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
atexit(remove_junk);
|
|
|
|
sigchain_push_common(remove_junk_on_signal);
|
|
|
|
|
|
|
|
if (!option_bare) {
|
|
|
|
if (safe_create_leading_directories_const(work_tree) < 0)
|
|
|
|
die_errno(_("could not create leading directories of '%s'"),
|
|
|
|
work_tree);
|
|
|
|
if (dest_exists)
|
|
|
|
junk_work_tree_flags |= REMOVE_DIR_KEEP_TOPLEVEL;
|
|
|
|
else if (mkdir(work_tree, 0777))
|
|
|
|
die_errno(_("could not create work tree dir '%s'"),
|
|
|
|
work_tree);
|
clone: initialize atexit cleanup handler earlier
If clone fails, we generally try to clean up any directories
we've created. We do this by installing an atexit handler,
so that we don't have to manually trigger cleanup. However,
since we install this after touching the filesystem, any
errors between our initial mkdir() and our atexit() call
will result in us leaving a crufty directory around.
We can fix this by moving our atexit() call earlier. It's OK
to do it before the junk_work_tree variable is set, because
remove_junk makes sure the variable is initialized. This
means we "activate" the handler by assigning to the
junk_work_tree variable, which we now bump down to just
after we call mkdir(). We probably do not want to do it
before, because a plausible reason for mkdir() to fail is
EEXIST (i.e., we are racing with another "git init"), and we
would not want to remove their work.
OTOH, this is probably not that big a deal; we will allow
cloning into an empty directory (and skip the mkdir), which
is already racy (i.e., one clone may see the other's empty
dir and start writing into it). Still, it does not hurt to
err on the side of caution here.
Note that writing into junk_work_tree and junk_git_dir after
installing the handler is also technically racy, as we call
our handler on an async signal. Depending on the platform,
we could see a sheared write to the variables. Traditionally
we have not worried about this, and indeed we already do
this later in the function. If we want to address that, it
can come as a separate topic.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
junk_work_tree = work_tree;
|
|
|
|
set_git_work_tree(work_tree);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (real_git_dir) {
|
|
|
|
if (real_dest_exists)
|
|
|
|
junk_git_dir_flags |= REMOVE_DIR_KEEP_TOPLEVEL;
|
|
|
|
junk_git_dir = real_git_dir;
|
|
|
|
} else {
|
|
|
|
if (dest_exists)
|
|
|
|
junk_git_dir_flags |= REMOVE_DIR_KEEP_TOPLEVEL;
|
|
|
|
junk_git_dir = git_dir;
|
|
|
|
}
|
|
|
|
if (safe_create_leading_directories_const(git_dir) < 0)
|
|
|
|
die(_("could not create leading directories of '%s'"), git_dir);
|
|
|
|
|
|
|
|
if (0 <= option_verbosity) {
|
|
|
|
if (option_bare)
|
|
|
|
fprintf(stderr, _("Cloning into bare repository '%s'...\n"), dir);
|
|
|
|
else
|
|
|
|
fprintf(stderr, _("Cloning into '%s'...\n"), dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (option_recurse_submodules.nr > 0) {
|
|
|
|
struct string_list_item *item;
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
int val;
|
|
|
|
|
|
|
|
/* remove duplicates */
|
|
|
|
string_list_sort(&option_recurse_submodules);
|
|
|
|
string_list_remove_duplicates(&option_recurse_submodules, 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NEEDSWORK: In a multi-working-tree world, this needs to be
|
|
|
|
* set in the per-worktree config.
|
|
|
|
*/
|
|
|
|
for_each_string_list_item(item, &option_recurse_submodules) {
|
|
|
|
strbuf_addf(&sb, "submodule.active=%s",
|
|
|
|
item->string);
|
|
|
|
string_list_append(&option_config,
|
|
|
|
strbuf_detach(&sb, NULL));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!git_config_get_bool("submodule.stickyRecursiveClone", &val) &&
|
|
|
|
val)
|
|
|
|
string_list_append(&option_config, "submodule.recurse=true");
|
|
|
|
|
|
|
|
if (option_required_reference.nr &&
|
|
|
|
option_optional_reference.nr)
|
|
|
|
die(_("clone --recursive is not compatible with "
|
|
|
|
"both --reference and --reference-if-able"));
|
|
|
|
else if (option_required_reference.nr) {
|
|
|
|
string_list_append(&option_config,
|
|
|
|
"submodule.alternateLocation=superproject");
|
|
|
|
string_list_append(&option_config,
|
|
|
|
"submodule.alternateErrorStrategy=die");
|
|
|
|
} else if (option_optional_reference.nr) {
|
|
|
|
string_list_append(&option_config,
|
|
|
|
"submodule.alternateLocation=superproject");
|
|
|
|
string_list_append(&option_config,
|
|
|
|
"submodule.alternateErrorStrategy=info");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, NULL,
|
|
|
|
INIT_DB_QUIET);
|
|
|
|
|
|
|
|
if (real_git_dir) {
|
|
|
|
free((char *)git_dir);
|
|
|
|
git_dir = real_git_dir;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* additional config can be injected with -c, make sure it's included
|
|
|
|
* after init_db, which clears the entire config environment.
|
|
|
|
*/
|
|
|
|
write_config(&option_config);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* re-read config after init_db and write_config to pick up any config
|
|
|
|
* injected by --template and --config, respectively.
|
|
|
|
*/
|
|
|
|
git_config(git_clone_config, NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If option_reject_shallow is specified from CLI option,
|
|
|
|
* ignore config_reject_shallow from git_clone_config.
|
|
|
|
*/
|
|
|
|
if (config_reject_shallow != -1)
|
|
|
|
reject_shallow = config_reject_shallow;
|
|
|
|
if (option_reject_shallow != -1)
|
|
|
|
reject_shallow = option_reject_shallow;
|
|
|
|
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
/*
|
|
|
|
* If option_filter_submodules is specified from CLI option,
|
|
|
|
* ignore config_filter_submodules from git_clone_config.
|
|
|
|
*/
|
|
|
|
if (config_filter_submodules != -1)
|
|
|
|
filter_submodules = config_filter_submodules;
|
|
|
|
if (option_filter_submodules != -1)
|
|
|
|
filter_submodules = option_filter_submodules;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Exit if the user seems to be doing something silly with submodule
|
|
|
|
* filter flags (but not with filter configs, as those should be
|
|
|
|
* set-and-forget).
|
|
|
|
*/
|
|
|
|
if (option_filter_submodules > 0 && !filter_options.choice)
|
|
|
|
die(_("the option '%s' requires '%s'"),
|
|
|
|
"--also-filter-submodules", "--filter");
|
|
|
|
if (option_filter_submodules > 0 && !option_recurse_submodules.nr)
|
|
|
|
die(_("the option '%s' requires '%s'"),
|
|
|
|
"--also-filter-submodules", "--recurse-submodules");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* apply the remote name provided by --origin only after this second
|
|
|
|
* call to git_config, to ensure it overrides all config-based values.
|
|
|
|
*/
|
|
|
|
if (option_origin != NULL)
|
|
|
|
remote_name = xstrdup(option_origin);
|
|
|
|
|
|
|
|
if (remote_name == NULL)
|
|
|
|
remote_name = xstrdup("origin");
|
|
|
|
|
|
|
|
if (!valid_remote_name(remote_name))
|
|
|
|
die(_("'%s' is not a valid remote name"), remote_name);
|
|
|
|
|
|
|
|
if (option_bare) {
|
|
|
|
if (option_mirror)
|
|
|
|
src_ref_prefix = "refs/";
|
|
|
|
strbuf_addstr(&branch_top, src_ref_prefix);
|
|
|
|
|
|
|
|
git_config_set("core.bare", "true");
|
|
|
|
} else {
|
|
|
|
strbuf_addf(&branch_top, "refs/remotes/%s/", remote_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_addf(&key, "remote.%s.url", remote_name);
|
|
|
|
git_config_set(key.buf, repo);
|
|
|
|
strbuf_reset(&key);
|
|
|
|
|
|
|
|
if (option_no_tags) {
|
|
|
|
strbuf_addf(&key, "remote.%s.tagOpt", remote_name);
|
|
|
|
git_config_set(key.buf, "--no-tags");
|
|
|
|
strbuf_reset(&key);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (option_required_reference.nr || option_optional_reference.nr)
|
|
|
|
setup_reference();
|
|
|
|
|
|
|
|
if (option_sparse_checkout && git_sparse_checkout_init(dir))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
remote = remote_get(remote_name);
|
|
|
|
|
|
|
|
refspec_appendf(&remote->fetch, "+%s*:%s*", src_ref_prefix,
|
|
|
|
branch_top.buf);
|
|
|
|
|
|
|
|
transport = transport_get(remote, remote->url[0]);
|
|
|
|
transport_set_verbosity(transport, option_verbosity, option_progress);
|
|
|
|
transport->family = family;
|
|
|
|
|
|
|
|
path = get_repo_path(remote->url[0], &is_bundle);
|
|
|
|
is_local = option_local != 0 && path && !is_bundle;
|
|
|
|
if (is_local) {
|
|
|
|
if (option_depth)
|
|
|
|
warning(_("--depth is ignored in local clones; use file:// instead."));
|
|
|
|
if (option_since)
|
|
|
|
warning(_("--shallow-since is ignored in local clones; use file:// instead."));
|
|
|
|
if (option_not.nr)
|
|
|
|
warning(_("--shallow-exclude is ignored in local clones; use file:// instead."));
|
|
|
|
if (filter_options.choice)
|
|
|
|
warning(_("--filter is ignored in local clones; use file:// instead."));
|
|
|
|
if (!access(mkpath("%s/shallow", path), F_OK)) {
|
|
|
|
if (reject_shallow)
|
|
|
|
die(_("source repository is shallow, reject to clone."));
|
|
|
|
if (option_local > 0)
|
|
|
|
warning(_("source repository is shallow, ignoring --local"));
|
|
|
|
is_local = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (option_local > 0 && !is_local)
|
|
|
|
warning(_("--local is ignored"));
|
|
|
|
transport->cloning = 1;
|
|
|
|
|
clone: always set transport options
A clone will always create a transport struct, whether we
are cloning locally or using an actual protocol. In the
local case, we only use the transport to get the list of
refs, and then transfer the objects out-of-band.
However, there are many options that we do not bother
setting up in the local case. For the most part, these are
noops, because they only affect the object-fetching stage
(e.g., the --depth option). However, some options do have a
visible impact. For example, giving the path to upload-pack
via "-u" does not currently work for a local clone, even
though we need upload-pack to get the ref list.
We can just drop the conditional entirely and set these
options for both local and non-local clones. Rather than
keep track of which options impact the object versus the ref
fetching stage, we can simply let the noops be noops (and
the cost of setting the options in the first place is not
high).
The one exception is that we also check that the transport
provides both a "get_refs_list" and a "fetch" method. We
will now be checking the former for both cases (which is
good, since a transport that cannot fetch refs would not
work for a local clone), and we tweak the conditional to
check for a "fetch" only when we are non-local.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
transport_set_option(transport, TRANS_OPT_KEEP, "yes");
|
|
|
|
|
|
|
|
if (reject_shallow)
|
|
|
|
transport_set_option(transport, TRANS_OPT_REJECT_SHALLOW, "1");
|
clone: always set transport options
A clone will always create a transport struct, whether we
are cloning locally or using an actual protocol. In the
local case, we only use the transport to get the list of
refs, and then transfer the objects out-of-band.
However, there are many options that we do not bother
setting up in the local case. For the most part, these are
noops, because they only affect the object-fetching stage
(e.g., the --depth option). However, some options do have a
visible impact. For example, giving the path to upload-pack
via "-u" does not currently work for a local clone, even
though we need upload-pack to get the ref list.
We can just drop the conditional entirely and set these
options for both local and non-local clones. Rather than
keep track of which options impact the object versus the ref
fetching stage, we can simply let the noops be noops (and
the cost of setting the options in the first place is not
high).
The one exception is that we also check that the transport
provides both a "get_refs_list" and a "fetch" method. We
will now be checking the former for both cases (which is
good, since a transport that cannot fetch refs would not
work for a local clone), and we tweak the conditional to
check for a "fetch" only when we are non-local.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (option_depth)
|
|
|
|
transport_set_option(transport, TRANS_OPT_DEPTH,
|
|
|
|
option_depth);
|
|
|
|
if (option_since)
|
|
|
|
transport_set_option(transport, TRANS_OPT_DEEPEN_SINCE,
|
|
|
|
option_since);
|
|
|
|
if (option_not.nr)
|
|
|
|
transport_set_option(transport, TRANS_OPT_DEEPEN_NOT,
|
|
|
|
(const char *)&option_not);
|
clone: always set transport options
A clone will always create a transport struct, whether we
are cloning locally or using an actual protocol. In the
local case, we only use the transport to get the list of
refs, and then transfer the objects out-of-band.
However, there are many options that we do not bother
setting up in the local case. For the most part, these are
noops, because they only affect the object-fetching stage
(e.g., the --depth option). However, some options do have a
visible impact. For example, giving the path to upload-pack
via "-u" does not currently work for a local clone, even
though we need upload-pack to get the ref list.
We can just drop the conditional entirely and set these
options for both local and non-local clones. Rather than
keep track of which options impact the object versus the ref
fetching stage, we can simply let the noops be noops (and
the cost of setting the options in the first place is not
high).
The one exception is that we also check that the transport
provides both a "get_refs_list" and a "fetch" method. We
will now be checking the former for both cases (which is
good, since a transport that cannot fetch refs would not
work for a local clone), and we tweak the conditional to
check for a "fetch" only when we are non-local.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (option_single_branch)
|
|
|
|
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
|
|
|
|
|
clone: always set transport options
A clone will always create a transport struct, whether we
are cloning locally or using an actual protocol. In the
local case, we only use the transport to get the list of
refs, and then transfer the objects out-of-band.
However, there are many options that we do not bother
setting up in the local case. For the most part, these are
noops, because they only affect the object-fetching stage
(e.g., the --depth option). However, some options do have a
visible impact. For example, giving the path to upload-pack
via "-u" does not currently work for a local clone, even
though we need upload-pack to get the ref list.
We can just drop the conditional entirely and set these
options for both local and non-local clones. Rather than
keep track of which options impact the object versus the ref
fetching stage, we can simply let the noops be noops (and
the cost of setting the options in the first place is not
high).
The one exception is that we also check that the transport
provides both a "get_refs_list" and a "fetch" method. We
will now be checking the former for both cases (which is
good, since a transport that cannot fetch refs would not
work for a local clone), and we tweak the conditional to
check for a "fetch" only when we are non-local.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (option_upload_pack)
|
|
|
|
transport_set_option(transport, TRANS_OPT_UPLOADPACK,
|
|
|
|
option_upload_pack);
|
|
|
|
|
|
|
|
if (server_options.nr)
|
|
|
|
transport->server_options = &server_options;
|
|
|
|
|
|
|
|
if (filter_options.choice) {
|
|
|
|
const char *spec =
|
|
|
|
expand_list_objects_filter_spec(&filter_options);
|
|
|
|
transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
|
|
|
|
spec);
|
|
|
|
transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (transport->smart_options && !deepen && !filter_options.choice)
|
clone: always set transport options
A clone will always create a transport struct, whether we
are cloning locally or using an actual protocol. In the
local case, we only use the transport to get the list of
refs, and then transfer the objects out-of-band.
However, there are many options that we do not bother
setting up in the local case. For the most part, these are
noops, because they only affect the object-fetching stage
(e.g., the --depth option). However, some options do have a
visible impact. For example, giving the path to upload-pack
via "-u" does not currently work for a local clone, even
though we need upload-pack to get the ref list.
We can just drop the conditional entirely and set these
options for both local and non-local clones. Rather than
keep track of which options impact the object versus the ref
fetching stage, we can simply let the noops be noops (and
the cost of setting the options in the first place is not
high).
The one exception is that we also check that the transport
provides both a "get_refs_list" and a "fetch" method. We
will now be checking the former for both cases (which is
good, since a transport that cannot fetch refs would not
work for a local clone), and we tweak the conditional to
check for a "fetch" only when we are non-local.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
transport->smart_options->check_self_contained_and_connected = 1;
|
|
|
|
|
|
|
|
|
|
|
|
strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD");
|
|
|
|
refspec_ref_prefixes(&remote->fetch,
|
|
|
|
&transport_ls_refs_options.ref_prefixes);
|
|
|
|
if (option_branch)
|
|
|
|
expand_ref_prefix(&transport_ls_refs_options.ref_prefixes,
|
|
|
|
option_branch);
|
|
|
|
if (!option_no_tags)
|
|
|
|
strvec_push(&transport_ls_refs_options.ref_prefixes,
|
|
|
|
"refs/tags/");
|
|
|
|
|
|
|
|
refs = transport_get_remote_refs(transport, &transport_ls_refs_options);
|
|
|
|
|
|
|
|
if (refs)
|
|
|
|
mapped_refs = wanted_peer_refs(refs, &remote->fetch);
|
|
|
|
|
|
|
|
if (mapped_refs) {
|
|
|
|
int hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now that we know what algorithm the remote side is using,
|
|
|
|
* let's set ours to the same thing.
|
|
|
|
*/
|
builtin/clone: avoid failure with GIT_DEFAULT_HASH
If a user is cloning a SHA-1 repository with GIT_DEFAULT_HASH set to
"sha256", then we can end up with a repository where the repository
format version is 0 but the extensions.objectformat key is set to
"sha256". This is both wrong (the user has a SHA-1 repository) and
nonfunctional (because the extension cannot be used in a v0 repository).
This happens because in a clone, we initially set up the repository, and
then change its algorithm based on what the remote side tells us it's
using. We've initially set up the repository as SHA-256 in this case,
and then later on reset the repository version without clearing the
extension.
We could just always set the extension in this case, but that would mean
that our SHA-1 repositories weren't compatible with older Git versions,
even though there's no reason why they shouldn't be. And we also don't
want to initialize the repository as SHA-1 initially, since that means
if we're cloning an empty repository, we'll have failed to honor the
GIT_DEFAULT_HASH variable and will end up with a SHA-1 repository, not a
SHA-256 repository.
Neither of those are appealing, so let's tell the repository
initialization code if we're doing a reinit like this, and if so, to
clear the extension if we're using SHA-1. This makes sure we produce a
valid and functional repository and doesn't break any of our other use
cases.
Reported-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
initialize_repository_version(hash_algo, 1);
|
|
|
|
repo_set_hash_algo(the_repository, hash_algo);
|
|
|
|
/*
|
|
|
|
* transport_get_remote_refs() may return refs with null sha-1
|
|
|
|
* in mapped_refs (see struct transport->get_refs_list
|
|
|
|
* comment). In that case we need to fetch it early because
|
|
|
|
* remote_head code below relies on it.
|
|
|
|
*
|
|
|
|
* for normal clones, transport_get_remote_refs() should
|
|
|
|
* return reliable ref set, we can delay cloning until after
|
|
|
|
* remote HEAD check.
|
|
|
|
*/
|
|
|
|
for (ref = refs; ref; ref = ref->next)
|
|
|
|
if (is_null_oid(&ref->old_oid)) {
|
|
|
|
complete_refs_before_fetch = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!is_local && !complete_refs_before_fetch) {
|
clone: clean up directory after transport_fetch_refs() failure
git-clone started respecting errors from the transport subsystem in
aab179d937 (builtin/clone.c: don't ignore transport_fetch_refs() errors,
2020-12-03). However, that commit didn't handle the cleanup of the
filesystem quite right.
The cleanup of the directory that cmd_clone() creates is done by an
atexit() handler, which we control with a flag. It starts as
JUNK_LEAVE_NONE ("clean up everything"), then progresses to
JUNK_LEAVE_REPO when we know we have a valid repo but not working tree,
and then finally JUNK_LEAVE_ALL when we have a successful checkout.
Most errors cause us to die(), which then triggers the handler to do the
right thing based on how far into cmd_clone() we got. But the checks
added by aab179d937 instead set the "err" variable and then jump to a
new "cleanup" label, which then returns our non-zero status. However,
the code after the cleanup label includes setting the flag to
JUNK_LEAVE_ALL, and so we accidentally leave the repository and working
tree in place.
One obvious option to fix this is to reorder the end of the function to
set the flag first, before cleanup code, and put the label between them.
But we can observe another small bug: the error return from
transport_fetch_refs() is generally "-1", and we propagate that to the
return value of cmd_clone(), which ultimately becomes the exit code of
the process. And we try to avoid transmitting negative values via exit
codes (only the low 8 bits are passed along as an unsigned value, though
in practice for "-1" this at least retains the property that it's
non-zero).
Instead, let's just die(). That makes us consistent with the rest of the
code in the function. It does add a new "fatal:" line to the output, but
I'd argue that's a good thing:
- in the rare case that the transport code didn't say anything, now
the user gets _some_ error message
- even if the transport code said something like "error: ssh died of
signal 9", it's nice to also say "fatal" to indicate that we
considered that to be a show-stopper.
Triggering this in the test suite turns out to be surprisingly
difficult. Almost every error we'd encounter, including ones deep inside
the transport code, cause us to just die() right there! However, one way
is to put a fake wrapper around git-upload-pack that sends the complete
packfile but exits with a failure code.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
if (transport_fetch_refs(transport, mapped_refs))
|
|
|
|
die(_("remote transport reported error"));
|
|
|
|
}
|
|
|
|
|
|
|
|
remote_head = find_ref_by_name(refs, "HEAD");
|
|
|
|
remote_head_points_at =
|
|
|
|
guess_remote_head(remote_head, mapped_refs, 0);
|
|
|
|
|
|
|
|
if (option_branch) {
|
|
|
|
our_head_points_at =
|
|
|
|
find_remote_branch(mapped_refs, option_branch);
|
|
|
|
|
|
|
|
if (!our_head_points_at)
|
|
|
|
die(_("Remote branch %s not found in upstream %s"),
|
|
|
|
option_branch, remote_name);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
our_head_points_at = remote_head_points_at;
|
|
|
|
}
|
|
|
|
else {
|
clone: handle unborn branch in bare repos
When cloning a repository with an unborn HEAD, we'll set the local HEAD
to match it only if the local repository is non-bare. This is
inconsistent with all other combinations:
remote HEAD | local repo | local HEAD
-----------------------------------------------
points to commit | non-bare | same as remote
points to commit | bare | same as remote
unborn | non-bare | same as remote
unborn | bare | local default
So I don't think this is some clever or subtle behavior, but just a bug
in 4f37d45706 (clone: respect remote unborn HEAD, 2021-02-05). And it's
easy to see how we ended up there. Before that commit, the code to set
up the HEAD for an empty repo was guarded by "if (!option_bare)". That's
because the only thing it did was call install_branch_config(), and we
don't want to do so for a bare repository (unborn HEAD or not).
That commit put the handling of unborn HEADs into the same block, since
those also need to call install_branch_config(). But the unborn case has
an additional side effect of calling create_symref(), and we want that
to happen whether we are bare or not.
This patch just pulls all of the "figure out the default branch" code
out of the "!option_bare" block. Only the actual config installation is
kept there.
Note that this does mean we might allocate "ref" and not use it (if the
remote is empty but did not advertise an unborn HEAD). But that's not
really a big deal since this isn't a hot code path, and it keeps the
code simple. The alternative would be handling unborn_head_target
separately, but that gets confusing since its memory ownership is
tangled up with the "ref" variable.
There's just one new test, for the case we're fixing. The other ones in
the table are handled elsewhere (the unborn non-bare case just above,
and the actually-born cases in t5601, t5606, and t5609, as they do not
require v2's "unborn" protocol extension).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
const char *branch;
|
|
|
|
const char *ref;
|
|
|
|
char *ref_free = NULL;
|
clone: handle unborn branch in bare repos
When cloning a repository with an unborn HEAD, we'll set the local HEAD
to match it only if the local repository is non-bare. This is
inconsistent with all other combinations:
remote HEAD | local repo | local HEAD
-----------------------------------------------
points to commit | non-bare | same as remote
points to commit | bare | same as remote
unborn | non-bare | same as remote
unborn | bare | local default
So I don't think this is some clever or subtle behavior, but just a bug
in 4f37d45706 (clone: respect remote unborn HEAD, 2021-02-05). And it's
easy to see how we ended up there. Before that commit, the code to set
up the HEAD for an empty repo was guarded by "if (!option_bare)". That's
because the only thing it did was call install_branch_config(), and we
don't want to do so for a bare repository (unborn HEAD or not).
That commit put the handling of unborn HEADs into the same block, since
those also need to call install_branch_config(). But the unborn case has
an additional side effect of calling create_symref(), and we want that
to happen whether we are bare or not.
This patch just pulls all of the "figure out the default branch" code
out of the "!option_bare" block. Only the actual config installation is
kept there.
Note that this does mean we might allocate "ref" and not use it (if the
remote is empty but did not advertise an unborn HEAD). But that's not
really a big deal since this isn't a hot code path, and it keeps the
code simple. The alternative would be handling unborn_head_target
separately, but that gets confusing since its memory ownership is
tangled up with the "ref" variable.
There's just one new test, for the case we're fixing. The other ones in
the table are handled elsewhere (the unborn non-bare case just above,
and the actually-born cases in t5601, t5606, and t5609, as they do not
require v2's "unborn" protocol extension).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
|
|
|
|
if (option_branch)
|
|
|
|
die(_("Remote branch %s not found in upstream %s"),
|
|
|
|
option_branch, remote_name);
|
|
|
|
|
|
|
|
warning(_("You appear to have cloned an empty repository."));
|
|
|
|
our_head_points_at = NULL;
|
|
|
|
remote_head_points_at = NULL;
|
|
|
|
remote_head = NULL;
|
|
|
|
option_no_checkout = 1;
|
|
|
|
|
clone: handle unborn branch in bare repos
When cloning a repository with an unborn HEAD, we'll set the local HEAD
to match it only if the local repository is non-bare. This is
inconsistent with all other combinations:
remote HEAD | local repo | local HEAD
-----------------------------------------------
points to commit | non-bare | same as remote
points to commit | bare | same as remote
unborn | non-bare | same as remote
unborn | bare | local default
So I don't think this is some clever or subtle behavior, but just a bug
in 4f37d45706 (clone: respect remote unborn HEAD, 2021-02-05). And it's
easy to see how we ended up there. Before that commit, the code to set
up the HEAD for an empty repo was guarded by "if (!option_bare)". That's
because the only thing it did was call install_branch_config(), and we
don't want to do so for a bare repository (unborn HEAD or not).
That commit put the handling of unborn HEADs into the same block, since
those also need to call install_branch_config(). But the unborn case has
an additional side effect of calling create_symref(), and we want that
to happen whether we are bare or not.
This patch just pulls all of the "figure out the default branch" code
out of the "!option_bare" block. Only the actual config installation is
kept there.
Note that this does mean we might allocate "ref" and not use it (if the
remote is empty but did not advertise an unborn HEAD). But that's not
really a big deal since this isn't a hot code path, and it keeps the
code simple. The alternative would be handling unborn_head_target
separately, but that gets confusing since its memory ownership is
tangled up with the "ref" variable.
There's just one new test, for the case we're fixing. The other ones in
the table are handled elsewhere (the unborn non-bare case just above,
and the actually-born cases in t5601, t5606, and t5609, as they do not
require v2's "unborn" protocol extension).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
if (transport_ls_refs_options.unborn_head_target &&
|
|
|
|
skip_prefix(transport_ls_refs_options.unborn_head_target,
|
|
|
|
"refs/heads/", &branch)) {
|
|
|
|
ref = transport_ls_refs_options.unborn_head_target;
|
|
|
|
create_symref("HEAD", ref, reflog_msg.buf);
|
|
|
|
} else {
|
|
|
|
branch = git_default_branch_name(0);
|
|
|
|
ref_free = xstrfmt("refs/heads/%s", branch);
|
|
|
|
ref = ref_free;
|
|
|
|
}
|
clone: handle unborn branch in bare repos
When cloning a repository with an unborn HEAD, we'll set the local HEAD
to match it only if the local repository is non-bare. This is
inconsistent with all other combinations:
remote HEAD | local repo | local HEAD
-----------------------------------------------
points to commit | non-bare | same as remote
points to commit | bare | same as remote
unborn | non-bare | same as remote
unborn | bare | local default
So I don't think this is some clever or subtle behavior, but just a bug
in 4f37d45706 (clone: respect remote unborn HEAD, 2021-02-05). And it's
easy to see how we ended up there. Before that commit, the code to set
up the HEAD for an empty repo was guarded by "if (!option_bare)". That's
because the only thing it did was call install_branch_config(), and we
don't want to do so for a bare repository (unborn HEAD or not).
That commit put the handling of unborn HEADs into the same block, since
those also need to call install_branch_config(). But the unborn case has
an additional side effect of calling create_symref(), and we want that
to happen whether we are bare or not.
This patch just pulls all of the "figure out the default branch" code
out of the "!option_bare" block. Only the actual config installation is
kept there.
Note that this does mean we might allocate "ref" and not use it (if the
remote is empty but did not advertise an unborn HEAD). But that's not
really a big deal since this isn't a hot code path, and it keeps the
code simple. The alternative would be handling unborn_head_target
separately, but that gets confusing since its memory ownership is
tangled up with the "ref" variable.
There's just one new test, for the case we're fixing. The other ones in
the table are handled elsewhere (the unborn non-bare case just above,
and the actually-born cases in t5601, t5606, and t5609, as they do not
require v2's "unborn" protocol extension).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
|
|
|
|
if (!option_bare)
|
|
|
|
install_branch_config(0, branch, remote_name, ref);
|
|
|
|
free(ref_free);
|
|
|
|
}
|
|
|
|
|
|
|
|
write_refspec_config(src_ref_prefix, our_head_points_at,
|
|
|
|
remote_head_points_at, &branch_top);
|
|
|
|
|
|
|
|
if (filter_options.choice)
|
|
|
|
partial_clone_register(remote_name, &filter_options);
|
|
|
|
|
|
|
|
if (is_local)
|
|
|
|
clone_local(path, git_dir);
|
|
|
|
else if (mapped_refs && complete_refs_before_fetch) {
|
clone: clean up directory after transport_fetch_refs() failure
git-clone started respecting errors from the transport subsystem in
aab179d937 (builtin/clone.c: don't ignore transport_fetch_refs() errors,
2020-12-03). However, that commit didn't handle the cleanup of the
filesystem quite right.
The cleanup of the directory that cmd_clone() creates is done by an
atexit() handler, which we control with a flag. It starts as
JUNK_LEAVE_NONE ("clean up everything"), then progresses to
JUNK_LEAVE_REPO when we know we have a valid repo but not working tree,
and then finally JUNK_LEAVE_ALL when we have a successful checkout.
Most errors cause us to die(), which then triggers the handler to do the
right thing based on how far into cmd_clone() we got. But the checks
added by aab179d937 instead set the "err" variable and then jump to a
new "cleanup" label, which then returns our non-zero status. However,
the code after the cleanup label includes setting the flag to
JUNK_LEAVE_ALL, and so we accidentally leave the repository and working
tree in place.
One obvious option to fix this is to reorder the end of the function to
set the flag first, before cleanup code, and put the label between them.
But we can observe another small bug: the error return from
transport_fetch_refs() is generally "-1", and we propagate that to the
return value of cmd_clone(), which ultimately becomes the exit code of
the process. And we try to avoid transmitting negative values via exit
codes (only the low 8 bits are passed along as an unsigned value, though
in practice for "-1" this at least retains the property that it's
non-zero).
Instead, let's just die(). That makes us consistent with the rest of the
code in the function. It does add a new "fatal:" line to the output, but
I'd argue that's a good thing:
- in the rare case that the transport code didn't say anything, now
the user gets _some_ error message
- even if the transport code said something like "error: ssh died of
signal 9", it's nice to also say "fatal" to indicate that we
considered that to be a show-stopper.
Triggering this in the test suite turns out to be surprisingly
difficult. Almost every error we'd encounter, including ones deep inside
the transport code, cause us to just die() right there! However, one way
is to put a fake wrapper around git-upload-pack that sends the complete
packfile but exits with a failure code.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
if (transport_fetch_refs(transport, mapped_refs))
|
|
|
|
die(_("remote transport reported error"));
|
|
|
|
}
|
|
|
|
|
|
|
|
update_remote_refs(refs, mapped_refs, remote_head_points_at,
|
|
|
|
branch_top.buf, reflog_msg.buf, transport,
|
connected: always use partial clone optimization
With 50033772d5 ("connected: verify promisor-ness of partial clone",
2020-01-30), the fast path (checking promisor packs) in
check_connected() now passes a subset of the slow path (rev-list) - if
all objects to be checked are found in promisor packs, both the fast
path and the slow path will pass; otherwise, the fast path will
definitely not pass. This means that we can always attempt the fast path
whenever we need to do the slow path.
The fast path is currently guarded by a flag; therefore, remove that
flag. Also, make the fast path fallback to the slow path - if the fast
path fails, the failing OID and all remaining OIDs will be passed to
rev-list.
The main user-visible benefit is the performance of fetch from a partial
clone - specifically, the speedup of the connectivity check done before
the fetch. In particular, a no-op fetch into a partial clone on my
computer was sped up from 7 seconds to 0.01 seconds. This is a
complement to the work in 2df1aa239c ("fetch: forgo full
connectivity check if --filter", 2020-01-30), which is the child of the
aforementioned 50033772d5. In that commit, the connectivity check
*after* the fetch was sped up.
The addition of the fast path might cause performance reductions in
these cases:
- If a partial clone or a fetch into a partial clone fails, Git will
fruitlessly run rev-list (it is expected that everything fetched
would go into promisor packs, so if that didn't happen, it is most
likely that rev-list will fail too).
- Any connectivity checks done by receive-pack, in the (in my opinion,
unlikely) event that a partial clone serves receive-pack.
I think that these cases are rare enough, and the performance reduction
in this case minor enough (additional object DB access), that the
benefit of avoiding a flag outweighs these.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Reviewed-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
!is_local);
|
|
|
|
|
|
|
|
update_head(our_head_points_at, remote_head, reflog_msg.buf);
|
|
|
|
|
clone: pass --progress decision to recursive submodules
When cloning with "--recursive", we'd generally expect
submodules to show progress reports if the main clone did,
too.
In older versions of git, this mostly worked out of the
box. Since we show progress by default when stderr is a tty,
and since the child clones inherit the parent stderr, then
both processes would come to the same decision by default.
If the parent clone was asked for "--quiet", we passed down
"--quiet" to the child. However, if stderr was not a tty and
the user specified "--progress", we did not propagate this
to the child.
That's a minor bug, but things got much worse when we
switched recently to submodule--helper's update_clone
command. With that change, the stderr of the child clones
are always connected to a pipe, and we never output
progress at all.
This patch teaches git-submodule and git-submodule--helper
how to pass down an explicit "--progress" flag when cloning.
The clone command then decides to propagate that flag based
on the cloning decision made earlier (which takes into
account isatty(2) of the parent process, existing --progress
or --quiet flags, etc). Since the child processes always run
without a tty on stderr, we don't have to worry about
passing an explicit "--no-progress"; it's the default for
them.
This fixes the recent loss of progress during recursive
clones. And as a bonus, it makes:
git clone --recursive --progress ... 2>&1 | cat
work by triggering progress explicitly in the children.
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
/*
|
|
|
|
* We want to show progress for recursive submodule clones iff
|
|
|
|
* we did so for the main clone. But only the transport knows
|
|
|
|
* the final decision for this flag, so we need to rescue the value
|
|
|
|
* before we free the transport.
|
|
|
|
*/
|
|
|
|
submodule_progress = transport->progress;
|
|
|
|
|
fetch: fix deadlock when cleaning up lockfiles in async signals
When fetching packfiles, we write a bunch of lockfiles for the packfiles
we're writing into the repository. In order to not leave behind any
cruft in case we exit or receive a signal, we register both an exit
handler as well as signal handlers for common signals like SIGINT. These
handlers will then unlink the locks and free the data structure tracking
them. We have observed a deadlock in this logic though:
(gdb) bt
#0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969
#2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#3 0x0000000000662ab1 in string_list_clear ()
#4 0x000000000044f5bc in unlock_pack_on_signal ()
#5 <signal handler called>
#6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024
#7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#8 0x000000000065afd5 in strbuf_release ()
#9 0x000000000066ddb9 in delete_tempfile ()
#10 0x0000000000610d0b in files_transaction_cleanup.isra ()
#11 0x0000000000611718 in files_transaction_abort ()
#12 0x000000000060d2ef in ref_transaction_abort ()
#13 0x000000000060d441 in ref_transaction_prepare ()
#14 0x000000000060e0b5 in ref_transaction_commit ()
#15 0x00000000004511c2 in fetch_and_consume_refs ()
#16 0x000000000045279a in cmd_fetch ()
#17 0x0000000000407c48 in handle_builtin ()
#18 0x0000000000408df2 in cmd_main ()
#19 0x00000000004078b5 in main ()
The process was killed with a signal, which caused the signal handler to
kick in and try to free the data structures after we have unlinked the
locks. It then deadlocks while calling free(3P).
The root cause of this is that it is not allowed to call certain
functions in async-signal handlers, as specified by signal-safety(7).
Next to most I/O functions, this list of disallowed functions also
includes memory-handling functions like malloc(3P) and free(3P) because
they may not be reentrant. As a result, if we execute such functions in
the signal handler, then they may operate on inconsistent state and fail
in unexpected ways.
Fix this bug by not calling non-async-signal-safe functions when running
in the signal handler. We're about to re-raise the signal anyway and
will thus exit, so it's not much of a problem to keep the string list of
lockfiles untouched. Note that it's fine though to call unlink(2), so
we'll still clean up the lockfiles correctly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
transport_unlock_pack(transport, 0);
|
|
|
|
transport_disconnect(transport);
|
|
|
|
|
|
|
|
if (option_dissociate) {
|
|
|
|
close_object_store(the_repository->objects);
|
|
|
|
dissociate_from_references();
|
|
|
|
}
|
|
|
|
|
|
|
|
junk_mode = JUNK_LEAVE_REPO;
|
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:limit=5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
3 years ago
|
|
|
err = checkout(submodule_progress, filter_submodules);
|
|
|
|
|
|
|
|
free(remote_name);
|
|
|
|
strbuf_release(&reflog_msg);
|
|
|
|
strbuf_release(&branch_top);
|
|
|
|
strbuf_release(&key);
|
|
|
|
free_refs(mapped_refs);
|
|
|
|
free_refs(remote_head_points_at);
|
|
|
|
free(dir);
|
|
|
|
free(path);
|
|
|
|
UNLEAK(repo);
|
|
|
|
junk_mode = JUNK_LEAVE_ALL;
|
|
|
|
|
|
|
|
transport_ls_refs_options_release(&transport_ls_refs_options);
|
|
|
|
return err;
|
|
|
|
}
|