You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

926 lines
26 KiB

#include "cache.h"
#include "submodule.h"
#include "dir.h"
#include "diff.h"
#include "commit.h"
#include "revision.h"
#include "run-command.h"
#include "diffcore.h"
#include "refs.h"
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
#include "string-list.h"
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
#include "sha1-array.h"
#include "argv-array.h"
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
Fix sparse warnings Fix warnings from 'make check'. - These files don't include 'builtin.h' causing sparse to complain that cmd_* isn't declared: builtin/clone.c:364, builtin/fetch-pack.c:797, builtin/fmt-merge-msg.c:34, builtin/hash-object.c:78, builtin/merge-index.c:69, builtin/merge-recursive.c:22 builtin/merge-tree.c:341, builtin/mktag.c:156, builtin/notes.c:426 builtin/notes.c:822, builtin/pack-redundant.c:596, builtin/pack-refs.c:10, builtin/patch-id.c:60, builtin/patch-id.c:149, builtin/remote.c:1512, builtin/remote-ext.c:240, builtin/remote-fd.c:53, builtin/reset.c:236, builtin/send-pack.c:384, builtin/unpack-file.c:25, builtin/var.c:75 - These files have symbols which should be marked static since they're only file scope: submodule.c:12, diff.c:631, replace_object.c:92, submodule.c:13, submodule.c:14, trace.c:78, transport.c:195, transport-helper.c:79, unpack-trees.c:19, url.c:3, url.c:18, url.c:104, url.c:117, url.c:123, url.c:129, url.c:136, thread-utils.c:21, thread-utils.c:48 - These files redeclare symbols to be different types: builtin/index-pack.c:210, parse-options.c:564, parse-options.c:571, usage.c:49, usage.c:58, usage.c:63, usage.c:72 - These files use a literal integer 0 when they really should use a NULL pointer: daemon.c:663, fast-import.c:2942, imap-send.c:1072, notes-merge.c:362 While we're in the area, clean up some unused #includes in builtin files (mostly exec_cmd.h). Signed-off-by: Stephen Boyd <bebarino@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
static struct string_list config_name_for_path;
static struct string_list config_fetch_recurse_submodules_for_name;
static struct string_list config_ignore_for_name;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
static int config_fetch_recurse_submodules = RECURSE_SUBMODULES_ON_DEMAND;
static struct string_list changed_submodule_paths;
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
static int initialized_fetch_ref_tips;
static struct sha1_array ref_tips_before_fetch;
static struct sha1_array ref_tips_after_fetch;
/*
* The following flag is set if the .gitmodules file is unmerged. We then
* disable recursion for all submodules where .git/config doesn't have a
* matching config entry because we can't guess what might be configured in
* .gitmodules unless the user resolves the conflict. When a command line
* option is given (which always overrides configuration) this flag will be
* ignored.
*/
static int gitmodules_is_unmerged;
static int add_submodule_odb(const char *path)
{
struct strbuf objects_directory = STRBUF_INIT;
struct alternate_object_database *alt_odb;
int ret = 0;
const char *git_dir;
strbuf_addf(&objects_directory, "%s/.git", path);
git_dir = read_gitfile(objects_directory.buf);
if (git_dir) {
strbuf_reset(&objects_directory);
strbuf_addstr(&objects_directory, git_dir);
}
strbuf_addstr(&objects_directory, "/objects/");
if (!is_directory(objects_directory.buf)) {
ret = -1;
goto done;
}
/* avoid adding it twice */
for (alt_odb = alt_odb_list; alt_odb; alt_odb = alt_odb->next)
if (alt_odb->name - alt_odb->base == objects_directory.len &&
!strncmp(alt_odb->base, objects_directory.buf,
objects_directory.len))
goto done;
alt_odb = xmalloc(objects_directory.len + 42 + sizeof(*alt_odb));
alt_odb->next = alt_odb_list;
strcpy(alt_odb->base, objects_directory.buf);
alt_odb->name = alt_odb->base + objects_directory.len;
alt_odb->name[2] = '/';
alt_odb->name[40] = '\0';
alt_odb->name[41] = '\0';
alt_odb_list = alt_odb;
/* add possible alternates from the submodule */
read_info_alternates(objects_directory.buf, 0);
prepare_alt_odb();
done:
strbuf_release(&objects_directory);
return ret;
}
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt,
const char *path)
{
struct string_list_item *path_option, *ignore_option;
path_option = unsorted_string_list_lookup(&config_name_for_path, path);
if (path_option) {
ignore_option = unsorted_string_list_lookup(&config_ignore_for_name, path_option->util);
if (ignore_option)
handle_ignore_submodules_arg(diffopt, ignore_option->util);
else if (gitmodules_is_unmerged)
DIFF_OPT_SET(diffopt, IGNORE_SUBMODULES);
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
}
}
int submodule_config(const char *var, const char *value, void *cb)
{
if (!prefixcmp(var, "submodule."))
return parse_submodule_config_option(var, value);
else if (!strcmp(var, "fetch.recursesubmodules")) {
config_fetch_recurse_submodules = parse_fetch_recurse_submodules_arg(var, value);
return 0;
}
return 0;
}
void gitmodules_config(void)
{
const char *work_tree = get_git_work_tree();
if (work_tree) {
struct strbuf gitmodules_path = STRBUF_INIT;
int pos;
strbuf_addstr(&gitmodules_path, work_tree);
strbuf_addstr(&gitmodules_path, "/.gitmodules");
if (read_cache() < 0)
die("index file corrupt");
pos = cache_name_pos(".gitmodules", 11);
if (pos < 0) { /* .gitmodules not found or isn't merged */
pos = -1 - pos;
if (active_nr > pos) { /* there is a .gitmodules */
const struct cache_entry *ce = active_cache[pos];
if (ce_namelen(ce) == 11 &&
!memcmp(ce->name, ".gitmodules", 11))
gitmodules_is_unmerged = 1;
}
}
if (!gitmodules_is_unmerged)
git_config_from_file(submodule_config, gitmodules_path.buf, NULL);
strbuf_release(&gitmodules_path);
}
}
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
int parse_submodule_config_option(const char *var, const char *value)
{
int len;
struct string_list_item *config;
struct strbuf submodname = STRBUF_INIT;
var += 10; /* Skip "submodule." */
len = strlen(var);
if ((len > 5) && !strcmp(var + len - 5, ".path")) {
strbuf_add(&submodname, var, len - 5);
config = unsorted_string_list_lookup(&config_name_for_path, value);
if (config)
free(config->util);
else
config = string_list_append(&config_name_for_path, xstrdup(value));
config->util = strbuf_detach(&submodname, NULL);
strbuf_release(&submodname);
} else if ((len > 23) && !strcmp(var + len - 23, ".fetchrecursesubmodules")) {
strbuf_add(&submodname, var, len - 23);
config = unsorted_string_list_lookup(&config_fetch_recurse_submodules_for_name, submodname.buf);
if (!config)
config = string_list_append(&config_fetch_recurse_submodules_for_name,
strbuf_detach(&submodname, NULL));
config->util = (void *)(intptr_t)parse_fetch_recurse_submodules_arg(var, value);
strbuf_release(&submodname);
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
} else if ((len > 7) && !strcmp(var + len - 7, ".ignore")) {
if (strcmp(value, "untracked") && strcmp(value, "dirty") &&
strcmp(value, "all") && strcmp(value, "none")) {
warning("Invalid parameter \"%s\" for config option \"submodule.%s.ignore\"", value, var);
return 0;
}
strbuf_add(&submodname, var, len - 7);
config = unsorted_string_list_lookup(&config_ignore_for_name, submodname.buf);
if (config)
free(config->util);
else
config = string_list_append(&config_ignore_for_name,
strbuf_detach(&submodname, NULL));
strbuf_release(&submodname);
config->util = xstrdup(value);
return 0;
}
return 0;
}
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
void handle_ignore_submodules_arg(struct diff_options *diffopt,
const char *arg)
{
DIFF_OPT_CLR(diffopt, IGNORE_SUBMODULES);
DIFF_OPT_CLR(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES);
DIFF_OPT_CLR(diffopt, IGNORE_DIRTY_SUBMODULES);
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
if (!strcmp(arg, "all"))
DIFF_OPT_SET(diffopt, IGNORE_SUBMODULES);
else if (!strcmp(arg, "untracked"))
DIFF_OPT_SET(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES);
else if (!strcmp(arg, "dirty"))
DIFF_OPT_SET(diffopt, IGNORE_DIRTY_SUBMODULES);
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
else if (strcmp(arg, "none"))
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
die("bad --ignore-submodules argument: %s", arg);
}
static int prepare_submodule_summary(struct rev_info *rev, const char *path,
struct commit *left, struct commit *right,
int *fast_forward, int *fast_backward)
{
struct commit_list *merge_bases, *list;
init_revisions(rev, NULL);
setup_revisions(0, NULL, rev, NULL);
rev->left_right = 1;
rev->first_parent_only = 1;
left->object.flags |= SYMMETRIC_LEFT;
add_pending_object(rev, &left->object, path);
add_pending_object(rev, &right->object, path);
merge_bases = get_merge_bases(left, right, 1);
if (merge_bases) {
if (merge_bases->item == left)
*fast_forward = 1;
else if (merge_bases->item == right)
*fast_backward = 1;
}
for (list = merge_bases; list; list = list->next) {
list->item->object.flags |= UNINTERESTING;
add_pending_object(rev, &list->item->object,
sha1_to_hex(list->item->object.sha1));
}
return prepare_revision_walk(rev);
}
static void print_submodule_summary(struct rev_info *rev, FILE *f,
const char *del, const char *add, const char *reset)
{
static const char format[] = " %m %s";
struct strbuf sb = STRBUF_INIT;
struct commit *commit;
while ((commit = get_revision(rev))) {
struct pretty_print_context ctx = {0};
ctx.date_mode = rev->date_mode;
strbuf_setlen(&sb, 0);
if (commit->object.flags & SYMMETRIC_LEFT) {
if (del)
strbuf_addstr(&sb, del);
}
else if (add)
strbuf_addstr(&sb, add);
format_commit_message(commit, format, &sb, &ctx);
if (reset)
strbuf_addstr(&sb, reset);
strbuf_addch(&sb, '\n');
fprintf(f, "%s", sb.buf);
}
strbuf_release(&sb);
}
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
int parse_fetch_recurse_submodules_arg(const char *opt, const char *arg)
{
switch (git_config_maybe_bool(opt, arg)) {
case 1:
return RECURSE_SUBMODULES_ON;
case 0:
return RECURSE_SUBMODULES_OFF;
default:
if (!strcmp(arg, "on-demand"))
return RECURSE_SUBMODULES_ON_DEMAND;
die("bad %s argument: %s", opt, arg);
}
}
void show_submodule_summary(FILE *f, const char *path,
unsigned char one[20], unsigned char two[20],
unsigned dirty_submodule,
const char *del, const char *add, const char *reset)
{
struct rev_info rev;
struct commit *left = left, *right = right;
const char *message = NULL;
struct strbuf sb = STRBUF_INIT;
int fast_forward = 0, fast_backward = 0;
if (is_null_sha1(two))
message = "(submodule deleted)";
else if (add_submodule_odb(path))
message = "(not checked out)";
else if (is_null_sha1(one))
message = "(new submodule)";
else if (!(left = lookup_commit_reference(one)) ||
!(right = lookup_commit_reference(two)))
message = "(commits not present)";
if (!message &&
prepare_submodule_summary(&rev, path, left, right,
&fast_forward, &fast_backward))
message = "(revision walker failed)";
if (dirty_submodule & DIRTY_SUBMODULE_UNTRACKED)
fprintf(f, "Submodule %s contains untracked content\n", path);
if (dirty_submodule & DIRTY_SUBMODULE_MODIFIED)
fprintf(f, "Submodule %s contains modified content\n", path);
if (!hashcmp(one, two)) {
strbuf_release(&sb);
return;
}
strbuf_addf(&sb, "Submodule %s %s..", path,
find_unique_abbrev(one, DEFAULT_ABBREV));
if (!fast_backward && !fast_forward)
strbuf_addch(&sb, '.');
strbuf_addf(&sb, "%s", find_unique_abbrev(two, DEFAULT_ABBREV));
if (message)
strbuf_addf(&sb, " %s\n", message);
else
strbuf_addf(&sb, "%s:\n", fast_backward ? " (rewind)" : "");
fwrite(sb.buf, sb.len, 1, f);
if (!message) {
print_submodule_summary(&rev, f, del, add, reset);
clear_commit_marks(left, ~0);
clear_commit_marks(right, ~0);
}
strbuf_release(&sb);
}
void set_config_fetch_recurse_submodules(int value)
{
config_fetch_recurse_submodules = value;
}
static int has_remote(const char *refname, const unsigned char *sha1, int flags, void *cb_data)
{
return 1;
}
static int submodule_needs_pushing(const char *path, const unsigned char sha1[20])
{
if (add_submodule_odb(path) || !lookup_commit_reference(sha1))
return 0;
if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) {
struct child_process cp;
const char *argv[] = {"rev-list", NULL, "--not", "--remotes", "-n", "1" , NULL};
struct strbuf buf = STRBUF_INIT;
int needs_pushing = 0;
argv[1] = sha1_to_hex(sha1);
memset(&cp, 0, sizeof(cp));
cp.argv = argv;
cp.env = local_repo_env;
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.out = -1;
cp.dir = path;
if (start_command(&cp))
die("Could not run 'git rev-list %s --not --remotes -n 1' command in submodule %s",
sha1_to_hex(sha1), path);
if (strbuf_read(&buf, cp.out, 41))
needs_pushing = 1;
finish_command(&cp);
close(cp.out);
strbuf_release(&buf);
return needs_pushing;
}
return 0;
}
static void collect_submodules_from_diff(struct diff_queue_struct *q,
struct diff_options *options,
void *data)
{
int i;
struct string_list *needs_pushing = data;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
if (!S_ISGITLINK(p->two->mode))
continue;
if (submodule_needs_pushing(p->two->path, p->two->sha1))
string_list_insert(needs_pushing, p->two->path);
}
}
static void find_unpushed_submodule_commits(struct commit *commit,
struct string_list *needs_pushing)
{
struct rev_info rev;
init_revisions(&rev, NULL);
rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
rev.diffopt.format_callback = collect_submodules_from_diff;
rev.diffopt.format_callback_data = needs_pushing;
diff_tree_combined_merge(commit, 1, &rev);
}
int find_unpushed_submodules(unsigned char new_sha1[20],
const char *remotes_name, struct string_list *needs_pushing)
{
struct rev_info rev;
struct commit *commit;
const char *argv[] = {NULL, NULL, "--not", "NULL", NULL};
int argc = ARRAY_SIZE(argv) - 1;
char *sha1_copy;
struct strbuf remotes_arg = STRBUF_INIT;
strbuf_addf(&remotes_arg, "--remotes=%s", remotes_name);
init_revisions(&rev, NULL);
sha1_copy = xstrdup(sha1_to_hex(new_sha1));
argv[1] = sha1_copy;
argv[3] = remotes_arg.buf;
setup_revisions(argc, argv, &rev, NULL);
if (prepare_revision_walk(&rev))
die("revision walk setup failed");
while ((commit = get_revision(&rev)) != NULL)
find_unpushed_submodule_commits(commit, needs_pushing);
reset_revision_walk();
free(sha1_copy);
strbuf_release(&remotes_arg);
return needs_pushing->nr;
}
static int push_submodule(const char *path)
{
if (add_submodule_odb(path))
return 1;
if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) {
struct child_process cp;
const char *argv[] = {"push", NULL};
memset(&cp, 0, sizeof(cp));
cp.argv = argv;
cp.env = local_repo_env;
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
if (run_command(&cp))
return 0;
close(cp.out);
}
return 1;
}
int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name)
{
int i, ret = 1;
struct string_list needs_pushing;
memset(&needs_pushing, 0, sizeof(struct string_list));
needs_pushing.strdup_strings = 1;
if (!find_unpushed_submodules(new_sha1, remotes_name, &needs_pushing))
return 1;
for (i = 0; i < needs_pushing.nr; i++) {
const char *path = needs_pushing.items[i].string;
fprintf(stderr, "Pushing submodule '%s'\n", path);
if (!push_submodule(path)) {
fprintf(stderr, "Unable to push submodule '%s'\n", path);
ret = 0;
}
}
string_list_clear(&needs_pushing, 0);
return ret;
}
static int is_submodule_commit_present(const char *path, unsigned char sha1[20])
{
int is_present = 0;
if (!add_submodule_odb(path) && lookup_commit_reference(sha1)) {
/* Even if the submodule is checked out and the commit is
* present, make sure it is reachable from a ref. */
struct child_process cp;
const char *argv[] = {"rev-list", "-n", "1", NULL, "--not", "--all", NULL};
struct strbuf buf = STRBUF_INIT;
argv[3] = sha1_to_hex(sha1);
memset(&cp, 0, sizeof(cp));
cp.argv = argv;
cp.env = local_repo_env;
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.out = -1;
cp.dir = path;
if (!run_command(&cp) && !strbuf_read(&buf, cp.out, 1024))
is_present = 1;
close(cp.out);
strbuf_release(&buf);
}
return is_present;
}
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
static void submodule_collect_changed_cb(struct diff_queue_struct *q,
struct diff_options *options,
void *data)
{
int i;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
if (!S_ISGITLINK(p->two->mode))
continue;
if (S_ISGITLINK(p->one->mode)) {
/* NEEDSWORK: We should honor the name configured in
* the .gitmodules file of the commit we are examining
* here to be able to correctly follow submodules
* being moved around. */
struct string_list_item *path;
path = unsorted_string_list_lookup(&changed_submodule_paths, p->two->path);
if (!path && !is_submodule_commit_present(p->two->path, p->two->sha1))
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
string_list_append(&changed_submodule_paths, xstrdup(p->two->path));
} else {
/* Submodule is new or was moved here */
/* NEEDSWORK: When the .git directories of submodules
* live inside the superprojects .git directory some
* day we should fetch new submodules directly into
* that location too when config or options request
* that so they can be checked out from there. */
continue;
}
}
}
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
static int add_sha1_to_array(const char *ref, const unsigned char *sha1,
int flags, void *data)
{
sha1_array_append(data, sha1);
return 0;
}
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
void check_for_new_submodule_commits(unsigned char new_sha1[20])
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
{
if (!initialized_fetch_ref_tips) {
for_each_ref(add_sha1_to_array, &ref_tips_before_fetch);
initialized_fetch_ref_tips = 1;
}
sha1_array_append(&ref_tips_after_fetch, new_sha1);
}
static void add_sha1_to_argv(const unsigned char sha1[20], void *data)
{
argv_array_push(data, sha1_to_hex(sha1));
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
}
static void calculate_changed_submodule_paths(void)
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
{
struct rev_info rev;
struct commit *commit;
struct argv_array argv = ARGV_ARRAY_INIT;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
/* No need to check if there are no submodules configured */
if (!config_name_for_path.nr)
return;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
init_revisions(&rev, NULL);
argv_array_push(&argv, "--"); /* argv[0] program name */
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
sha1_array_for_each_unique(&ref_tips_after_fetch,
add_sha1_to_argv, &argv);
argv_array_push(&argv, "--not");
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
sha1_array_for_each_unique(&ref_tips_before_fetch,
add_sha1_to_argv, &argv);
setup_revisions(argv.argc, argv.argv, &rev, NULL);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
if (prepare_revision_walk(&rev))
die("revision walk setup failed");
/*
* Collect all submodules (whether checked out or not) for which new
* commits have been recorded upstream in "changed_submodule_paths".
*/
while ((commit = get_revision(&rev))) {
struct commit_list *parent = commit->parents;
while (parent) {
struct diff_options diff_opts;
diff_setup(&diff_opts);
DIFF_OPT_SET(&diff_opts, RECURSIVE);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
diff_opts.output_format |= DIFF_FORMAT_CALLBACK;
diff_opts.format_callback = submodule_collect_changed_cb;
diff_setup_done(&diff_opts);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
diff_tree_sha1(parent->item->object.sha1, commit->object.sha1, "", &diff_opts);
diffcore_std(&diff_opts);
diff_flush(&diff_opts);
parent = parent->next;
}
}
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
argv_array_clear(&argv);
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
sha1_array_clear(&ref_tips_before_fetch);
sha1_array_clear(&ref_tips_after_fetch);
initialized_fetch_ref_tips = 0;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
}
int fetch_populated_submodules(const struct argv_array *options,
const char *prefix, int command_line_option,
int quiet)
{
int i, result = 0;
struct child_process cp;
struct argv_array argv = ARGV_ARRAY_INIT;
struct string_list_item *name_for_path;
const char *work_tree = get_git_work_tree();
if (!work_tree)
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
goto out;
if (!the_index.initialized)
if (read_cache() < 0)
die("index file corrupt");
argv_array_push(&argv, "fetch");
for (i = 0; i < options->argc; i++)
argv_array_push(&argv, options->argv[i]);
argv_array_push(&argv, "--recurse-submodules-default");
/* default value, "--submodule-prefix" and its value are added later */
memset(&cp, 0, sizeof(cp));
cp.env = local_repo_env;
cp.git_cmd = 1;
cp.no_stdin = 1;
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
calculate_changed_submodule_paths();
for (i = 0; i < active_nr; i++) {
struct strbuf submodule_path = STRBUF_INIT;
struct strbuf submodule_git_dir = STRBUF_INIT;
struct strbuf submodule_prefix = STRBUF_INIT;
struct cache_entry *ce = active_cache[i];
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
const char *git_dir, *name, *default_argv;
if (!S_ISGITLINK(ce->ce_mode))
continue;
name = ce->name;
name_for_path = unsorted_string_list_lookup(&config_name_for_path, ce->name);
if (name_for_path)
name = name_for_path->util;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
default_argv = "yes";
if (command_line_option == RECURSE_SUBMODULES_DEFAULT) {
struct string_list_item *fetch_recurse_submodules_option;
fetch_recurse_submodules_option = unsorted_string_list_lookup(&config_fetch_recurse_submodules_for_name, name);
if (fetch_recurse_submodules_option) {
if ((intptr_t)fetch_recurse_submodules_option->util == RECURSE_SUBMODULES_OFF)
continue;
if ((intptr_t)fetch_recurse_submodules_option->util == RECURSE_SUBMODULES_ON_DEMAND) {
if (!unsorted_string_list_lookup(&changed_submodule_paths, ce->name))
continue;
default_argv = "on-demand";
}
} else {
if ((config_fetch_recurse_submodules == RECURSE_SUBMODULES_OFF) ||
gitmodules_is_unmerged)
continue;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
if (config_fetch_recurse_submodules == RECURSE_SUBMODULES_ON_DEMAND) {
if (!unsorted_string_list_lookup(&changed_submodule_paths, ce->name))
continue;
default_argv = "on-demand";
}
}
} else if (command_line_option == RECURSE_SUBMODULES_ON_DEMAND) {
if (!unsorted_string_list_lookup(&changed_submodule_paths, ce->name))
continue;
default_argv = "on-demand";
}
strbuf_addf(&submodule_path, "%s/%s", work_tree, ce->name);
strbuf_addf(&submodule_git_dir, "%s/.git", submodule_path.buf);
strbuf_addf(&submodule_prefix, "%s%s/", prefix, ce->name);
git_dir = read_gitfile(submodule_git_dir.buf);
if (!git_dir)
git_dir = submodule_git_dir.buf;
if (is_directory(git_dir)) {
if (!quiet)
printf("Fetching submodule %s%s\n", prefix, ce->name);
cp.dir = submodule_path.buf;
argv_array_push(&argv, default_argv);
argv_array_push(&argv, "--submodule-prefix");
argv_array_push(&argv, submodule_prefix.buf);
cp.argv = argv.argv;
if (run_command(&cp))
result = 1;
argv_array_pop(&argv);
argv_array_pop(&argv);
argv_array_pop(&argv);
}
strbuf_release(&submodule_path);
strbuf_release(&submodule_git_dir);
strbuf_release(&submodule_prefix);
}
argv_array_clear(&argv);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
out:
string_list_clear(&changed_submodule_paths, 1);
return result;
}
unsigned is_submodule_modified(const char *path, int ignore_untracked)
{
ssize_t len;
struct child_process cp;
const char *argv[] = {
"status",
"--porcelain",
NULL,
NULL,
};
struct strbuf buf = STRBUF_INIT;
unsigned dirty_submodule = 0;
const char *line, *next_line;
const char *git_dir;
strbuf_addf(&buf, "%s/.git", path);
git_dir = read_gitfile(buf.buf);
if (!git_dir)
git_dir = buf.buf;
if (!is_directory(git_dir)) {
strbuf_release(&buf);
/* The submodule is not checked out, so it is not modified */
return 0;
}
strbuf_reset(&buf);
if (ignore_untracked)
argv[2] = "-uno";
memset(&cp, 0, sizeof(cp));
cp.argv = argv;
cp.env = local_repo_env;
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.out = -1;
cp.dir = path;
if (start_command(&cp))
die("Could not run 'git status --porcelain' in submodule %s", path);
len = strbuf_read(&buf, cp.out, 1024);
line = buf.buf;
while (len > 2) {
if ((line[0] == '?') && (line[1] == '?')) {
dirty_submodule |= DIRTY_SUBMODULE_UNTRACKED;
if (dirty_submodule & DIRTY_SUBMODULE_MODIFIED)
break;
} else {
dirty_submodule |= DIRTY_SUBMODULE_MODIFIED;
if (ignore_untracked ||
(dirty_submodule & DIRTY_SUBMODULE_UNTRACKED))
break;
}
next_line = strchr(line, '\n');
if (!next_line)
break;
next_line++;
len -= (next_line - line);
line = next_line;
}
close(cp.out);
if (finish_command(&cp))
die("'git status --porcelain' failed in submodule %s", path);
strbuf_release(&buf);
return dirty_submodule;
}
static int find_first_merges(struct object_array *result, const char *path,
struct commit *a, struct commit *b)
{
int i, j;
struct object_array merges;
struct commit *commit;
int contains_another;
char merged_revision[42];
const char *rev_args[] = { "rev-list", "--merges", "--ancestry-path",
"--all", merged_revision, NULL };
struct rev_info revs;
struct setup_revision_opt rev_opts;
memset(&merges, 0, sizeof(merges));
memset(result, 0, sizeof(struct object_array));
memset(&rev_opts, 0, sizeof(rev_opts));
/* get all revisions that merge commit a */
snprintf(merged_revision, sizeof(merged_revision), "^%s",
sha1_to_hex(a->object.sha1));
init_revisions(&revs, NULL);
rev_opts.submodule = path;
setup_revisions(sizeof(rev_args)/sizeof(char *)-1, rev_args, &revs, &rev_opts);
/* save all revisions from the above list that contain b */
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
while ((commit = get_revision(&revs)) != NULL) {
struct object *o = &(commit->object);
if (in_merge_bases(b, commit))
add_object_array(o, NULL, &merges);
}
reset_revision_walk();
/* Now we've got all merges that contain a and b. Prune all
* merges that contain another found merge and save them in
* result.
*/
for (i = 0; i < merges.nr; i++) {
struct commit *m1 = (struct commit *) merges.objects[i].item;
contains_another = 0;
for (j = 0; j < merges.nr; j++) {
struct commit *m2 = (struct commit *) merges.objects[j].item;
if (i != j && in_merge_bases(m2, m1)) {
contains_another = 1;
break;
}
}
if (!contains_another)
add_object_array(merges.objects[i].item,
merges.objects[i].name, result);
}
free(merges.objects);
return result->nr;
}
static void print_commit(struct commit *commit)
{
struct strbuf sb = STRBUF_INIT;
struct pretty_print_context ctx = {0};
ctx.date_mode = DATE_NORMAL;
format_commit_message(commit, " %h: %m %s", &sb, &ctx);
fprintf(stderr, "%s\n", sb.buf);
strbuf_release(&sb);
}
#define MERGE_WARNING(path, msg) \
warning("Failed to merge submodule %s (%s)", path, msg);
int merge_submodule(unsigned char result[20], const char *path,
const unsigned char base[20], const unsigned char a[20],
const unsigned char b[20], int search)
{
struct commit *commit_base, *commit_a, *commit_b;
int parent_count;
struct object_array merges;
int i;
/* store a in result in case we fail */
hashcpy(result, a);
/* we can not handle deletion conflicts */
if (is_null_sha1(base))
return 0;
if (is_null_sha1(a))
return 0;
if (is_null_sha1(b))
return 0;
if (add_submodule_odb(path)) {
MERGE_WARNING(path, "not checked out");
return 0;
}
if (!(commit_base = lookup_commit_reference(base)) ||
!(commit_a = lookup_commit_reference(a)) ||
!(commit_b = lookup_commit_reference(b))) {
MERGE_WARNING(path, "commits not present");
return 0;
}
/* check whether both changes are forward */
if (!in_merge_bases(commit_base, commit_a) ||
!in_merge_bases(commit_base, commit_b)) {
MERGE_WARNING(path, "commits don't follow merge-base");
return 0;
}
/* Case #1: a is contained in b or vice versa */
if (in_merge_bases(commit_a, commit_b)) {
hashcpy(result, b);
return 1;
}
if (in_merge_bases(commit_b, commit_a)) {
hashcpy(result, a);
return 1;
}
/*
* Case #2: There are one or more merges that contain a and b in
* the submodule. If there is only one, then present it as a
* suggestion to the user, but leave it marked unmerged so the
* user needs to confirm the resolution.
*/
/* Skip the search if makes no sense to the calling context. */
if (!search)
return 0;
/* find commit which merges them */
parent_count = find_first_merges(&merges, path, commit_a, commit_b);
switch (parent_count) {
case 0:
MERGE_WARNING(path, "merge following commits not found");
break;
case 1:
MERGE_WARNING(path, "not fast-forward");
fprintf(stderr, "Found a possible merge resolution "
"for the submodule:\n");
print_commit((struct commit *) merges.objects[0].item);
fprintf(stderr,
"If this is correct simply add it to the index "
"for example\n"
"by using:\n\n"
" git update-index --cacheinfo 160000 %s \"%s\"\n\n"
"which will accept this suggestion.\n",
sha1_to_hex(merges.objects[0].item->sha1), path);
break;
default:
MERGE_WARNING(path, "multiple merges found");
for (i = 0; i < merges.nr; i++)
print_commit((struct commit *) merges.objects[i].item);
}
free(merges.objects);
return 0;
}