You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

669 lines
18 KiB

/*
* "git fetch"
*/
#include "cache.h"
#include "refs.h"
#include "commit.h"
#include "builtin.h"
#include "string-list.h"
#include "remote.h"
#include "transport.h"
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
17 years ago
#include "run-command.h"
#include "parse-options.h"
static const char * const builtin_fetch_usage[] = {
"git fetch [options] [<repository> <refspec>...]",
NULL
};
enum {
TAGS_UNSET = 0,
TAGS_DEFAULT = 1,
TAGS_SET = 2
};
static int append, force, keep, update_head_ok, verbose, quiet;
static int tags = TAGS_DEFAULT;
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
17 years ago
static const char *depth;
static const char *upload_pack;
static struct strbuf default_rla = STRBUF_INIT;
static struct transport *transport;
static struct option builtin_fetch_options[] = {
OPT__QUIET(&quiet),
OPT__VERBOSE(&verbose),
OPT_BOOLEAN('a', "append", &append,
"append to .git/FETCH_HEAD instead of overwriting"),
OPT_STRING(0, "upload-pack", &upload_pack, "PATH",
"path to upload pack on remote end"),
OPT_BOOLEAN('f', "force", &force,
"force overwrite of local branch"),
OPT_SET_INT('t', "tags", &tags,
"fetch all tags and associated objects", TAGS_SET),
OPT_SET_INT('n', NULL, &tags,
"do not fetch all tags (--no-tags)", TAGS_UNSET),
OPT_BOOLEAN('k', "keep", &keep, "keep downloaded pack"),
OPT_BOOLEAN('u', "update-head-ok", &update_head_ok,
"allow updating of HEAD ref"),
OPT_STRING(0, "depth", &depth, "DEPTH",
"deepen history of shallow clone"),
OPT_END()
};
static void unlock_pack(void)
{
if (transport)
transport_unlock_pack(transport);
}
static void unlock_pack_on_signal(int signo)
{
unlock_pack();
signal(SIGINT, SIG_DFL);
raise(signo);
}
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
static void add_merge_config(struct ref **head,
const struct ref *remote_refs,
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
struct branch *branch,
struct ref ***tail)
{
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
int i;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
for (i = 0; i < branch->merge_nr; i++) {
struct ref *rm, **old_tail = *tail;
struct refspec refspec;
for (rm = *head; rm; rm = rm->next) {
if (branch_merge_matches(branch, i, rm->name)) {
rm->merge = 1;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
break;
}
}
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
if (rm)
continue;
/*
* Not fetched to a tracking branch? We need to fetch
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
* it anyway to allow this branch's "branch.$name.merge"
* to be honored by 'git pull', but we do not have to
* fail if branch.$name.merge is misconfigured to point
* at a nonexisting branch. If we were indeed called by
* 'git pull', it will notice the misconfiguration because
* there is no entry in the resulting FETCH_HEAD marked
* for merging.
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
*/
refspec.src = branch->merge[i]->src;
refspec.dst = NULL;
refspec.pattern = 0;
refspec.force = 0;
get_fetch_map(remote_refs, &refspec, tail, 1);
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
for (rm = *old_tail; rm; rm = rm->next)
rm->merge = 1;
}
}
static void find_non_local_tags(struct transport *transport,
struct ref **head,
struct ref ***tail);
static struct ref *get_ref_map(struct transport *transport,
struct refspec *refs, int ref_count, int tags,
int *autotags)
{
int i;
struct ref *rm;
struct ref *ref_map = NULL;
struct ref **tail = &ref_map;
const struct ref *remote_refs = transport_get_remote_refs(transport);
if (ref_count || tags == TAGS_SET) {
for (i = 0; i < ref_count; i++) {
get_fetch_map(remote_refs, &refs[i], &tail, 0);
if (refs[i].dst && refs[i].dst[0])
*autotags = 1;
}
/* Merge everything on the command line, but not --tags */
for (rm = ref_map; rm; rm = rm->next)
rm->merge = 1;
if (tags == TAGS_SET)
get_fetch_map(remote_refs, tag_refspec, &tail, 0);
} else {
/* Use the defaults */
struct remote *remote = transport->remote;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
struct branch *branch = branch_get(NULL);
int has_merge = branch_has_merge_config(branch);
if (remote && (remote->fetch_refspec_nr || has_merge)) {
for (i = 0; i < remote->fetch_refspec_nr; i++) {
get_fetch_map(remote_refs, &remote->fetch[i], &tail, 0);
if (remote->fetch[i].dst &&
remote->fetch[i].dst[0])
*autotags = 1;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
if (!i && !has_merge && ref_map &&
!remote->fetch[0].pattern)
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
ref_map->merge = 1;
}
/*
* if the remote we're fetching from is the same
* as given in branch.<name>.remote, we add the
* ref given in branch.<name>.merge, too.
*/
if (has_merge &&
!strcmp(branch->remote_name, remote->name))
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
18 years ago
add_merge_config(&ref_map, remote_refs, branch, &tail);
} else {
ref_map = get_remote_ref(remote_refs, "HEAD");
if (!ref_map)
die("Couldn't find remote ref HEAD");
ref_map->merge = 1;
tail = &ref_map->next;
}
}
if (tags == TAGS_DEFAULT && *autotags)
find_non_local_tags(transport, &ref_map, &tail);
ref_remove_duplicates(ref_map);
return ref_map;
}
static int s_update_ref(const char *action,
struct ref *ref,
int check_old)
{
char msg[1024];
char *rla = getenv("GIT_REFLOG_ACTION");
static struct ref_lock *lock;
if (!rla)
rla = default_rla.buf;
snprintf(msg, sizeof(msg), "%s: %s", rla, action);
lock = lock_any_ref_for_update(ref->name,
check_old ? ref->old_sha1 : NULL, 0);
if (!lock)
return 2;
if (write_ref_sha1(lock, ref->new_sha1, msg) < 0)
return 2;
return 0;
}
#define SUMMARY_WIDTH (2 * DEFAULT_ABBREV + 3)
#define REFCOL_WIDTH 10
static int update_local_ref(struct ref *ref,
const char *remote,
int verbose,
char *display)
{
struct commit *current = NULL, *updated;
enum object_type type;
struct branch *current_branch = branch_get(NULL);
const char *pretty_ref = ref->name + (
!prefixcmp(ref->name, "refs/heads/") ? 11 :
!prefixcmp(ref->name, "refs/tags/") ? 10 :
!prefixcmp(ref->name, "refs/remotes/") ? 13 :
0);
*display = 0;
type = sha1_object_info(ref->new_sha1, NULL);
if (type < 0)
die("object %s not found", sha1_to_hex(ref->new_sha1));
if (!hashcmp(ref->old_sha1, ref->new_sha1)) {
if (verbose)
sprintf(display, "= %-*s %-*s -> %s", SUMMARY_WIDTH,
"[up to date]", REFCOL_WIDTH, remote,
pretty_ref);
return 0;
}
if (current_branch &&
!strcmp(ref->name, current_branch->name) &&
!(update_head_ok || is_bare_repository()) &&
!is_null_sha1(ref->old_sha1)) {
/*
* If this is the head, and it's not okay to update
* the head, and the old value of the head isn't empty...
*/
sprintf(display, "! %-*s %-*s -> %s (can't fetch in current branch)",
SUMMARY_WIDTH, "[rejected]", REFCOL_WIDTH, remote,
pretty_ref);
return 1;
}
if (!is_null_sha1(ref->old_sha1) &&
!prefixcmp(ref->name, "refs/tags/")) {
int r;
r = s_update_ref("updating tag", ref, 0);
sprintf(display, "%c %-*s %-*s -> %s%s", r ? '!' : '-',
SUMMARY_WIDTH, "[tag update]", REFCOL_WIDTH, remote,
pretty_ref, r ? " (unable to update local ref)" : "");
return r;
}
current = lookup_commit_reference_gently(ref->old_sha1, 1);
updated = lookup_commit_reference_gently(ref->new_sha1, 1);
if (!current || !updated) {
const char *msg;
const char *what;
int r;
if (!strncmp(ref->name, "refs/tags/", 10)) {
msg = "storing tag";
what = "[new tag]";
}
else {
msg = "storing head";
what = "[new branch]";
}
r = s_update_ref(msg, ref, 0);
sprintf(display, "%c %-*s %-*s -> %s%s", r ? '!' : '*',
SUMMARY_WIDTH, what, REFCOL_WIDTH, remote, pretty_ref,
r ? " (unable to update local ref)" : "");
return r;
}
if (in_merge_bases(current, &updated, 1)) {
char quickref[83];
int r;
strcpy(quickref, find_unique_abbrev(current->object.sha1, DEFAULT_ABBREV));
strcat(quickref, "..");
strcat(quickref, find_unique_abbrev(ref->new_sha1, DEFAULT_ABBREV));
r = s_update_ref("fast forward", ref, 1);
sprintf(display, "%c %-*s %-*s -> %s%s", r ? '!' : ' ',
SUMMARY_WIDTH, quickref, REFCOL_WIDTH, remote,
pretty_ref, r ? " (unable to update local ref)" : "");
return r;
} else if (force || ref->force) {
char quickref[84];
int r;
strcpy(quickref, find_unique_abbrev(current->object.sha1, DEFAULT_ABBREV));
strcat(quickref, "...");
strcat(quickref, find_unique_abbrev(ref->new_sha1, DEFAULT_ABBREV));
r = s_update_ref("forced-update", ref, 1);
sprintf(display, "%c %-*s %-*s -> %s (%s)", r ? '!' : '+',
SUMMARY_WIDTH, quickref, REFCOL_WIDTH, remote,
pretty_ref,
r ? "unable to update local ref" : "forced update");
return r;
} else {
sprintf(display, "! %-*s %-*s -> %s (non fast forward)",
SUMMARY_WIDTH, "[rejected]", REFCOL_WIDTH, remote,
pretty_ref);
return 1;
}
}
static int store_updated_refs(const char *url, const char *remote_name,
struct ref *ref_map)
{
FILE *fp;
struct commit *commit;
int url_len, i, note_len, shown_url = 0, rc = 0;
char note[1024];
const char *what, *kind;
struct ref *rm;
char *filename = git_path("FETCH_HEAD");
fp = fopen(filename, "a");
if (!fp)
return error("cannot open %s: %s\n", filename, strerror(errno));
for (rm = ref_map; rm; rm = rm->next) {
struct ref *ref = NULL;
if (rm->peer_ref) {
ref = xcalloc(1, sizeof(*ref) + strlen(rm->peer_ref->name) + 1);
strcpy(ref->name, rm->peer_ref->name);
hashcpy(ref->old_sha1, rm->peer_ref->old_sha1);
hashcpy(ref->new_sha1, rm->old_sha1);
ref->force = rm->peer_ref->force;
}
commit = lookup_commit_reference_gently(rm->old_sha1, 1);
if (!commit)
rm->merge = 0;
if (!strcmp(rm->name, "HEAD")) {
kind = "";
what = "";
}
else if (!prefixcmp(rm->name, "refs/heads/")) {
kind = "branch";
what = rm->name + 11;
}
else if (!prefixcmp(rm->name, "refs/tags/")) {
kind = "tag";
what = rm->name + 10;
}
else if (!prefixcmp(rm->name, "refs/remotes/")) {
kind = "remote branch";
what = rm->name + 13;
}
else {
kind = "";
what = rm->name;
}
url_len = strlen(url);
for (i = url_len - 1; url[i] == '/' && 0 <= i; i--)
;
url_len = i + 1;
if (4 < i && !strncmp(".git", url + i - 3, 4))
url_len = i - 3;
note_len = 0;
if (*what) {
if (*kind)
note_len += sprintf(note + note_len, "%s ",
kind);
note_len += sprintf(note + note_len, "'%s' of ", what);
}
note_len += sprintf(note + note_len, "%.*s", url_len, url);
fprintf(fp, "%s\t%s\t%s\n",
sha1_to_hex(commit ? commit->object.sha1 :
rm->old_sha1),
rm->merge ? "" : "not-for-merge",
note);
if (ref)
rc |= update_local_ref(ref, what, verbose, note);
else
sprintf(note, "* %-*s %-*s -> FETCH_HEAD",
SUMMARY_WIDTH, *kind ? kind : "branch",
REFCOL_WIDTH, *what ? what : "HEAD");
if (*note) {
if (!shown_url) {
fprintf(stderr, "From %.*s\n",
url_len, url);
shown_url = 1;
}
fprintf(stderr, " %s\n", note);
}
}
fclose(fp);
if (rc & 2)
error("some local refs could not be updated; try running\n"
" 'git remote prune %s' to remove any old, conflicting "
"branches", remote_name);
return rc;
}
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
17 years ago
/*
* We would want to bypass the object transfer altogether if
* everything we are going to fetch already exists and connected
* locally.
*
* The refs we are going to fetch are in to_fetch (nr_heads in
* total). If running
*
* $ git rev-list --objects to_fetch[0] to_fetch[1] ... --not --all
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
17 years ago
*
* does not error out, that means everything reachable from the
* refs we are going to fetch exists and is connected to some of
* our existing refs.
*/
static int quickfetch(struct ref *ref_map)
{
struct child_process revlist;
struct ref *ref;
char **argv;
int i, err;
/*
* If we are deepening a shallow clone we already have these
* objects reachable. Running rev-list here will return with
* a good (0) exit status and we'll bypass the fetch that we
* really need to perform. Claiming failure now will ensure
* we perform the network exchange to deepen our history.
*/
if (depth)
return -1;
for (i = 0, ref = ref_map; ref; ref = ref->next)
i++;
if (!i)
return 0;
argv = xmalloc(sizeof(*argv) * (i + 6));
i = 0;
argv[i++] = xstrdup("rev-list");
argv[i++] = xstrdup("--quiet");
argv[i++] = xstrdup("--objects");
for (ref = ref_map; ref; ref = ref->next)
argv[i++] = xstrdup(sha1_to_hex(ref->old_sha1));
argv[i++] = xstrdup("--not");
argv[i++] = xstrdup("--all");
argv[i++] = NULL;
memset(&revlist, 0, sizeof(revlist));
revlist.argv = (const char**)argv;
revlist.git_cmd = 1;
revlist.no_stdin = 1;
revlist.no_stdout = 1;
revlist.no_stderr = 1;
err = run_command(&revlist);
for (i = 0; argv[i]; i++)
free(argv[i]);
free(argv);
return err;
}
static int fetch_refs(struct transport *transport, struct ref *ref_map)
{
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
17 years ago
int ret = quickfetch(ref_map);
if (ret)
ret = transport_fetch_refs(transport, ref_map);
if (!ret)
ret |= store_updated_refs(transport->url,
transport->remote->name,
ref_map);
transport_unlock_pack(transport);
return ret;
}
static int add_existing(const char *refname, const unsigned char *sha1,
int flag, void *cbdata)
{
struct string_list *list = (struct string_list *)cbdata;
string_list_insert(refname, list);
return 0;
}
static int will_fetch(struct ref **head, const unsigned char *sha1)
{
struct ref *rm = *head;
while (rm) {
if (!hashcmp(rm->old_sha1, sha1))
return 1;
rm = rm->next;
}
return 0;
}
static void find_non_local_tags(struct transport *transport,
struct ref **head,
struct ref ***tail)
{
struct string_list existing_refs = { NULL, 0, 0, 0 };
struct string_list new_refs = { NULL, 0, 0, 1 };
char *ref_name;
int ref_name_len;
const unsigned char *ref_sha1;
const struct ref *tag_ref;
struct ref *rm = NULL;
const struct ref *ref;
for_each_ref(add_existing, &existing_refs);
for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) {
if (prefixcmp(ref->name, "refs/tags"))
continue;
ref_name = xstrdup(ref->name);
ref_name_len = strlen(ref_name);
ref_sha1 = ref->old_sha1;
if (!strcmp(ref_name + ref_name_len - 3, "^{}")) {
ref_name[ref_name_len - 3] = 0;
tag_ref = transport_get_remote_refs(transport);
while (tag_ref) {
if (!strcmp(tag_ref->name, ref_name)) {
ref_sha1 = tag_ref->old_sha1;
break;
}
tag_ref = tag_ref->next;
}
}
if (!string_list_has_string(&existing_refs, ref_name) &&
!string_list_has_string(&new_refs, ref_name) &&
(has_sha1_file(ref->old_sha1) ||
will_fetch(head, ref->old_sha1))) {
string_list_insert(ref_name, &new_refs);
rm = alloc_ref_from_str(ref_name);
rm->peer_ref = alloc_ref_from_str(ref_name);
hashcpy(rm->old_sha1, ref_sha1);
**tail = rm;
*tail = &rm->next;
}
free(ref_name);
}
string_list_clear(&existing_refs, 0);
string_list_clear(&new_refs, 0);
}
static int do_fetch(struct transport *transport,
struct refspec *refs, int ref_count)
{
struct ref *ref_map;
struct ref *rm;
int autotags = (transport->remote->fetch_tags == 1);
if (transport->remote->fetch_tags == 2 && tags != TAGS_UNSET)
tags = TAGS_SET;
if (transport->remote->fetch_tags == -1)
tags = TAGS_UNSET;
if (!transport->get_refs_list || !transport->fetch)
die("Don't know how to fetch from %s", transport->url);
/* if not appending, truncate FETCH_HEAD */
if (!append) {
char *filename = git_path("FETCH_HEAD");
FILE *fp = fopen(filename, "w");
if (!fp)
return error("cannot open %s: %s\n", filename, strerror(errno));
fclose(fp);
}
ref_map = get_ref_map(transport, refs, ref_count, tags, &autotags);
for (rm = ref_map; rm; rm = rm->next) {
if (rm->peer_ref)
read_ref(rm->peer_ref->name, rm->peer_ref->old_sha1);
}
if (tags == TAGS_DEFAULT && autotags)
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
if (fetch_refs(transport, ref_map)) {
free_refs(ref_map);
return 1;
}
free_refs(ref_map);
/* if neither --no-tags nor --tags was specified, do automated tag
* following ... */
if (tags == TAGS_DEFAULT && autotags) {
struct ref **tail = &ref_map;
ref_map = NULL;
find_non_local_tags(transport, &ref_map, &tail);
if (ref_map) {
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, NULL);
transport_set_option(transport, TRANS_OPT_DEPTH, "0");
fetch_refs(transport, ref_map);
}
free_refs(ref_map);
}
return 0;
}
static void set_option(const char *name, const char *value)
{
int r = transport_set_option(transport, name, value);
if (r < 0)
die("Option \"%s\" value \"%s\" is not valid for %s\n",
name, value, transport->url);
if (r > 0)
warning("Option \"%s\" is ignored for %s\n",
name, transport->url);
}
int cmd_fetch(int argc, const char **argv, const char *prefix)
{
struct remote *remote;
int i;
static const char **refs = NULL;
int ref_nr = 0;
int exit_code;
/* Record the command line for the reflog */
strbuf_addstr(&default_rla, "fetch");
for (i = 1; i < argc; i++)
strbuf_addf(&default_rla, " %s", argv[i]);
argc = parse_options(argc, argv,
builtin_fetch_options, builtin_fetch_usage, 0);
if (argc == 0)
remote = remote_get(NULL);
else
remote = remote_get(argv[0]);
transport = transport_get(remote, remote->url[0]);
if (verbose >= 2)
transport->verbose = 1;
if (quiet)
transport->verbose = -1;
if (upload_pack)
set_option(TRANS_OPT_UPLOADPACK, upload_pack);
if (keep)
set_option(TRANS_OPT_KEEP, "yes");
if (depth)
set_option(TRANS_OPT_DEPTH, depth);
if (!transport->url)
die("Where do you want to fetch from today?");
if (argc > 1) {
int j = 0;
refs = xcalloc(argc + 1, sizeof(const char *));
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "tag")) {
char *ref;
i++;
if (i >= argc)
die("You need to specify a tag name.");
ref = xmalloc(strlen(argv[i]) * 2 + 22);
strcpy(ref, "refs/tags/");
strcat(ref, argv[i]);
strcat(ref, ":refs/tags/");
strcat(ref, argv[i]);
refs[j++] = ref;
} else
refs[j++] = argv[i];
}
refs[j] = NULL;
ref_nr = j;
}
signal(SIGINT, unlock_pack_on_signal);
atexit(unlock_pack);
exit_code = do_fetch(transport,
parse_fetch_refspec(ref_nr, refs), ref_nr);
transport_disconnect(transport);
transport = NULL;
return exit_code;
}