|
|
|
#include "cache.h"
|
|
|
|
#include "repository.h"
|
|
|
|
#include "tempfile.h"
|
|
|
|
#include "lockfile.h"
|
|
|
|
#include "object-store.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "tag.h"
|
|
|
|
#include "pkt-line.h"
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs need new shallow commits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, already handled by
fetch-pack and receive-pack, and #6 and #7, which have their own patches
due to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
#include "remote.h"
|
|
|
|
#include "refs.h"
|
|
|
|
#include "oid-array.h"
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs need new shallow commits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, already handled by
fetch-pack and receive-pack, and #6 and #7, which have their own patches
due to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
#include "diff.h"
|
|
|
|
#include "revision.h"
|
|
|
|
#include "commit-slab.h"
|
shallow.c: implement a generic shallow boundary finder based on rev-list
Instead of a custom commit walker like get_shallow_commits(), this new
function uses rev-list to mark NOT_SHALLOW to all reachable commits,
except borders. The definition of reachable is to be defined by the
protocol later. This makes it more flexible to define shallow boundary.
The way we find the border is to paint all reachable commits NOT_SHALLOW. Any
of them that "touches" commits without NOT_SHALLOW flag are considered
shallow (e.g. zero parents via grafting mechanism). Shallow commits and
their true parents are all marked SHALLOW. Then NOT_SHALLOW is removed
from shallow commits at the end.
There is an interesting observation. With a generic walker, we can
produce all kinds of shallow cutting. In the following graph, every
commit but "x" is reachable. "b" is a parent of "a".
x -- a -- o
/ /
x -- c -- b -- o
After this function is run, "a" and "c" are both considered shallow
commits. After grafting occurs at the client side, what we see is
a -- o
/
c -- b -- o
Notice that because of grafting, "a" has zero parents, so "b" is no
longer a parent of "a".
This is unfortunate and may be solved in two ways. The first is change
the way shallow grafting works and keep "a -- b" connection if "b"
exists and always ends at shallow commits (iow, no loose ends). This is
hard to detect, or at least not cheap to do.
The second way is mark one "x" as shallow commit instead of "a" and
produce this graph at client side:
x -- a -- o
/ /
c -- b -- o
More commits, but simpler grafting rules.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
#include "list-objects.h"
|
|
|
|
#include "commit-reach.h"
|
|
|
|
#include "shallow.h"
|
|
|
|
|
|
|
|
void set_alternate_shallow_file(struct repository *r, const char *path, int override)
|
|
|
|
{
|
|
|
|
if (r->parsed_objects->is_shallow != -1)
|
|
|
|
BUG("is_repository_shallow must not be called before set_alternate_shallow_file");
|
|
|
|
if (r->parsed_objects->alternate_shallow_file && !override)
|
|
|
|
return;
|
|
|
|
free(r->parsed_objects->alternate_shallow_file);
|
|
|
|
r->parsed_objects->alternate_shallow_file = xstrdup_or_null(path);
|
|
|
|
}
|
|
|
|
|
|
|
|
int register_shallow(struct repository *r, const struct object_id *oid)
|
|
|
|
{
|
|
|
|
struct commit_graft *graft =
|
|
|
|
xmalloc(sizeof(struct commit_graft));
|
|
|
|
struct commit *commit = lookup_commit(the_repository, oid);
|
|
|
|
|
|
|
|
oidcpy(&graft->oid, oid);
|
|
|
|
graft->nr_parent = -1;
|
|
|
|
if (commit && commit->object.parsed)
|
|
|
|
commit->parents = NULL;
|
|
|
|
return register_commit_graft(r, graft, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int unregister_shallow(const struct object_id *oid)
|
|
|
|
{
|
|
|
|
int pos = commit_graft_pos(the_repository, oid->hash);
|
|
|
|
if (pos < 0)
|
|
|
|
return -1;
|
|
|
|
if (pos + 1 < the_repository->parsed_objects->grafts_nr)
|
|
|
|
MOVE_ARRAY(the_repository->parsed_objects->grafts + pos,
|
|
|
|
the_repository->parsed_objects->grafts + pos + 1,
|
|
|
|
the_repository->parsed_objects->grafts_nr - pos - 1);
|
|
|
|
the_repository->parsed_objects->grafts_nr--;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int is_repository_shallow(struct repository *r)
|
|
|
|
{
|
|
|
|
FILE *fp;
|
|
|
|
char buf[1024];
|
|
|
|
const char *path = r->parsed_objects->alternate_shallow_file;
|
|
|
|
|
|
|
|
if (r->parsed_objects->is_shallow >= 0)
|
|
|
|
return r->parsed_objects->is_shallow;
|
|
|
|
|
|
|
|
if (!path)
|
|
|
|
path = git_path_shallow(r);
|
|
|
|
/*
|
|
|
|
* fetch-pack sets '--shallow-file ""' as an indicator that no
|
|
|
|
* shallow file should be used. We could just open it and it
|
|
|
|
* will likely fail. But let's do an explicit check instead.
|
|
|
|
*/
|
|
|
|
if (!*path || (fp = fopen(path, "r")) == NULL) {
|
|
|
|
stat_validity_clear(r->parsed_objects->shallow_stat);
|
|
|
|
r->parsed_objects->is_shallow = 0;
|
|
|
|
return r->parsed_objects->is_shallow;
|
|
|
|
}
|
|
|
|
stat_validity_update(r->parsed_objects->shallow_stat, fileno(fp));
|
|
|
|
r->parsed_objects->is_shallow = 1;
|
|
|
|
|
|
|
|
while (fgets(buf, sizeof(buf), fp)) {
|
|
|
|
struct object_id oid;
|
|
|
|
if (get_oid_hex(buf, &oid))
|
|
|
|
die("bad shallow line: %s", buf);
|
|
|
|
register_shallow(r, &oid);
|
|
|
|
}
|
|
|
|
fclose(fp);
|
|
|
|
return r->parsed_objects->is_shallow;
|
|
|
|
}
|
|
|
|
|
shallow.c: use '{commit,rollback}_shallow_file'
In bd0b42aed3 (fetch-pack: do not take shallow lock unnecessarily,
2019-01-10), the author noted that 'is_repository_shallow' produces
visible side-effect(s) by setting 'is_shallow' and 'shallow_stat'.
This is a problem for e.g., fetching with '--update-shallow' in a
shallow repository with 'fetch.writeCommitGraph' enabled, since the
update to '.git/shallow' will cause Git to think that the repository
isn't shallow when it is, thereby circumventing the commit-graph
compatibility check.
This causes problems in shallow repositories with at least shallow refs
that have at least one ancestor (since the client won't have those
objects, and therefore can't take the reachability closure over commits
when writing a commit-graph).
Address this by introducing thin wrappers over 'commit_lock_file' and
'rollback_lock_file' for use specifically when the lock is held over
'.git/shallow'. These wrappers (appropriately called
'commit_shallow_file' and 'rollback_shallow_file') call into their
respective functions in 'lockfile.h', but additionally reset validity
checks used by the shallow machinery.
Replace each instance of 'commit_lock_file' and 'rollback_lock_file'
with 'commit_shallow_file' and 'rollback_shallow_file' when the lock
being held is over the '.git/shallow' file.
As a result, 'prune_shallow' can now only be called once (since
'check_shallow_file_for_update' will die after calling
'reset_repository_shallow'). But, this is OK since we only call
'prune_shallow' at most once per process.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
static void reset_repository_shallow(struct repository *r)
|
|
|
|
{
|
|
|
|
r->parsed_objects->is_shallow = -1;
|
|
|
|
stat_validity_clear(r->parsed_objects->shallow_stat);
|
|
|
|
}
|
|
|
|
|
|
|
|
int commit_shallow_file(struct repository *r, struct shallow_lock *lk)
|
shallow.c: use '{commit,rollback}_shallow_file'
In bd0b42aed3 (fetch-pack: do not take shallow lock unnecessarily,
2019-01-10), the author noted that 'is_repository_shallow' produces
visible side-effect(s) by setting 'is_shallow' and 'shallow_stat'.
This is a problem for e.g., fetching with '--update-shallow' in a
shallow repository with 'fetch.writeCommitGraph' enabled, since the
update to '.git/shallow' will cause Git to think that the repository
isn't shallow when it is, thereby circumventing the commit-graph
compatibility check.
This causes problems in shallow repositories with at least shallow refs
that have at least one ancestor (since the client won't have those
objects, and therefore can't take the reachability closure over commits
when writing a commit-graph).
Address this by introducing thin wrappers over 'commit_lock_file' and
'rollback_lock_file' for use specifically when the lock is held over
'.git/shallow'. These wrappers (appropriately called
'commit_shallow_file' and 'rollback_shallow_file') call into their
respective functions in 'lockfile.h', but additionally reset validity
checks used by the shallow machinery.
Replace each instance of 'commit_lock_file' and 'rollback_lock_file'
with 'commit_shallow_file' and 'rollback_shallow_file' when the lock
being held is over the '.git/shallow' file.
As a result, 'prune_shallow' can now only be called once (since
'check_shallow_file_for_update' will die after calling
'reset_repository_shallow'). But, this is OK since we only call
'prune_shallow' at most once per process.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
{
|
|
|
|
int res = commit_lock_file(&lk->lock);
|
shallow.c: use '{commit,rollback}_shallow_file'
In bd0b42aed3 (fetch-pack: do not take shallow lock unnecessarily,
2019-01-10), the author noted that 'is_repository_shallow' produces
visible side-effect(s) by setting 'is_shallow' and 'shallow_stat'.
This is a problem for e.g., fetching with '--update-shallow' in a
shallow repository with 'fetch.writeCommitGraph' enabled, since the
update to '.git/shallow' will cause Git to think that the repository
isn't shallow when it is, thereby circumventing the commit-graph
compatibility check.
This causes problems in shallow repositories with at least shallow refs
that have at least one ancestor (since the client won't have those
objects, and therefore can't take the reachability closure over commits
when writing a commit-graph).
Address this by introducing thin wrappers over 'commit_lock_file' and
'rollback_lock_file' for use specifically when the lock is held over
'.git/shallow'. These wrappers (appropriately called
'commit_shallow_file' and 'rollback_shallow_file') call into their
respective functions in 'lockfile.h', but additionally reset validity
checks used by the shallow machinery.
Replace each instance of 'commit_lock_file' and 'rollback_lock_file'
with 'commit_shallow_file' and 'rollback_shallow_file' when the lock
being held is over the '.git/shallow' file.
As a result, 'prune_shallow' can now only be called once (since
'check_shallow_file_for_update' will die after calling
'reset_repository_shallow'). But, this is OK since we only call
'prune_shallow' at most once per process.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
reset_repository_shallow(r);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
void rollback_shallow_file(struct repository *r, struct shallow_lock *lk)
|
shallow.c: use '{commit,rollback}_shallow_file'
In bd0b42aed3 (fetch-pack: do not take shallow lock unnecessarily,
2019-01-10), the author noted that 'is_repository_shallow' produces
visible side-effect(s) by setting 'is_shallow' and 'shallow_stat'.
This is a problem for e.g., fetching with '--update-shallow' in a
shallow repository with 'fetch.writeCommitGraph' enabled, since the
update to '.git/shallow' will cause Git to think that the repository
isn't shallow when it is, thereby circumventing the commit-graph
compatibility check.
This causes problems in shallow repositories with at least shallow refs
that have at least one ancestor (since the client won't have those
objects, and therefore can't take the reachability closure over commits
when writing a commit-graph).
Address this by introducing thin wrappers over 'commit_lock_file' and
'rollback_lock_file' for use specifically when the lock is held over
'.git/shallow'. These wrappers (appropriately called
'commit_shallow_file' and 'rollback_shallow_file') call into their
respective functions in 'lockfile.h', but additionally reset validity
checks used by the shallow machinery.
Replace each instance of 'commit_lock_file' and 'rollback_lock_file'
with 'commit_shallow_file' and 'rollback_shallow_file' when the lock
being held is over the '.git/shallow' file.
As a result, 'prune_shallow' can now only be called once (since
'check_shallow_file_for_update' will die after calling
'reset_repository_shallow'). But, this is OK since we only call
'prune_shallow' at most once per process.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
{
|
|
|
|
rollback_lock_file(&lk->lock);
|
shallow.c: use '{commit,rollback}_shallow_file'
In bd0b42aed3 (fetch-pack: do not take shallow lock unnecessarily,
2019-01-10), the author noted that 'is_repository_shallow' produces
visible side-effect(s) by setting 'is_shallow' and 'shallow_stat'.
This is a problem for e.g., fetching with '--update-shallow' in a
shallow repository with 'fetch.writeCommitGraph' enabled, since the
update to '.git/shallow' will cause Git to think that the repository
isn't shallow when it is, thereby circumventing the commit-graph
compatibility check.
This causes problems in shallow repositories with at least shallow refs
that have at least one ancestor (since the client won't have those
objects, and therefore can't take the reachability closure over commits
when writing a commit-graph).
Address this by introducing thin wrappers over 'commit_lock_file' and
'rollback_lock_file' for use specifically when the lock is held over
'.git/shallow'. These wrappers (appropriately called
'commit_shallow_file' and 'rollback_shallow_file') call into their
respective functions in 'lockfile.h', but additionally reset validity
checks used by the shallow machinery.
Replace each instance of 'commit_lock_file' and 'rollback_lock_file'
with 'commit_shallow_file' and 'rollback_shallow_file' when the lock
being held is over the '.git/shallow' file.
As a result, 'prune_shallow' can now only be called once (since
'check_shallow_file_for_update' will die after calling
'reset_repository_shallow'). But, this is OK since we only call
'prune_shallow' at most once per process.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
reset_repository_shallow(r);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* TODO: use "int" elemtype instead of "int *" when/if commit-slab
|
|
|
|
* supports a "valid" flag.
|
|
|
|
*/
|
|
|
|
define_commit_slab(commit_depth, int *);
|
|
|
|
static void free_depth_in_slab(int **ptr)
|
|
|
|
{
|
|
|
|
FREE_AND_NULL(*ptr);
|
|
|
|
}
|
|
|
|
struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
|
|
|
|
int shallow_flag, int not_shallow_flag)
|
|
|
|
{
|
|
|
|
int i = 0, cur_depth = 0;
|
|
|
|
struct commit_list *result = NULL;
|
|
|
|
struct object_array stack = OBJECT_ARRAY_INIT;
|
|
|
|
struct commit *commit = NULL;
|
|
|
|
struct commit_graft *graft;
|
|
|
|
struct commit_depth depths;
|
|
|
|
|
|
|
|
init_commit_depth(&depths);
|
|
|
|
while (commit || i < heads->nr || stack.nr) {
|
|
|
|
struct commit_list *p;
|
|
|
|
if (!commit) {
|
|
|
|
if (i < heads->nr) {
|
|
|
|
int **depth_slot;
|
|
|
|
commit = (struct commit *)
|
|
|
|
deref_tag(the_repository,
|
|
|
|
heads->objects[i++].item,
|
|
|
|
NULL, 0);
|
|
|
|
if (!commit || commit->object.type != OBJ_COMMIT) {
|
|
|
|
commit = NULL;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
depth_slot = commit_depth_at(&depths, commit);
|
|
|
|
if (!*depth_slot)
|
|
|
|
*depth_slot = xmalloc(sizeof(int));
|
|
|
|
**depth_slot = 0;
|
|
|
|
cur_depth = 0;
|
|
|
|
} else {
|
|
|
|
commit = (struct commit *)
|
object_array: add and use `object_array_pop()`
In a couple of places, we pop objects off an object array `foo` by
decreasing `foo.nr`. We access `foo.nr` in many places, but most if not
all other times we do so read-only, e.g., as we iterate over the array.
But when we change `foo.nr` behind the array's back, it feels a bit
nasty and looks like it might leak memory.
Leaks happen if the popped element has an allocated `name` or `path`.
At the moment, that is not the case. Still, 1) the object array might
gain more fields that want to be freed, 2) a code path where we pop
might start using names or paths, 3) one of these code paths might be
copied to somewhere where we do, and 4) using a dedicated function for
popping is conceptually cleaner.
Introduce and use `object_array_pop()` instead. Release memory in the
new function. Document that popping an object leaves the associated
elements in limbo.
The converted places were identified by grepping for "\.nr\>" and
looking for "--".
Make the new function return NULL on an empty array. This is consistent
with `pop_commit()` and allows the following:
while ((o = object_array_pop(&foo)) != NULL) {
// do something
}
But as noted above, we don't need to go out of our way to avoid reading
`foo.nr`. This is probably more readable:
while (foo.nr) {
... o = object_array_pop(&foo);
// do something
}
The name of `object_array_pop()` does not quite align with
`add_object_array()`. That is unfortunate. On the other hand, it matches
`object_array_clear()`. Arguably it's `add_...` that is the odd one out,
since it reads like it's used to "add" an "object array". For that
reason, side with `object_array_clear()`.
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
object_array_pop(&stack);
|
|
|
|
cur_depth = **commit_depth_at(&depths, commit);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
parse_commit_or_die(commit);
|
|
|
|
cur_depth++;
|
|
|
|
if ((depth != INFINITE_DEPTH && cur_depth >= depth) ||
|
|
|
|
(is_repository_shallow(the_repository) && !commit->parents &&
|
|
|
|
(graft = lookup_commit_graft(the_repository, &commit->object.oid)) != NULL &&
|
|
|
|
graft->nr_parent < 0)) {
|
|
|
|
commit_list_insert(commit, &result);
|
|
|
|
commit->object.flags |= shallow_flag;
|
|
|
|
commit = NULL;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
commit->object.flags |= not_shallow_flag;
|
|
|
|
for (p = commit->parents, commit = NULL; p; p = p->next) {
|
|
|
|
int **depth_slot = commit_depth_at(&depths, p->item);
|
|
|
|
if (!*depth_slot) {
|
|
|
|
*depth_slot = xmalloc(sizeof(int));
|
|
|
|
**depth_slot = cur_depth;
|
|
|
|
} else {
|
|
|
|
if (cur_depth >= **depth_slot)
|
|
|
|
continue;
|
|
|
|
**depth_slot = cur_depth;
|
|
|
|
}
|
|
|
|
if (p->next)
|
|
|
|
add_object_array(&p->item->object,
|
|
|
|
NULL, &stack);
|
|
|
|
else {
|
|
|
|
commit = p->item;
|
|
|
|
cur_depth = **commit_depth_at(&depths, commit);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
deep_clear_commit_depth(&depths, free_depth_in_slab);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
shallow.c: implement a generic shallow boundary finder based on rev-list
Instead of a custom commit walker like get_shallow_commits(), this new
function uses rev-list to mark NOT_SHALLOW to all reachable commits,
except borders. The definition of reachable is to be defined by the
protocol later. This makes it more flexible to define shallow boundary.
The way we find the border is to paint all reachable commits NOT_SHALLOW. Any
of them that "touches" commits without NOT_SHALLOW flag are considered
shallow (e.g. zero parents via grafting mechanism). Shallow commits and
their true parents are all marked SHALLOW. Then NOT_SHALLOW is removed
from shallow commits at the end.
There is an interesting observation. With a generic walker, we can
produce all kinds of shallow cutting. In the following graph, every
commit but "x" is reachable. "b" is a parent of "a".
x -- a -- o
/ /
x -- c -- b -- o
After this function is run, "a" and "c" are both considered shallow
commits. After grafting occurs at the client side, what we see is
a -- o
/
c -- b -- o
Notice that because of grafting, "a" has zero parents, so "b" is no
longer a parent of "a".
This is unfortunate and may be solved in two ways. The first is change
the way shallow grafting works and keep "a -- b" connection if "b"
exists and always ends at shallow commits (iow, no loose ends). This is
hard to detect, or at least not cheap to do.
The second way is mark one "x" as shallow commit instead of "a" and
produce this graph at client side:
x -- a -- o
/ /
c -- b -- o
More commits, but simpler grafting rules.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
/*
 * Commit callback for the revision walk: "data" is the address of a
 * commit list, and each commit shown is inserted into that list.
 */
static void show_commit(struct commit *commit, void *data)
{
	struct commit_list **list = data;

	commit_list_insert(commit, list);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given rev-list arguments, run rev-list. All reachable commits
|
|
|
|
* except border ones are marked with not_shallow_flag. Border commits
|
|
|
|
* are marked with shallow_flag. The list of border/shallow commits
|
|
|
|
* are also returned.
|
|
|
|
*/
|
|
|
|
struct commit_list *get_shallow_commits_by_rev_list(int ac, const char **av,
|
|
|
|
int shallow_flag,
|
|
|
|
int not_shallow_flag)
|
|
|
|
{
|
|
|
|
struct commit_list *result = NULL, *p;
|
|
|
|
struct commit_list *not_shallow_list = NULL;
|
|
|
|
struct rev_info revs;
|
|
|
|
int both_flags = shallow_flag | not_shallow_flag;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SHALLOW (excluded) and NOT_SHALLOW (included) should not be
|
|
|
|
* set at this point. But better be safe than sorry.
|
|
|
|
*/
|
|
|
|
clear_object_flags(both_flags);
|
|
|
|
|
|
|
|
is_repository_shallow(the_repository); /* make sure shallows are read */
|
shallow.c: implement a generic shallow boundary finder based on rev-list
Instead of a custom commit walker like get_shallow_commits(), this new
function uses rev-list to mark NOT_SHALLOW to all reachable commits,
except borders. The definition of reachable is to be defined by the
protocol later. This makes it more flexible to define shallow boundary.
The way we find border is paint all reachable commits NOT_SHALLOW. Any
of them that "touches" commits without NOT_SHALLOW flag are considered
shallow (e.g. zero parents via grafting mechanism). Shallow commits and
their true parents are all marked SHALLOW. Then NOT_SHALLOW is removed
from shallow commits at the end.
There is an interesting observation. With a generic walker, we can
produce all kinds of shallow cutting. In the following graph, every
commit but "x" is reachable. "b" is a parent of "a".
x -- a -- o
/ /
x -- c -- b -- o
After this function is run, "a" and "c" are both considered shallow
commits. After grafting occurs at the client side, what we see is
a -- o
/
c -- b -- o
Notice that because of grafting, "a" has zero parents, so "b" is no
longer a parent of "a".
This is unfortunate and may be solved in two ways. The first is change
the way shallow grafting works and keep "a -- b" connection if "b"
exists and always ends at shallow commits (iow, no loose ends). This is
hard to detect, or at least not cheap to do.
The second way is mark one "x" as shallow commit instead of "a" and
produce this graph at client side:
x -- a -- o
/ /
c -- b -- o
More commits, but simpler grafting rules.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
|
|
|
|
repo_init_revisions(the_repository, &revs, NULL);
|
shallow.c: implement a generic shallow boundary finder based on rev-list
Instead of a custom commit walker like get_shallow_commits(), this new
function uses rev-list to mark NOT_SHALLOW to all reachable commits,
except borders. The definition of reachable is to be defined by the
protocol later. This makes it more flexible to define shallow boundary.
The way we find border is paint all reachable commits NOT_SHALLOW. Any
of them that "touches" commits without NOT_SHALLOW flag are considered
shallow (e.g. zero parents via grafting mechanism). Shallow commits and
their true parents are all marked SHALLOW. Then NOT_SHALLOW is removed
from shallow commits at the end.
There is an interesting observation. With a generic walker, we can
produce all kinds of shallow cutting. In the following graph, every
commit but "x" is reachable. "b" is a parent of "a".
x -- a -- o
/ /
x -- c -- b -- o
After this function is run, "a" and "c" are both considered shallow
commits. After grafting occurs at the client side, what we see is
a -- o
/
c -- b -- o
Notice that because of grafting, "a" has zero parents, so "b" is no
longer a parent of "a".
This is unfortunate and may be solved in two ways. The first is change
the way shallow grafting works and keep "a -- b" connection if "b"
exists and always ends at shallow commits (iow, no loose ends). This is
hard to detect, or at least not cheap to do.
The second way is mark one "x" as shallow commit instead of "a" and
produce this graph at client side:
x -- a -- o
/ /
c -- b -- o
More commits, but simpler grafting rules.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
save_commit_buffer = 0;
|
|
|
|
setup_revisions(ac, av, &revs, NULL);
|
|
|
|
|
|
|
|
if (prepare_revision_walk(&revs))
|
|
|
|
die("revision walk setup failed");
|
|
|
|
traverse_commit_list(&revs, show_commit, NULL, ¬_shallow_list);
|
|
|
|
|
|
|
|
if (!not_shallow_list)
|
|
|
|
die("no commits selected for shallow requests");
|
|
|
|
|
shallow.c: implement a generic shallow boundary finder based on rev-list
Instead of a custom commit walker like get_shallow_commits(), this new
function uses rev-list to mark NOT_SHALLOW to all reachable commits,
except borders. The definition of reachable is to be defined by the
protocol later. This makes it more flexible to define shallow boundary.
The way we find border is paint all reachable commits NOT_SHALLOW. Any
of them that "touches" commits without NOT_SHALLOW flag are considered
shallow (e.g. zero parents via grafting mechanism). Shallow commits and
their true parents are all marked SHALLOW. Then NOT_SHALLOW is removed
from shallow commits at the end.
There is an interesting observation. With a generic walker, we can
produce all kinds of shallow cutting. In the following graph, every
commit but "x" is reachable. "b" is a parent of "a".
x -- a -- o
/ /
x -- c -- b -- o
After this function is run, "a" and "c" are both considered shallow
commits. After grafting occurs at the client side, what we see is
a -- o
/
c -- b -- o
Notice that because of grafting, "a" has zero parents, so "b" is no
longer a parent of "a".
This is unfortunate and may be solved in two ways. The first is change
the way shallow grafting works and keep "a -- b" connection if "b"
exists and always ends at shallow commits (iow, no loose ends). This is
hard to detect, or at least not cheap to do.
The second way is mark one "x" as shallow commit instead of "a" and
produce this graph at client side:
x -- a -- o
/ /
c -- b -- o
More commits, but simpler grafting rules.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
/* Mark all reachable commits as NOT_SHALLOW */
|
|
|
|
for (p = not_shallow_list; p; p = p->next)
|
|
|
|
p->item->object.flags |= not_shallow_flag;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* mark border commits SHALLOW + NOT_SHALLOW.
|
|
|
|
* We cannot clear NOT_SHALLOW right now. Imagine border
|
|
|
|
* commit A is processed first, then commit B, whose parent is
|
|
|
|
* A, later. If NOT_SHALLOW on A is cleared at step 1, B
|
|
|
|
* itself is considered border at step 2, which is incorrect.
|
|
|
|
*/
|
|
|
|
for (p = not_shallow_list; p; p = p->next) {
|
|
|
|
struct commit *c = p->item;
|
|
|
|
struct commit_list *parent;
|
|
|
|
|
|
|
|
if (parse_commit(c))
|
|
|
|
die("unable to parse commit %s",
|
|
|
|
oid_to_hex(&c->object.oid));
|
|
|
|
|
|
|
|
for (parent = c->parents; parent; parent = parent->next)
|
|
|
|
if (!(parent->item->object.flags & not_shallow_flag)) {
|
|
|
|
c->object.flags |= shallow_flag;
|
|
|
|
commit_list_insert(c, &result);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free_commit_list(not_shallow_list);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now we can clean up NOT_SHALLOW on border commits. Having
|
|
|
|
* both flags set can confuse the caller.
|
|
|
|
*/
|
|
|
|
for (p = result; p; p = p->next) {
|
|
|
|
struct object *o = &p->item->object;
|
|
|
|
if ((o->flags & both_flags) == both_flags)
|
|
|
|
o->flags &= ~not_shallow_flag;
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void check_shallow_file_for_update(struct repository *r)
|
|
|
|
{
|
|
|
|
if (r->parsed_objects->is_shallow == -1)
|
|
|
|
BUG("shallow must be initialized by now");
|
|
|
|
|
|
|
|
if (!stat_validity_check(r->parsed_objects->shallow_stat,
|
|
|
|
git_path_shallow(r)))
|
|
|
|
die("shallow file has changed since we read it");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Bit flags stored in write_shallow_data.flags and consulted by
 * write_one_shallow() to filter which shallow entries get written.
 */
/*
 * SEEN_ONLY: skip entries whose commit cannot be looked up or does not
 * carry the SEEN object flag (only consulted when QUICK is not set).
 */
#define SEEN_ONLY 1
|
|
|
|
/*
 * VERBOSE: print a "Removing ... from .git/shallow" line for every entry
 * dropped by the SEEN_ONLY filter.
 */
#define VERBOSE 2
|
|
|
|
/*
 * QUICK: skip entries whose object does not exist according to
 * has_object_file(); when set, the SEEN_ONLY filter is not applied.
 */
#define QUICK 4
|
|
|
|
|
|
|
|
/*
 * Callback state handed to write_one_shallow() through
 * for_each_commit_graft().
 */
struct write_shallow_data {
|
|
|
|
/* buffer that accepted entries are appended to */
struct strbuf *out;
|
|
|
|
/*
 * non-zero: emit each entry as a "shallow <hex>" packet via
 * packet_buf_write(); zero: emit "<hex>\n" lines
 */
int use_pack_protocol;
|
|
|
|
/* number of entries written so far */
int count;
|
|
|
|
/* bitmask of SEEN_ONLY / VERBOSE / QUICK */
unsigned flags;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int write_one_shallow(const struct commit_graft *graft, void *cb_data)
|
|
|
|
{
|
|
|
|
struct write_shallow_data *data = cb_data;
|
|
|
|
const char *hex = oid_to_hex(&graft->oid);
|
|
|
|
if (graft->nr_parent != -1)
|
|
|
|
return 0;
|
|
|
|
if (data->flags & QUICK) {
|
|
|
|
if (!has_object_file(&graft->oid))
|
|
|
|
return 0;
|
|
|
|
} else if (data->flags & SEEN_ONLY) {
|
|
|
|
struct commit *c = lookup_commit(the_repository, &graft->oid);
|
|
|
|
if (!c || !(c->object.flags & SEEN)) {
|
|
|
|
if (data->flags & VERBOSE)
|
|
|
|
printf("Removing %s from .git/shallow\n",
|
|
|
|
oid_to_hex(&c->object.oid));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
data->count++;
|
|
|
|
if (data->use_pack_protocol)
|
|
|
|
packet_buf_write(data->out, "shallow %s", hex);
|
|
|
|
else {
|
|
|
|
strbuf_addstr(data->out, hex);
|
|
|
|
strbuf_addch(data->out, '\n');
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int write_shallow_commits_1(struct strbuf *out, int use_pack_protocol,
|
|
|
|
const struct oid_array *extra,
|
|
|
|
unsigned flags)
|
|
|
|
{
|
|
|
|
struct write_shallow_data data;
|
|
|
|
int i;
|
|
|
|
data.out = out;
|
|
|
|
data.use_pack_protocol = use_pack_protocol;
|
|
|
|
data.count = 0;
|
|
|
|
data.flags = flags;
|
|
|
|
for_each_commit_graft(write_one_shallow, &data);
|
|
|
|
if (!extra)
|
|
|
|
return data.count;
|
|
|
|
for (i = 0; i < extra->nr; i++) {
|
|
|
|
strbuf_addstr(out, oid_to_hex(extra->oid + i));
|
|
|
|
strbuf_addch(out, '\n');
|
|
|
|
data.count++;
|
|
|
|
}
|
|
|
|
return data.count;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Public entry point: write all shallow entries plus `extra` (may be
 * NULL) to `out` with no filtering flags applied. Returns the number of
 * entries written.
 */
int write_shallow_commits(struct strbuf *out, int use_pack_protocol,
			  const struct oid_array *extra)
{
	const unsigned no_filter_flags = 0;

	return write_shallow_commits_1(out, use_pack_protocol, extra,
				       no_filter_flags);
}
|
|
|
|
|
|
|
|
const char *setup_temporary_shallow(const struct oid_array *extra)
|
|
|
|
{
|
tempfile: auto-allocate tempfiles on heap
The previous commit taught the tempfile code to give up
ownership over tempfiles that have been renamed or deleted.
That makes it possible to use a stack variable like this:
struct tempfile t;
create_tempfile(&t, ...);
...
if (!err)
rename_tempfile(&t, ...);
else
delete_tempfile(&t);
But doing it this way has a high potential for creating
memory errors. The tempfile we pass to create_tempfile()
ends up on a global linked list, and it's not safe for it to
go out of scope until we've called one of those two
deactivation functions.
Imagine that we add an early return from the function that
forgets to call delete_tempfile(). With a static or heap
tempfile variable, the worst case is that the tempfile hangs
around until the program exits (and some functions like
setup_shallow_temporary rely on this intentionally, creating
a tempfile and then leaving it for later cleanup).
But with a stack variable as above, this is a serious memory
error: the variable goes out of scope and may be filled with
garbage by the time the tempfile code looks at it. Let's
see if we can make it harder to get this wrong.
Since many callers need to allocate arbitrary numbers of
tempfiles, we can't rely on static storage as a general
solution. So we need to turn to the heap. We could just ask
all callers to pass us a heap variable, but that puts the
burden on them to call free() at the right time.
Instead, let's have the tempfile code handle the heap
allocation _and_ the deallocation (when the tempfile is
deactivated and removed from the list).
This changes the return value of all of the creation
functions. For the cleanup functions (delete and rename),
we'll add one extra bit of safety: instead of taking a
tempfile pointer, we'll take a pointer-to-pointer and set it
to NULL after freeing the object. This makes it safe to
double-call functions like delete_tempfile(), as the second
call treats the NULL input as a noop. Several callsites
follow this pattern.
The resulting patch does have a fair bit of noise, as each
caller needs to be converted to handle:
1. Storing a pointer instead of the struct itself.
2. Passing the pointer instead of taking the struct
address.
3. Handling a "struct tempfile *" return instead of a file
descriptor.
We could play games to make this less noisy. For example, by
defining the tempfile like this:
struct tempfile {
struct heap_allocated_part_of_tempfile {
int fd;
...etc
} *actual_data;
}
Callers would continue to have a "struct tempfile", and it
would be "active" only when the inner pointer was non-NULL.
But that just makes things more awkward in the long run.
There aren't that many callers, so we can simply bite
the bullet and adjust all of them. And the compiler makes it
easy for us to find them all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
struct tempfile *temp;
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
|
|
|
|
if (write_shallow_commits(&sb, 0, extra)) {
|
tempfile: auto-allocate tempfiles on heap
The previous commit taught the tempfile code to give up
ownership over tempfiles that have been renamed or deleted.
That makes it possible to use a stack variable like this:
struct tempfile t;
create_tempfile(&t, ...);
...
if (!err)
rename_tempfile(&t, ...);
else
delete_tempfile(&t);
But doing it this way has a high potential for creating
memory errors. The tempfile we pass to create_tempfile()
ends up on a global linked list, and it's not safe for it to
go out of scope until we've called one of those two
deactivation functions.
Imagine that we add an early return from the function that
forgets to call delete_tempfile(). With a static or heap
tempfile variable, the worst case is that the tempfile hangs
around until the program exits (and some functions like
setup_shallow_temporary rely on this intentionally, creating
a tempfile and then leaving it for later cleanup).
But with a stack variable as above, this is a serious memory
error: the variable goes out of scope and may be filled with
garbage by the time the tempfile code looks at it. Let's
see if we can make it harder to get this wrong.
Since many callers need to allocate arbitrary numbers of
tempfiles, we can't rely on static storage as a general
solution. So we need to turn to the heap. We could just ask
all callers to pass us a heap variable, but that puts the
burden on them to call free() at the right time.
Instead, let's have the tempfile code handle the heap
allocation _and_ the deallocation (when the tempfile is
deactivated and removed from the list).
This changes the return value of all of the creation
functions. For the cleanup functions (delete and rename),
we'll add one extra bit of safety: instead of taking a
tempfile pointer, we'll take a pointer-to-pointer and set it
to NULL after freeing the object. This makes it safe to
double-call functions like delete_tempfile(), as the second
call treats the NULL input as a noop. Several callsites
follow this pattern.
The resulting patch does have a fair bit of noise, as each
caller needs to be converted to handle:
1. Storing a pointer instead of the struct itself.
2. Passing the pointer instead of taking the struct
address.
3. Handling a "struct tempfile *" return instead of a file
descriptor.
We could play games to make this less noisy. For example, by
defining the tempfile like this:
struct tempfile {
struct heap_allocated_part_of_tempfile {
int fd;
...etc
} *actual_data;
}
Callers would continue to have a "struct tempfile", and it
would be "active" only when the inner pointer was non-NULL.
But that just makes things more awkward in the long run.
There aren't that many callers, so we can simply bite
the bullet and adjust all of them. And the compiler makes it
easy for us to find them all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
temp = xmks_tempfile(git_path("shallow_XXXXXX"));
|
|
|
|
|
|
|
|
if (write_in_full(temp->fd, sb.buf, sb.len) < 0 ||
|
tempfile: auto-allocate tempfiles on heap
The previous commit taught the tempfile code to give up
ownership over tempfiles that have been renamed or deleted.
That makes it possible to use a stack variable like this:
struct tempfile t;
create_tempfile(&t, ...);
...
if (!err)
rename_tempfile(&t, ...);
else
delete_tempfile(&t);
But doing it this way has a high potential for creating
memory errors. The tempfile we pass to create_tempfile()
ends up on a global linked list, and it's not safe for it to
go out of scope until we've called one of those two
deactivation functions.
Imagine that we add an early return from the function that
forgets to call delete_tempfile(). With a static or heap
tempfile variable, the worst case is that the tempfile hangs
around until the program exits (and some functions like
setup_shallow_temporary rely on this intentionally, creating
a tempfile and then leaving it for later cleanup).
But with a stack variable as above, this is a serious memory
error: the variable goes out of scope and may be filled with
garbage by the time the tempfile code looks at it. Let's
see if we can make it harder to get this wrong.
Since many callers need to allocate arbitrary numbers of
tempfiles, we can't rely on static storage as a general
solution. So we need to turn to the heap. We could just ask
all callers to pass us a heap variable, but that puts the
burden on them to call free() at the right time.
Instead, let's have the tempfile code handle the heap
allocation _and_ the deallocation (when the tempfile is
deactivated and removed from the list).
This changes the return value of all of the creation
functions. For the cleanup functions (delete and rename),
we'll add one extra bit of safety: instead of taking a
tempfile pointer, we'll take a pointer-to-pointer and set it
to NULL after freeing the object. This makes it safe to
double-call functions like delete_tempfile(), as the second
call treats the NULL input as a noop. Several callsites
follow this pattern.
The resulting patch does have a fair bit of noise, as each
caller needs to be converted to handle:
1. Storing a pointer instead of the struct itself.
2. Passing the pointer instead of taking the struct
address.
3. Handling a "struct tempfile *" return instead of a file
descriptor.
We could play games to make this less noisy. For example, by
defining the tempfile like this:
struct tempfile {
struct heap_allocated_part_of_tempfile {
int fd;
...etc
} *actual_data;
}
Callers would continue to have a "struct tempfile", and it
would be "active" only when the inner pointer was non-NULL.
But that just makes things more awkward in the long run.
There aren't that many callers, so we can simply bite
the bullet and adjust all of them. And the compiler makes it
easy for us to find them all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
close_tempfile_gently(temp) < 0)
|
|
|
|
die_errno("failed to write to %s",
|
tempfile: auto-allocate tempfiles on heap
The previous commit taught the tempfile code to give up
ownership over tempfiles that have been renamed or deleted.
That makes it possible to use a stack variable like this:
struct tempfile t;
create_tempfile(&t, ...);
...
if (!err)
rename_tempfile(&t, ...);
else
delete_tempfile(&t);
But doing it this way has a high potential for creating
memory errors. The tempfile we pass to create_tempfile()
ends up on a global linked list, and it's not safe for it to
go out of scope until we've called one of those two
deactivation functions.
Imagine that we add an early return from the function that
forgets to call delete_tempfile(). With a static or heap
tempfile variable, the worst case is that the tempfile hangs
around until the program exits (and some functions like
setup_shallow_temporary rely on this intentionally, creating
a tempfile and then leaving it for later cleanup).
But with a stack variable as above, this is a serious memory
error: the variable goes out of scope and may be filled with
garbage by the time the tempfile code looks at it. Let's
see if we can make it harder to get this wrong.
Since many callers need to allocate arbitrary numbers of
tempfiles, we can't rely on static storage as a general
solution. So we need to turn to the heap. We could just ask
all callers to pass us a heap variable, but that puts the
burden on them to call free() at the right time.
Instead, let's have the tempfile code handle the heap
allocation _and_ the deallocation (when the tempfile is
deactivated and removed from the list).
This changes the return value of all of the creation
functions. For the cleanup functions (delete and rename),
we'll add one extra bit of safety: instead of taking a
tempfile pointer, we'll take a pointer-to-pointer and set it
to NULL after freeing the object. This makes it safe to
double-call functions like delete_tempfile(), as the second
call treats the NULL input as a noop. Several callsites
follow this pattern.
The resulting patch does have a fair bit of noise, as each
caller needs to be converted to handle:
1. Storing a pointer instead of the struct itself.
2. Passing the pointer instead of taking the struct
address.
3. Handling a "struct tempfile *" return instead of a file
descriptor.
We could play games to make this less noisy. For example, by
defining the tempfile like this:
struct tempfile {
struct heap_allocated_part_of_tempfile {
int fd;
...etc
} *actual_data;
}
Callers would continue to have a "struct tempfile", and it
would be "active" only when the inner pointer was non-NULL.
But that just makes things more awkward in the long run.
There aren't that many callers, so we can simply bite
the bullet and adjust all of them. And the compiler makes it
easy for us to find them all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
get_tempfile_path(temp));
|
|
|
|
strbuf_release(&sb);
|
tempfile: auto-allocate tempfiles on heap
The previous commit taught the tempfile code to give up
ownership over tempfiles that have been renamed or deleted.
That makes it possible to use a stack variable like this:
struct tempfile t;
create_tempfile(&t, ...);
...
if (!err)
rename_tempfile(&t, ...);
else
delete_tempfile(&t);
But doing it this way has a high potential for creating
memory errors. The tempfile we pass to create_tempfile()
ends up on a global linked list, and it's not safe for it to
go out of scope until we've called one of those two
deactivation functions.
Imagine that we add an early return from the function that
forgets to call delete_tempfile(). With a static or heap
tempfile variable, the worst case is that the tempfile hangs
around until the program exits (and some functions like
setup_shallow_temporary rely on this intentionally, creating
a tempfile and then leaving it for later cleanup).
But with a stack variable as above, this is a serious memory
error: the variable goes out of scope and may be filled with
garbage by the time the tempfile code looks at it. Let's
see if we can make it harder to get this wrong.
Since many callers need to allocate arbitrary numbers of
tempfiles, we can't rely on static storage as a general
solution. So we need to turn to the heap. We could just ask
all callers to pass us a heap variable, but that puts the
burden on them to call free() at the right time.
Instead, let's have the tempfile code handle the heap
allocation _and_ the deallocation (when the tempfile is
deactivated and removed from the list).
This changes the return value of all of the creation
functions. For the cleanup functions (delete and rename),
we'll add one extra bit of safety: instead of taking a
tempfile pointer, we'll take a pointer-to-pointer and set it
to NULL after freeing the object. This makes it safe to
double-call functions like delete_tempfile(), as the second
call treats the NULL input as a noop. Several callsites
follow this pattern.
The resulting patch does have a fair bit of noise, as each
caller needs to be converted to handle:
1. Storing a pointer instead of the struct itself.
2. Passing the pointer instead of taking the struct
address.
3. Handling a "struct tempfile *" return instead of a file
descriptor.
We could play games to make this less noisy. For example, by
defining the tempfile like this:
struct tempfile {
struct heap_allocated_part_of_tempfile {
int fd;
...etc
} *actual_data;
}
Callers would continue to have a "struct tempfile", and it
would be "active" only when the inner pointer was non-NULL.
But that just makes things more awkward in the long run.
There aren't that many callers, so we can simply bite
the bullet and adjust all of them. And the compiler makes it
easy for us to find them all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
return get_tempfile_path(temp);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* is_repository_shallow() sees empty string as "no shallow
|
|
|
|
* file".
|
|
|
|
*/
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
|
|
|
void setup_alternate_shallow(struct shallow_lock *shallow_lock,
|
|
|
|
const char **alternate_shallow_file,
|
|
|
|
const struct oid_array *extra)
|
|
|
|
{
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
fd = hold_lock_file_for_update(&shallow_lock->lock,
|
|
|
|
git_path_shallow(the_repository),
|
|
|
|
LOCK_DIE_ON_ERROR);
|
|
|
|
check_shallow_file_for_update(the_repository);
|
|
|
|
if (write_shallow_commits(&sb, 0, extra)) {
|
avoid "write_in_full(fd, buf, len) != len" pattern
The return value of write_in_full() is either "-1", or the
requested number of bytes[1]. If we make a partial write
before seeing an error, we still return -1, not a partial
value. This goes back to f6aa66cb95 (write_in_full: really
write in full or return error on disk full., 2007-01-11).
So checking anything except "was the return value negative"
is pointless. And there are a couple of reasons not to do
so:
1. It can do a funny signed/unsigned comparison. If your
"len" is signed (e.g., a size_t) then the compiler will
promote the "-1" to its unsigned variant.
This works out for "!= len" (unless you really were
trying to write the maximum size_t bytes), but is a
bug if you check "< len" (an example of which was fixed
recently in config.c).
We should avoid promoting the mental model that you
need to check the length at all, so that new sites are
not tempted to copy us.
2. Checking for a negative value is shorter to type,
especially when the length is an expression.
3. Linus says so. In d34cf19b89 (Clean up write_in_full()
users, 2007-01-11), right after the write_in_full()
semantics were changed, he wrote:
I really wish every "write_in_full()" user would just
check against "<0" now, but this fixes the nasty and
stupid ones.
Appeals to authority aside, this makes it clear that
writing it this way does not have an intentional
benefit. It's a historical curiosity that we never
bothered to clean up (and which was undoubtedly
cargo-culted into new sites).
So let's convert these obviously-correct cases (this
includes write_str_in_full(), which is just a wrapper for
write_in_full()).
[1] A careful reader may notice there is one way that
write_in_full() can return a different value. If we ask
write() to write N bytes and get a return value that is
_larger_ than N, we could return a larger total. But
besides the fact that this would imply a totally broken
version of write(), it would already invoke undefined
behavior. Our internal remaining counter is an unsigned
size_t, which means that subtracting too many byte will
wrap it around to a very large number. So we'll instantly
begin reading off the end of the buffer, trying to write
gigabytes (or petabytes) of data.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
if (write_in_full(fd, sb.buf, sb.len) < 0)
|
|
|
|
die_errno("failed to write to %s",
|
|
|
|
get_lock_file_path(&shallow_lock->lock));
|
|
|
|
*alternate_shallow_file = get_lock_file_path(&shallow_lock->lock);
|
|
|
|
} else
|
|
|
|
/*
|
|
|
|
* is_repository_shallow() sees empty string as "no
|
|
|
|
* shallow file".
|
|
|
|
*/
|
|
|
|
*alternate_shallow_file = "";
|
|
|
|
strbuf_release(&sb);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int advertise_shallow_grafts_cb(const struct commit_graft *graft, void *cb)
|
|
|
|
{
|
|
|
|
int fd = *(int *)cb;
|
|
|
|
if (graft->nr_parent == -1)
|
|
|
|
packet_write_fmt(fd, "shallow %s\n", oid_to_hex(&graft->oid));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void advertise_shallow_grafts(int fd)
|
|
|
|
{
|
|
|
|
if (!is_repository_shallow(the_repository))
|
|
|
|
return;
|
|
|
|
for_each_commit_graft(advertise_shallow_grafts_cb, &fd);
|
|
|
|
}
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs need new shallow commits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, already handled by
fetch-pack and receive-pack, #6 and #7, which has their own patch due
to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
|
|
|
|
/*
|
|
|
|
* mark_reachable_objects() should have been run prior to this and all
|
|
|
|
* reachable commits marked as "SEEN", except when quick_prune is non-zero,
|
|
|
|
* in which case lines are excised from the shallow file if they refer to
|
|
|
|
* commits that do not exist (any longer).
|
|
|
|
*/
|
|
|
|
void prune_shallow(unsigned options)
|
|
|
|
{
|
|
|
|
struct shallow_lock shallow_lock = SHALLOW_LOCK_INIT;
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
unsigned flags = SEEN_ONLY;
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
if (options & PRUNE_QUICK)
|
|
|
|
flags |= QUICK;
|
|
|
|
|
|
|
|
if (options & PRUNE_SHOW_ONLY) {
|
|
|
|
flags |= VERBOSE;
|
|
|
|
write_shallow_commits_1(&sb, 0, NULL, flags);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
fd = hold_lock_file_for_update(&shallow_lock.lock,
|
|
|
|
git_path_shallow(the_repository),
|
|
|
|
LOCK_DIE_ON_ERROR);
|
|
|
|
check_shallow_file_for_update(the_repository);
|
|
|
|
if (write_shallow_commits_1(&sb, 0, NULL, flags)) {
|
avoid "write_in_full(fd, buf, len) != len" pattern
The return value of write_in_full() is either "-1", or the
requested number of bytes[1]. If we make a partial write
before seeing an error, we still return -1, not a partial
value. This goes back to f6aa66cb95 (write_in_full: really
write in full or return error on disk full., 2007-01-11).
So checking anything except "was the return value negative"
is pointless. And there are a couple of reasons not to do
so:
1. It can do a funny signed/unsigned comparison. If your
"len" is unsigned (e.g., a size_t) then the compiler will
convert the "-1" to its unsigned variant.
This works out for "!= len" (unless you really were
trying to write the maximum size_t bytes), but is a
bug if you check "< len" (an example of which was fixed
recently in config.c).
We should avoid promoting the mental model that you
need to check the length at all, so that new sites are
not tempted to copy us.
2. Checking for a negative value is shorter to type,
especially when the length is an expression.
3. Linus says so. In d34cf19b89 (Clean up write_in_full()
users, 2007-01-11), right after the write_in_full()
semantics were changed, he wrote:
I really wish every "write_in_full()" user would just
check against "<0" now, but this fixes the nasty and
stupid ones.
Appeals to authority aside, this makes it clear that
writing it this way does not have an intentional
benefit. It's a historical curiosity that we never
bothered to clean up (and which was undoubtedly
cargo-culted into new sites).
So let's convert these obviously-correct cases (this
includes write_str_in_full(), which is just a wrapper for
write_in_full()).
[1] A careful reader may notice there is one way that
write_in_full() can return a different value. If we ask
write() to write N bytes and get a return value that is
_larger_ than N, we could return a larger total. But
besides the fact that this would imply a totally broken
version of write(), it would already invoke undefined
behavior. Our internal remaining counter is an unsigned
size_t, which means that subtracting too many bytes will
wrap it around to a very large number. So we'll instantly
begin reading off the end of the buffer, trying to write
gigabytes (or petabytes) of data.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
if (write_in_full(fd, sb.buf, sb.len) < 0)
|
|
|
|
die_errno("failed to write to %s",
|
|
|
|
get_lock_file_path(&shallow_lock.lock));
|
shallow.c: use '{commit,rollback}_shallow_file'
In bd0b42aed3 (fetch-pack: do not take shallow lock unnecessarily,
2019-01-10), the author noted that 'is_repository_shallow' produces
visible side-effect(s) by setting 'is_shallow' and 'shallow_stat'.
This is a problem for e.g., fetching with '--update-shallow' in a
shallow repository with 'fetch.writeCommitGraph' enabled, since the
update to '.git/shallow' will cause Git to think that the repository
isn't shallow when it is, thereby circumventing the commit-graph
compatibility check.
This causes problems in shallow repositories with shallow refs
that have at least one ancestor (since the client won't have those
objects, and therefore can't take the reachability closure over commits
when writing a commit-graph).
Address this by introducing thin wrappers over 'commit_lock_file' and
'rollback_lock_file' for use specifically when the lock is held over
'.git/shallow'. These wrappers (appropriately called
'commit_shallow_file' and 'rollback_shallow_file') call into their
respective functions in 'lockfile.h', but additionally reset validity
checks used by the shallow machinery.
Replace each instance of 'commit_lock_file' and 'rollback_lock_file'
with 'commit_shallow_file' and 'rollback_shallow_file' when the lock
being held is over the '.git/shallow' file.
As a result, 'prune_shallow' can now only be called once (since
'check_shallow_file_for_update' will die after calling
'reset_repository_shallow'). But, this is OK since we only call
'prune_shallow' at most once per process.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
commit_shallow_file(the_repository, &shallow_lock);
|
|
|
|
} else {
|
|
|
|
unlink(git_path_shallow(the_repository));
|
shallow.c: use '{commit,rollback}_shallow_file'
In bd0b42aed3 (fetch-pack: do not take shallow lock unnecessarily,
2019-01-10), the author noted that 'is_repository_shallow' produces
visible side-effect(s) by setting 'is_shallow' and 'shallow_stat'.
This is a problem for e.g., fetching with '--update-shallow' in a
shallow repository with 'fetch.writeCommitGraph' enabled, since the
update to '.git/shallow' will cause Git to think that the repository
isn't shallow when it is, thereby circumventing the commit-graph
compatibility check.
This causes problems in shallow repositories with shallow refs
that have at least one ancestor (since the client won't have those
objects, and therefore can't take the reachability closure over commits
when writing a commit-graph).
Address this by introducing thin wrappers over 'commit_lock_file' and
'rollback_lock_file' for use specifically when the lock is held over
'.git/shallow'. These wrappers (appropriately called
'commit_shallow_file' and 'rollback_shallow_file') call into their
respective functions in 'lockfile.h', but additionally reset validity
checks used by the shallow machinery.
Replace each instance of 'commit_lock_file' and 'rollback_lock_file'
with 'commit_shallow_file' and 'rollback_shallow_file' when the lock
being held is over the '.git/shallow' file.
As a result, 'prune_shallow' can now only be called once (since
'check_shallow_file_for_update' will die after calling
'reset_repository_shallow'). But, this is OK since we only call
'prune_shallow' at most once per process.
Helped-by: Jonathan Tan <jonathantanmy@google.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
|
|
|
rollback_shallow_file(the_repository, &shallow_lock);
|
|
|
|
}
|
|
|
|
strbuf_release(&sb);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct trace_key trace_shallow = TRACE_KEY_INIT(SHALLOW);
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs need new shallow commits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, which is already handled by
fetch-pack and receive-pack, and #6 and #7, which have their own patches
due to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 1, split sender shallow commits into "ours" and "theirs"
|
|
|
|
* Step 2, clean "ours" based on .git/shallow
|
|
|
|
*/
|
|
|
|
void prepare_shallow_info(struct shallow_info *info, struct oid_array *sa)
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs need new shallow commits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, which is already handled by
fetch-pack and receive-pack, and #6 and #7, which have their own patches
due to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
{
|
|
|
|
int i;
|
|
|
|
trace_printf_key(&trace_shallow, "shallow: prepare_shallow_info\n");
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs need new shallow commits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, which is already handled by
fetch-pack and receive-pack, and #6 and #7, which have their own patches
due to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
memset(info, 0, sizeof(*info));
|
|
|
|
info->shallow = sa;
|
|
|
|
if (!sa)
|
|
|
|
return;
|
|
|
|
ALLOC_ARRAY(info->ours, sa->nr);
|
|
|
|
ALLOC_ARRAY(info->theirs, sa->nr);
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs need new shallow commits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, which is already handled by
fetch-pack and receive-pack, and #6 and #7, which have their own patches
due to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
for (i = 0; i < sa->nr; i++) {
|
|
|
|
if (has_object_file(sa->oid + i)) {
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs need new shallow commits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, which is already handled by
fetch-pack and receive-pack, and #6 and #7, which have their own patches
due to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
struct commit_graft *graft;
|
|
|
|
graft = lookup_commit_graft(the_repository,
|
|
|
|
&sa->oid[i]);
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs need new shallow commits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3 (already handled by
fetch-pack and receive-pack) and #6 and #7, which have their own
patches due to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (graft && graft->nr_parent < 0)
|
|
|
|
continue;
|
|
|
|
info->ours[info->nr_ours++] = i;
|
|
|
|
} else
|
|
|
|
info->theirs[info->nr_theirs++] = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void clear_shallow_info(struct shallow_info *info)
|
|
|
|
{
|
|
|
|
free(info->ours);
|
|
|
|
free(info->theirs);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Step 4, remove non-existent ones in "theirs" after getting the pack */
|
|
|
|
|
|
|
|
void remove_nonexistent_theirs_shallow(struct shallow_info *info)
|
|
|
|
{
|
|
|
|
struct object_id *oid = info->shallow->oid;
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs needs new shallow comits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, already handled by
fetch-pack and receive-pack, #6 and #7, which has their own patch due
to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
int i, dst;
|
|
|
|
trace_printf_key(&trace_shallow, "shallow: remove_nonexistent_theirs_shallow\n");
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs needs new shallow comits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, already handled by
fetch-pack and receive-pack, #6 and #7, which has their own patch due
to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
for (i = dst = 0; i < info->nr_theirs; i++) {
|
|
|
|
if (i != dst)
|
|
|
|
info->theirs[dst] = info->theirs[i];
|
|
|
|
if (has_object_file(oid + info->theirs[i]))
|
shallow.c: the 8 steps to select new commits for .git/shallow
Suppose a fetch or push is requested between two shallow repositories
(with no history deepening or shortening). A pack that contains
necessary objects is transferred over together with .git/shallow of
the sender. The receiver has to determine whether it needs to update
.git/shallow if new refs needs new shallow comits.
The rule here is avoid updating .git/shallow by default. But we don't
want to waste the received pack. If the pack contains two refs, one
needs new shallow commits installed in .git/shallow and one does not,
we keep the latter and reject/warn about the former.
Even if .git/shallow update is allowed, we only add shallow commits
strictly necessary for the former ref (remember the sender can send
more shallow commits than necessary) and pay attention not to
accidentally cut the receiver history short (no history shortening is
asked for)
So the steps to figure out what ref need what new shallow commits are:
1. Split the sender shallow commit list into "ours" and "theirs" list
by has_sha1_file. Those that exist in current repo in "ours", the
remaining in "theirs".
2. Check the receiver .git/shallow, remove from "ours" the ones that
also exist in .git/shallow.
3. Fetch the new pack. Either install or unpack it.
4. Do has_sha1_file on "theirs" list again. Drop the ones that fail
has_sha1_file. Obviously the new pack does not need them.
5. If the pack is kept, remove from "ours" the ones that do not exist
in the new pack.
6. Walk the new refs to answer the question "what shallow commits,
both ours and theirs, are required in .git/shallow in order to add
this ref?". Shallow commits not associated to any refs are removed
from their respective list.
7. (*) Check reachability (from the current refs) of all remaining
commits in "ours". Those reachable are removed. We do not want to
cut any part of our (reachable) history. We only check up
commits. True reachability test is done by
check_everything_connected() at the end as usual.
8. Combine the final "ours" and "theirs" and add them all to
.git/shallow. Install new refs. The case where some hook rejects
some refs on a push is explained in more detail in the push
patches.
Of these steps, #6 and #7 are expensive. Both require walking through
some commits, or in the worst case all commits. And we rather avoid
them in at least common case, where the transferred pack does not
contain any shallow commits that the sender advertises. Let's look at
each scenario:
1) the sender has longer history than the receiver
All shallow commits from the sender will be put into "theirs" list
at step 1 because none of them exists in current repo. In the
common case, "theirs" becomes empty at step 4 and exit early.
2) the sender has shorter history than the receiver
All shallow commits from the sender are likely in "ours" list at
step 1. In the common case, if the new pack is kept, we could empty
"ours" and exit early at step 5.
If the pack is not kept, we hit the expensive step 6 then exit
after "ours" is emptied. There'll be only a handful of objects to
walk in fast-forward case. If it's forced update, we may need to
walk to the bottom.
3) the sender has same .git/shallow as the receiver
This is similar to case 2 except that "ours" should be emptied at
step 2 and exit early.
A fetch after "clone --depth=X" is case 1. A fetch after "clone" (from
a shallow repo) is case 3. Luckily they're cheap for the common case.
A push from "clone --depth=X" falls into case 2, which is expensive.
Some more work may be done at the sender/client side to avoid more
work on the server side: if the transferred pack does not contain any
shallow commits, send-pack should not send any shallow commits to the
receive-pack, effectively turning it into a normal push and avoid all
steps.
This patch implements all steps except #3, already handled by
fetch-pack and receive-pack, #6 and #7, which has their own patch due
to their size.
(*) in previous versions step 7 was put before step 3. I reorder it so
that the common case that keeps the pack does not need to walk
commits at all. In future if we implement faster commit
reachability check (maybe with the help of pack bitmaps or commit
cache), step 7 could become cheap and be moved up before 6 again.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
dst++;
|
|
|
|
}
|
|
|
|
info->nr_theirs = dst;
|
|
|
|
}
|
|
|
|
|
|
|
|
define_commit_slab(ref_bitmap, uint32_t *);
|
|
|
|
|
|
|
|
#define POOL_SIZE (512 * 1024)
|
|
|
|
|
|
|
|
struct paint_info {
|
|
|
|
struct ref_bitmap ref_bitmap;
|
|
|
|
unsigned nr_bits;
|
|
|
|
char **pools;
|
|
|
|
char *free, *end;
|
|
|
|
unsigned pool_count;
|
|
|
|
};
|
|
|
|
|
|
|
|
static uint32_t *paint_alloc(struct paint_info *info)
|
|
|
|
{
|
|
|
|
unsigned nr = DIV_ROUND_UP(info->nr_bits, 32);
|
|
|
|
unsigned size = nr * sizeof(uint32_t);
|
|
|
|
void *p;
|
|
|
|
if (!info->pool_count || size > info->end - info->free) {
|
|
|
|
if (size > POOL_SIZE)
|
|
|
|
BUG("pool size too small for %d in paint_alloc()",
|
|
|
|
size);
|
|
|
|
info->pool_count++;
|
|
|
|
REALLOC_ARRAY(info->pools, info->pool_count);
|
|
|
|
info->free = xmalloc(POOL_SIZE);
|
|
|
|
info->pools[info->pool_count - 1] = info->free;
|
|
|
|
info->end = info->free + POOL_SIZE;
|
|
|
|
}
|
|
|
|
p = info->free;
|
|
|
|
info->free += size;
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given a commit SHA-1, walk down to parents until either SEEN,
|
|
|
|
* UNINTERESTING or BOTTOM is hit. Set the id-th bit in ref_bitmap for
|
|
|
|
* all walked commits.
|
|
|
|
*/
|
|
|
|
static void paint_down(struct paint_info *info, const struct object_id *oid,
|
|
|
|
unsigned int id)
|
|
|
|
{
|
|
|
|
unsigned int i, nr;
|
|
|
|
struct commit_list *head = NULL;
|
|
|
|
int bitmap_nr = DIV_ROUND_UP(info->nr_bits, 32);
|
|
|
|
size_t bitmap_size = st_mult(sizeof(uint32_t), bitmap_nr);
|
|
|
|
struct commit *c = lookup_commit_reference_gently(the_repository, oid,
|
|
|
|
1);
|
|
|
|
uint32_t *tmp; /* to be freed before return */
|
|
|
|
uint32_t *bitmap;
|
|
|
|
|
|
|
|
if (!c)
|
|
|
|
return;
|
|
|
|
|
|
|
|
tmp = xmalloc(bitmap_size);
|
|
|
|
bitmap = paint_alloc(info);
|
|
|
|
memset(bitmap, 0, bitmap_size);
|
|
|
|
bitmap[id / 32] |= (1U << (id % 32));
|
|
|
|
commit_list_insert(c, &head);
|
|
|
|
while (head) {
|
|
|
|
struct commit_list *p;
|
|
|
|
struct commit *c = pop_commit(&head);
|
|
|
|
uint32_t **refs = ref_bitmap_at(&info->ref_bitmap, c);
|
|
|
|
|
|
|
|
/* XXX check "UNINTERESTING" from pack bitmaps if available */
|
|
|
|
if (c->object.flags & (SEEN | UNINTERESTING))
|
|
|
|
continue;
|
|
|
|
else
|
|
|
|
c->object.flags |= SEEN;
|
|
|
|
|
|
|
|
if (*refs == NULL)
|
|
|
|
*refs = bitmap;
|
|
|
|
else {
|
|
|
|
memcpy(tmp, *refs, bitmap_size);
|
|
|
|
for (i = 0; i < bitmap_nr; i++)
|
|
|
|
tmp[i] |= bitmap[i];
|
|
|
|
if (memcmp(tmp, *refs, bitmap_size)) {
|
|
|
|
*refs = paint_alloc(info);
|
|
|
|
memcpy(*refs, tmp, bitmap_size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (c->object.flags & BOTTOM)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (parse_commit(c))
|
|
|
|
die("unable to parse commit %s",
|
|
|
|
oid_to_hex(&c->object.oid));
|
|
|
|
|
|
|
|
for (p = c->parents; p; p = p->next) {
|
|
|
|
if (p->item->object.flags & SEEN)
|
|
|
|
continue;
|
|
|
|
commit_list_insert(p->item, &head);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
nr = get_max_object_index();
|
|
|
|
for (i = 0; i < nr; i++) {
|
|
|
|
struct object *o = get_indexed_object(i);
|
|
|
|
if (o && o->type == OBJ_COMMIT)
|
|
|
|
o->flags &= ~SEEN;
|
|
|
|
}
|
|
|
|
|
|
|
|
free(tmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mark_uninteresting(const char *refname, const struct object_id *oid,
|
|
|
|
int flags, void *cb_data)
|
|
|
|
{
|
|
|
|
struct commit *commit = lookup_commit_reference_gently(the_repository,
|
|
|
|
oid, 1);
|
|
|
|
if (!commit)
|
|
|
|
return 0;
|
|
|
|
commit->object.flags |= UNINTERESTING;
|
|
|
|
mark_parents_uninteresting(commit);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void post_assign_shallow(struct shallow_info *info,
|
|
|
|
struct ref_bitmap *ref_bitmap,
|
|
|
|
int *ref_status);
|
|
|
|
/*
|
|
|
|
* Step 6(+7), associate shallow commits with new refs
|
|
|
|
*
|
|
|
|
* info->ref must be initialized before calling this function.
|
|
|
|
*
|
|
|
|
* If used is not NULL, it's an array of info->shallow->nr
|
|
|
|
* bitmaps. The n-th bit set in the m-th bitmap if ref[n] needs the
|
|
|
|
* m-th shallow commit from info->shallow.
|
|
|
|
*
|
|
|
|
* If used is NULL, "ours" and "theirs" are updated. And if ref_status
|
|
|
|
* is not NULL it's an array of ref->nr ints. ref_status[i] is true if
|
|
|
|
* the ref needs some shallow commits from either info->ours or
|
|
|
|
* info->theirs.
|
|
|
|
*/
|
|
|
|
void assign_shallow_commits_to_refs(struct shallow_info *info,
|
|
|
|
uint32_t **used, int *ref_status)
|
|
|
|
{
|
|
|
|
struct object_id *oid = info->shallow->oid;
|
|
|
|
struct oid_array *ref = info->ref;
|
|
|
|
unsigned int i, nr;
|
|
|
|
int *shallow, nr_shallow = 0;
|
|
|
|
struct paint_info pi;
|
|
|
|
|
|
|
|
trace_printf_key(&trace_shallow, "shallow: assign_shallow_commits_to_refs\n");
|
|
|
|
ALLOC_ARRAY(shallow, info->nr_ours + info->nr_theirs);
|
|
|
|
for (i = 0; i < info->nr_ours; i++)
|
|
|
|
shallow[nr_shallow++] = info->ours[i];
|
|
|
|
for (i = 0; i < info->nr_theirs; i++)
|
|
|
|
shallow[nr_shallow++] = info->theirs[i];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prepare the commit graph to track what refs can reach what
|
|
|
|
* (new) shallow commits.
|
|
|
|
*/
|
|
|
|
nr = get_max_object_index();
|
|
|
|
for (i = 0; i < nr; i++) {
|
|
|
|
struct object *o = get_indexed_object(i);
|
|
|
|
if (!o || o->type != OBJ_COMMIT)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
o->flags &= ~(UNINTERESTING | BOTTOM | SEEN);
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(&pi, 0, sizeof(pi));
|
|
|
|
init_ref_bitmap(&pi.ref_bitmap);
|
|
|
|
pi.nr_bits = ref->nr;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* "--not --all" to cut short the traversal if new refs
|
|
|
|
* connect to old refs. If not (e.g. force ref updates) it'll
|
|
|
|
* have to go down to the current shallow commits.
|
|
|
|
*/
|
|
|
|
head_ref(mark_uninteresting, NULL);
|
|
|
|
for_each_ref(mark_uninteresting, NULL);
|
|
|
|
|
|
|
|
/* Mark potential bottoms so we won't go out of bound */
|
|
|
|
for (i = 0; i < nr_shallow; i++) {
|
|
|
|
struct commit *c = lookup_commit(the_repository,
|
|
|
|
&oid[shallow[i]]);
|
|
|
|
c->object.flags |= BOTTOM;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ref->nr; i++)
|
|
|
|
paint_down(&pi, ref->oid + i, i);
|
|
|
|
|
|
|
|
if (used) {
|
|
|
|
int bitmap_size = DIV_ROUND_UP(pi.nr_bits, 32) * sizeof(uint32_t);
|
|
|
|
memset(used, 0, sizeof(*used) * info->shallow->nr);
|
|
|
|
for (i = 0; i < nr_shallow; i++) {
|
|
|
|
const struct commit *c = lookup_commit(the_repository,
|
|
|
|
&oid[shallow[i]]);
|
|
|
|
uint32_t **map = ref_bitmap_at(&pi.ref_bitmap, c);
|
|
|
|
if (*map)
|
|
|
|
used[shallow[i]] = xmemdupz(*map, bitmap_size);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* unreachable shallow commits are not removed from
|
|
|
|
* "ours" and "theirs". The user is supposed to run
|
|
|
|
* step 7 on every ref separately and not trust "ours"
|
|
|
|
* and "theirs" any more.
|
|
|
|
*/
|
|
|
|
} else
|
|
|
|
post_assign_shallow(info, &pi.ref_bitmap, ref_status);
|
|
|
|
|
|
|
|
clear_ref_bitmap(&pi.ref_bitmap);
|
|
|
|
for (i = 0; i < pi.pool_count; i++)
|
|
|
|
free(pi.pools[i]);
|
|
|
|
free(pi.pools);
|
|
|
|
free(shallow);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Growable array of commit pointers, filled by add_ref(). */
struct commit_array {
	/* the collected commits */
	struct commit **commits;
	/* number of entries in use */
	int nr;
	/* allocated capacity, maintained by ALLOC_GROW() */
	int alloc;
};
|
|
|
|
|
|
|
|
static int add_ref(const char *refname, const struct object_id *oid,
|
|
|
|
int flags, void *cb_data)
|
|
|
|
{
|
|
|
|
struct commit_array *ca = cb_data;
|
|
|
|
ALLOC_GROW(ca->commits, ca->nr + 1, ca->alloc);
|
|
|
|
ca->commits[ca->nr] = lookup_commit_reference_gently(the_repository,
|
|
|
|
oid, 1);
|
|
|
|
if (ca->commits[ca->nr])
|
|
|
|
ca->nr++;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * For each of the first "nr" bits that is set in "bitmap", increment
 * the matching counter in ref_status. Does nothing when ref_status
 * is NULL.
 */
static void update_refstatus(int *ref_status, int nr, uint32_t *bitmap)
{
	unsigned int bit;

	if (ref_status == NULL)
		return;

	for (bit = 0; bit < nr; bit++) {
		uint32_t word = bitmap[bit / 32];
		uint32_t mask = 1U << (bit % 32);

		if (word & mask)
			ref_status[bit]++;
	}
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 7, reachability test on "ours" at commit level
|
|
|
|
*/
|
|
|
|
static void post_assign_shallow(struct shallow_info *info,
|
|
|
|
struct ref_bitmap *ref_bitmap,
|
|
|
|
int *ref_status)
|
|
|
|
{
|
|
|
|
struct object_id *oid = info->shallow->oid;
|
|
|
|
struct commit *c;
|
|
|
|
uint32_t **bitmap;
|
|
|
|
int dst, i, j;
|
|
|
|
int bitmap_nr = DIV_ROUND_UP(info->ref->nr, 32);
|
|
|
|
struct commit_array ca;
|
|
|
|
|
|
|
|
trace_printf_key(&trace_shallow, "shallow: post_assign_shallow\n");
|
|
|
|
if (ref_status)
|
|
|
|
memset(ref_status, 0, sizeof(*ref_status) * info->ref->nr);
|
|
|
|
|
|
|
|
/* Remove unreachable shallow commits from "theirs" */
|
|
|
|
for (i = dst = 0; i < info->nr_theirs; i++) {
|
|
|
|
if (i != dst)
|
|
|
|
info->theirs[dst] = info->theirs[i];
|
|
|
|
c = lookup_commit(the_repository, &oid[info->theirs[i]]);
|
|
|
|
bitmap = ref_bitmap_at(ref_bitmap, c);
|
|
|
|
if (!*bitmap)
|
|
|
|
continue;
|
|
|
|
for (j = 0; j < bitmap_nr; j++)
|
|
|
|
if (bitmap[0][j]) {
|
|
|
|
update_refstatus(ref_status, info->ref->nr, *bitmap);
|
|
|
|
dst++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
info->nr_theirs = dst;
|
|
|
|
|
|
|
|
memset(&ca, 0, sizeof(ca));
|
|
|
|
head_ref(add_ref, &ca);
|
|
|
|
for_each_ref(add_ref, &ca);
|
|
|
|
|
|
|
|
/* Remove unreachable shallow commits from "ours" */
|
|
|
|
for (i = dst = 0; i < info->nr_ours; i++) {
|
|
|
|
if (i != dst)
|
|
|
|
info->ours[dst] = info->ours[i];
|
|
|
|
c = lookup_commit(the_repository, &oid[info->ours[i]]);
|
|
|
|
bitmap = ref_bitmap_at(ref_bitmap, c);
|
|
|
|
if (!*bitmap)
|
|
|
|
continue;
|
|
|
|
for (j = 0; j < bitmap_nr; j++)
|
|
|
|
if (bitmap[0][j] &&
|
|
|
|
/* Step 7, reachability test at commit level */
|
|
|
|
!in_merge_bases_many(c, ca.nr, ca.commits)) {
|
|
|
|
update_refstatus(ref_status, info->ref->nr, *bitmap);
|
|
|
|
dst++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
info->nr_ours = dst;
|
|
|
|
|
|
|
|
free(ca.commits);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* (Delayed) step 7, reachability test at commit level */
|
|
|
|
int delayed_reachability_test(struct shallow_info *si, int c)
|
|
|
|
{
|
|
|
|
if (si->need_reachability_test[c]) {
|
|
|
|
struct commit *commit = lookup_commit(the_repository,
|
|
|
|
&si->shallow->oid[c]);
|
|
|
|
|
|
|
|
if (!si->commits) {
|
|
|
|
struct commit_array ca;
|
|
|
|
|
|
|
|
memset(&ca, 0, sizeof(ca));
|
|
|
|
head_ref(add_ref, &ca);
|
|
|
|
for_each_ref(add_ref, &ca);
|
|
|
|
si->commits = ca.commits;
|
|
|
|
si->nr_commits = ca.nr;
|
|
|
|
}
|
|
|
|
|
|
|
|
si->reachable[c] = in_merge_bases_many(commit,
|
|
|
|
si->nr_commits,
|
|
|
|
si->commits);
|
|
|
|
si->need_reachability_test[c] = 0;
|
|
|
|
}
|
|
|
|
return si->reachable[c];
|
|
|
|
}
|