|
|
|
#include "builtin.h"
|
|
|
|
#include "cache.h"
|
|
|
|
#include "config.h"
|
|
|
|
#include "dir.h"
|
|
|
|
#include "parse-options.h"
|
|
|
|
#include "run-command.h"
|
|
|
|
#include "sigchain.h"
|
|
|
|
#include "strbuf.h"
|
|
|
|
#include "string-list.h"
|
|
|
|
#include "argv-array.h"
|
|
|
|
|
|
|
|
/* Overridden by the repack.usedeltabaseoffset config key; enabled by default. */
static int delta_base_offset = 1;
|
repack: add `repack.packKeptObjects` config var
The git-repack command always passes `--honor-pack-keep`
to pack-objects. This has traditionally been a good thing,
as we do not want to duplicate those objects in a new pack,
and we are not going to delete the old pack.
However, when bitmaps are in use, it is important for a full
repack to include all reachable objects, even if they may be
duplicated in a .keep pack. Otherwise, we cannot generate
the bitmaps, as the on-disk format requires the set of
objects in the pack to be fully closed.
Even if the repository does not generally have .keep files,
a simultaneous push could cause a race condition in which a
.keep file exists at the moment of a repack. The repack may
try to include those objects in one of two situations:
1. The pushed .keep pack contains objects that were
already in the repository (e.g., blobs due to a revert of
an old commit).
2. Receive-pack updates the refs, making the objects
reachable, but before it removes the .keep file, the
repack runs.
In either case, we may prefer to duplicate some objects in
the new, full pack, and let the next repack (after the .keep
file is cleaned up) take care of removing them.
This patch introduces both a command-line and config option
to disable the `--honor-pack-keep` option. By default, it
is triggered when pack.writeBitmaps is turned on (or `--write-bitmap-index`
is given), but specifying it explicitly can override the
behavior (e.g., in cases where you prefer .keep files to
bitmaps, but only when they are present).
Note that this option just disables the pack-objects
behavior. We still leave packs with a .keep in place, as we
do not necessarily know that we have duplicated all of their
objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
static int pack_kept_objects = -1;
|
|
|
|
static int write_bitmaps;
|
|
|
|
static char *packdir, *packtmp;
|
|
|
|
|
|
|
|
static const char *const git_repack_usage[] = {
|
|
|
|
N_("git repack [<options>]"),
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char incremental_bitmap_conflict_error[] = N_(
|
|
|
|
"Incremental repacks are incompatible with bitmap indexes. Use\n"
|
|
|
|
"--no-write-bitmap-index or disable the pack.writebitmaps configuration."
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
|
|
static int repack_config(const char *var, const char *value, void *cb)
|
|
|
|
{
|
|
|
|
if (!strcmp(var, "repack.usedeltabaseoffset")) {
|
|
|
|
delta_base_offset = git_config_bool(var, value);
|
|
|
|
return 0;
|
|
|
|
}
|
repack: add `repack.packKeptObjects` config var
The git-repack command always passes `--honor-pack-keep`
to pack-objects. This has traditionally been a good thing,
as we do not want to duplicate those objects in a new pack,
and we are not going to delete the old pack.
However, when bitmaps are in use, it is important for a full
repack to include all reachable objects, even if they may be
duplicated in a .keep pack. Otherwise, we cannot generate
the bitmaps, as the on-disk format requires the set of
objects in the pack to be fully closed.
Even if the repository does not generally have .keep files,
a simultaneous push could cause a race condition in which a
.keep file exists at the moment of a repack. The repack may
try to include those objects in one of two situations:
1. The pushed .keep pack contains objects that were
already in the repository (e.g., blobs due to a revert of
an old commit).
2. Receive-pack updates the refs, making the objects
reachable, but before it removes the .keep file, the
repack runs.
In either case, we may prefer to duplicate some objects in
the new, full pack, and let the next repack (after the .keep
file is cleaned up) take care of removing them.
This patch introduces both a command-line and config option
to disable the `--honor-pack-keep` option. By default, it
is triggered when pack.writeBitmaps (or `--write-bitmap-index`
is turned on), but specifying it explicitly can override the
behavior (e.g., in cases where you prefer .keep files to
bitmaps, but only when they are present).
Note that this option just disables the pack-objects
behavior. We still leave packs with a .keep in place, as we
do not necessarily know that we have duplicated all of their
objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (!strcmp(var, "repack.packkeptobjects")) {
|
|
|
|
pack_kept_objects = git_config_bool(var, value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(var, "repack.writebitmaps") ||
|
|
|
|
!strcmp(var, "pack.writebitmaps")) {
|
|
|
|
write_bitmaps = git_config_bool(var, value);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return git_default_config(var, value, cb);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove temporary $GIT_OBJECT_DIRECTORY/pack/.tmp-$$-pack-* files.
|
|
|
|
*/
|
|
|
|
static void remove_temporary_files(void)
|
|
|
|
{
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
size_t dirlen, prefixlen;
|
|
|
|
DIR *dir;
|
|
|
|
struct dirent *e;
|
|
|
|
|
|
|
|
dir = opendir(packdir);
|
|
|
|
if (!dir)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Point at the slash at the end of ".../objects/pack/" */
|
|
|
|
dirlen = strlen(packdir) + 1;
|
|
|
|
strbuf_addstr(&buf, packtmp);
|
|
|
|
/* Hold the length of ".tmp-%d-pack-" */
|
|
|
|
prefixlen = buf.len - dirlen;
|
|
|
|
|
|
|
|
while ((e = readdir(dir))) {
|
|
|
|
if (strncmp(e->d_name, buf.buf + dirlen, prefixlen))
|
|
|
|
continue;
|
|
|
|
strbuf_setlen(&buf, dirlen);
|
|
|
|
strbuf_addstr(&buf, e->d_name);
|
|
|
|
unlink(buf.buf);
|
|
|
|
}
|
|
|
|
closedir(dir);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Clean up this process's temporary pack files, then re-deliver `signo`.
 *
 * Has the shape of a signal handler; where it is installed is not visible
 * in this part of the file.
 */
static void remove_pack_on_signal(int signo)
{
	/* Drop the temporary pack files first. */
	remove_temporary_files();

	/*
	 * Pop our entry for this signal (presumably restoring the previous
	 * disposition -- see sigchain.h) before raising it again.
	 */
	sigchain_pop(signo);
	raise(signo);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Adds all packs hex strings to the fname list, which do not
|
|
|
|
* have a corresponding .keep or .promisor file. These packs are not to
|
|
|
|
* be kept if we are going to pack everything into one file.
|
|
|
|
*/
|
|
|
|
static void get_non_kept_pack_filenames(struct string_list *fname_list)
|
|
|
|
{
|
|
|
|
DIR *dir;
|
|
|
|
struct dirent *e;
|
|
|
|
char *fname;
|
|
|
|
|
|
|
|
if (!(dir = opendir(packdir)))
|
|
|
|
return;
|
|
|
|
|
|
|
|
while ((e = readdir(dir)) != NULL) {
|
|
|
|
size_t len;
|
|
|
|
if (!strip_suffix(e->d_name, ".pack", &len))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
fname = xmemdupz(e->d_name, len);
|
|
|
|
|
|
|
|
if (!file_exists(mkpath("%s/%s.keep", packdir, fname)) &&
|
|
|
|
!file_exists(mkpath("%s/%s.promisor", packdir, fname)))
|
|
|
|
string_list_append_nodup(fname_list, fname);
|
|
|
|
else
|
|
|
|
free(fname);
|
|
|
|
}
|
|
|
|
closedir(dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void remove_redundant_pack(const char *dir_name, const char *base_name)
|
|
|
|
{
|
|
|
|
const char *exts[] = {".pack", ".idx", ".keep", ".bitmap"};
|
|
|
|
int i;
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
size_t plen;
|
|
|
|
|
|
|
|
strbuf_addf(&buf, "%s/%s", dir_name, base_name);
|
|
|
|
plen = buf.len;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(exts); i++) {
|
|
|
|
strbuf_setlen(&buf, plen);
|
|
|
|
strbuf_addstr(&buf, exts[i]);
|
|
|
|
unlink(buf.buf);
|
|
|
|
}
|
|
|
|
strbuf_release(&buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Bit flags combined in cmd_repack()'s pack_everything (set by -a / -A). */
#define ALL_INTO_ONE		(1 << 0)
#define LOOSEN_UNREACHABLE	(1 << 1)
|
|
|
|
|
|
|
|
int cmd_repack(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
|
|
|
struct {
|
|
|
|
const char *name;
|
|
|
|
unsigned optional:1;
|
|
|
|
} exts[] = {
|
|
|
|
{".pack"},
|
|
|
|
{".idx"},
|
|
|
|
{".bitmap", 1},
|
|
|
|
};
|
|
|
|
struct child_process cmd = CHILD_PROCESS_INIT;
|
|
|
|
struct string_list_item *item;
|
|
|
|
struct string_list names = STRING_LIST_INIT_DUP;
|
|
|
|
struct string_list rollback = STRING_LIST_INIT_NODUP;
|
|
|
|
struct string_list existing_packs = STRING_LIST_INIT_DUP;
|
|
|
|
struct strbuf line = STRBUF_INIT;
|
|
|
|
int ext, ret, failed;
|
|
|
|
FILE *out;
|
|
|
|
|
|
|
|
/* variables to be filled by option parsing */
|
|
|
|
int pack_everything = 0;
|
|
|
|
int delete_redundant = 0;
|
|
|
|
const char *unpack_unreachable = NULL;
|
repack: add --keep-unreachable option
The usual way to do a full repack (and what is done by
git-gc) is to run "repack -Ad --unpack-unreachable=<when>",
which will loosen any unreachable objects newer than
"<when>", and drop any older ones.
This is a safer alternative to "repack -ad", because
"<when>" becomes a grace period during which we will not
drop any new objects that are about to be referenced.
However, it isn't perfectly safe. It's always possible that
a process is about to reference an old object. Even if that
process were to take care to update the timestamp on the
object, there is no atomicity with a simultaneously running
"repack" process.
So while unlikely, there is a small race wherein we may drop
an object that is in the process of being referenced. If you
do automated repacking on a large number of active
repositories, you may hit it eventually, and the result is a
corrupted repository.
It would be nice to fix that race in the long run, but it's
complicated. In the meantime, there is a much simpler
strategy for automated repository maintenance: do not drop
objects at all. We already have a "--keep-unreachable"
option in pack-objects; we just need to plumb it through
from git-repack.
Note that this _isn't_ plumbed through from git-gc, so at
this point it's strictly a tool for people doing their own
advanced repository maintenance strategy.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
int keep_unreachable = 0;
|
repack: propagate pack-objects options as strings
In the original shell version of git-repack, any options
destined for pack-objects were left as strings, and passed
as a whole. Since the C rewrite in commit a1bbc6c (repack:
rewrite the shell script in C, 2013-09-15), we now parse
these values to integers internally, then reformat the
integers when passing the option to pack-objects.
This has the advantage that we catch format errors earlier
(i.e., when repack is invoked, rather than when pack-objects
is invoked).
It has three disadvantages, though:
1. Our internal data types may not be the right size. In
the case of "--window-memory" and "--max-pack-size",
these are "unsigned long" in pack-objects, but we can
only represent a regular "int".
2. Our parsing routines might not be the same as those of
pack-objects. For the two options above, pack-objects
understands "100m" to mean "100 megabytes", but repack
does not.
3. We have to keep a sentinel value to know whether it is
worth passing the option along. In the case of
"--window-memory", we currently do not pass it if the
value is "0". But that is a meaningful value to
pack-objects, where it overrides any configured value.
We can fix all of these by simply passing the strings from
the user along to pack-objects verbatim. This does not
actually fix anything for "--depth" or "--window", but these
are converted, too, for consistency.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
const char *window = NULL, *window_memory = NULL;
|
|
|
|
const char *depth = NULL;
|
|
|
|
const char *threads = NULL;
|
repack: propagate pack-objects options as strings
In the original shell version of git-repack, any options
destined for pack-objects were left as strings, and passed
as a whole. Since the C rewrite in commit a1bbc6c (repack:
rewrite the shell script in C, 2013-09-15), we now parse
these values to integers internally, then reformat the
integers when passing the option to pack-objects.
This has the advantage that we catch format errors earlier
(i.e., when repack is invoked, rather than when pack-objects
is invoked).
It has three disadvantages, though:
1. Our internal data types may not be the right size. In
the case of "--window-memory" and "--max-pack-size",
these are "unsigned long" in pack-objects, but we can
only represent a regular "int".
2. Our parsing routines might not be the same as those of
pack-objects. For the two options above, pack-objects
understands "100m" to mean "100 megabytes", but repack
does not.
3. We have to keep a sentinel value to know whether it is
worth passing the option along. In the case of
"--window-memory", we currently do not pass it if the
value is "0". But that is a meaningful value to
pack-objects, where it overrides any configured value.
We can fix all of these by simply passing the strings from
the user along to pack-objects verbatim. This does not
actually fix anything for "--depth" or "--window", but these
are converted, too, for consistency.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
const char *max_pack_size = NULL;
|
|
|
|
int no_reuse_delta = 0, no_reuse_object = 0;
|
|
|
|
int no_update_server_info = 0;
|
|
|
|
int quiet = 0;
|
|
|
|
int local = 0;
|
|
|
|
|
|
|
|
struct option builtin_repack_options[] = {
|
|
|
|
OPT_BIT('a', NULL, &pack_everything,
|
|
|
|
N_("pack everything in a single pack"), ALL_INTO_ONE),
|
|
|
|
OPT_BIT('A', NULL, &pack_everything,
|
|
|
|
N_("same as -a, and turn unreachable objects loose"),
|
|
|
|
LOOSEN_UNREACHABLE | ALL_INTO_ONE),
|
|
|
|
OPT_BOOL('d', NULL, &delete_redundant,
|
|
|
|
N_("remove redundant packs, and run git-prune-packed")),
|
|
|
|
OPT_BOOL('f', NULL, &no_reuse_delta,
|
|
|
|
N_("pass --no-reuse-delta to git-pack-objects")),
|
|
|
|
OPT_BOOL('F', NULL, &no_reuse_object,
|
|
|
|
N_("pass --no-reuse-object to git-pack-objects")),
|
|
|
|
OPT_BOOL('n', NULL, &no_update_server_info,
|
|
|
|
N_("do not run git-update-server-info")),
|
|
|
|
OPT__QUIET(&quiet, N_("be quiet")),
|
|
|
|
OPT_BOOL('l', "local", &local,
|
|
|
|
N_("pass --local to git-pack-objects")),
|
|
|
|
OPT_BOOL('b', "write-bitmap-index", &write_bitmaps,
|
|
|
|
N_("write bitmap index")),
|
|
|
|
OPT_STRING(0, "unpack-unreachable", &unpack_unreachable, N_("approxidate"),
|
|
|
|
N_("with -A, do not loosen objects older than this")),
|
repack: add --keep-unreachable option
The usual way to do a full repack (and what is done by
git-gc) is to run "repack -Ad --unpack-unreachable=<when>",
which will loosen any unreachable objects newer than
"<when>", and drop any older ones.
This is a safer alternative to "repack -ad", because
"<when>" becomes a grace period during which we will not
drop any new objects that are about to be referenced.
However, it isn't perfectly safe. It's always possible that
a process is about to reference an old object. Even if that
process were to take care to update the timestamp on the
object, there is no atomicity with a simultaneously running
"repack" process.
So while unlikely, there is a small race wherein we may drop
an object that is in the process of being referenced. If you
do automated repacking on a large number of active
repositories, you may hit it eventually, and the result is a
corrupted repository.
It would be nice to fix that race in the long run, but it's
complicated. In the meantime, there is a much simpler
strategy for automated repository maintenance: do not drop
objects at all. We already have a "--keep-unreachable"
option in pack-objects; we just need to plumb it through
from git-repack.
Note that this _isn't_ plumbed through from git-gc, so at
this point it's strictly a tool for people doing their own
advanced repository maintenance strategy.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
OPT_BOOL('k', "keep-unreachable", &keep_unreachable,
|
|
|
|
N_("with -a, repack unreachable objects")),
|
repack: propagate pack-objects options as strings
In the original shell version of git-repack, any options
destined for pack-objects were left as strings, and passed
as a whole. Since the C rewrite in commit a1bbc6c (repack:
rewrite the shell script in C, 2013-09-15), we now parse
these values to integers internally, then reformat the
integers when passing the option to pack-objects.
This has the advantage that we catch format errors earlier
(i.e., when repack is invoked, rather than when pack-objects
is invoked).
It has three disadvantages, though:
1. Our internal data types may not be the right size. In
the case of "--window-memory" and "--max-pack-size",
these are "unsigned long" in pack-objects, but we can
only represent a regular "int".
2. Our parsing routines might not be the same as those of
pack-objects. For the two options above, pack-objects
understands "100m" to mean "100 megabytes", but repack
does not.
3. We have to keep a sentinel value to know whether it is
worth passing the option along. In the case of
"--window-memory", we currently do not pass it if the
value is "0". But that is a meaningful value to
pack-objects, where it overrides any configured value.
We can fix all of these by simply passing the strings from
the user along to pack-objects verbatim. This does not
actually fix anything for "--depth" or "--window", but these
are converted, too, for consistency.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
OPT_STRING(0, "window", &window, N_("n"),
|
|
|
|
N_("size of the window used for delta compression")),
|
repack: propagate pack-objects options as strings
In the original shell version of git-repack, any options
destined for pack-objects were left as strings, and passed
as a whole. Since the C rewrite in commit a1bbc6c (repack:
rewrite the shell script in C, 2013-09-15), we now parse
these values to integers internally, then reformat the
integers when passing the option to pack-objects.
This has the advantage that we catch format errors earlier
(i.e., when repack is invoked, rather than when pack-objects
is invoked).
It has three disadvantages, though:
1. Our internal data types may not be the right size. In
the case of "--window-memory" and "--max-pack-size",
these are "unsigned long" in pack-objects, but we can
only represent a regular "int".
2. Our parsing routines might not be the same as those of
pack-objects. For the two options above, pack-objects
understands "100m" to mean "100 megabytes", but repack
does not.
3. We have to keep a sentinel value to know whether it is
worth passing the option along. In the case of
"--window-memory", we currently do not pass it if the
value is "0". But that is a meaningful value to
pack-objects, where it overrides any configured value.
We can fix all of these by simply passing the strings from
the user along to pack-objects verbatim. This does not
actually fix anything for "--depth" or "--window", but these
are converted, too, for consistency.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
OPT_STRING(0, "window-memory", &window_memory, N_("bytes"),
|
|
|
|
N_("same as the above, but limit memory size instead of entries count")),
|
repack: propagate pack-objects options as strings
In the original shell version of git-repack, any options
destined for pack-objects were left as strings, and passed
as a whole. Since the C rewrite in commit a1bbc6c (repack:
rewrite the shell script in C, 2013-09-15), we now parse
these values to integers internally, then reformat the
integers when passing the option to pack-objects.
This has the advantage that we catch format errors earlier
(i.e., when repack is invoked, rather than when pack-objects
is invoked).
It has three disadvantages, though:
1. Our internal data types may not be the right size. In
the case of "--window-memory" and "--max-pack-size",
these are "unsigned long" in pack-objects, but we can
only represent a regular "int".
2. Our parsing routines might not be the same as those of
pack-objects. For the two options above, pack-objects
understands "100m" to mean "100 megabytes", but repack
does not.
3. We have to keep a sentinel value to know whether it is
worth passing the option along. In the case of
"--window-memory", we currently do not pass it if the
value is "0". But that is a meaningful value to
pack-objects, where it overrides any configured value.
We can fix all of these by simply passing the strings from
the user along to pack-objects verbatim. This does not
actually fix anything for "--depth" or "--window", but these
are converted, too, for consistency.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
OPT_STRING(0, "depth", &depth, N_("n"),
|
|
|
|
N_("limits the maximum delta depth")),
|
|
|
|
OPT_STRING(0, "threads", &threads, N_("n"),
|
|
|
|
N_("limits the maximum number of threads")),
|
repack: propagate pack-objects options as strings
In the original shell version of git-repack, any options
destined for pack-objects were left as strings, and passed
as a whole. Since the C rewrite in commit a1bbc6c (repack:
rewrite the shell script in C, 2013-09-15), we now parse
these values to integers internally, then reformat the
integers when passing the option to pack-objects.
This has the advantage that we catch format errors earlier
(i.e., when repack is invoked, rather than when pack-objects
is invoked).
It has three disadvantages, though:
1. Our internal data types may not be the right size. In
the case of "--window-memory" and "--max-pack-size",
these are "unsigned long" in pack-objects, but we can
only represent a regular "int".
2. Our parsing routines might not be the same as those of
pack-objects. For the two options above, pack-objects
understands "100m" to mean "100 megabytes", but repack
does not.
3. We have to keep a sentinel value to know whether it is
worth passing the option along. In the case of
"--window-memory", we currently do not pass it if the
value is "0". But that is a meaningful value to
pack-objects, where it overrides any configured value.
We can fix all of these by simply passing the strings from
the user along to pack-objects verbatim. This does not
actually fix anything for "--depth" or "--window", but these
are converted, too, for consistency.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
OPT_STRING(0, "max-pack-size", &max_pack_size, N_("bytes"),
|
|
|
|
N_("maximum size of each packfile")),
|
repack: add `repack.packKeptObjects` config var
The git-repack command always passes `--honor-pack-keep`
to pack-objects. This has traditionally been a good thing,
as we do not want to duplicate those objects in a new pack,
and we are not going to delete the old pack.
However, when bitmaps are in use, it is important for a full
repack to include all reachable objects, even if they may be
duplicated in a .keep pack. Otherwise, we cannot generate
the bitmaps, as the on-disk format requires the set of
objects in the pack to be fully closed.
Even if the repository does not generally have .keep files,
a simultaneous push could cause a race condition in which a
.keep file exists at the moment of a repack. The repack may
try to include those objects in one of two situations:
1. The pushed .keep pack contains objects that were
already in the repository (e.g., blobs due to a revert of
an old commit).
2. Receive-pack updates the refs, making the objects
reachable, but before it removes the .keep file, the
repack runs.
In either case, we may prefer to duplicate some objects in
the new, full pack, and let the next repack (after the .keep
file is cleaned up) take care of removing them.
This patch introduces both a command-line and config option
to disable the `--honor-pack-keep` option. By default, it
is triggered when pack.writeBitmaps is turned on (or `--write-bitmap-index`
is given), but specifying it explicitly can override the
behavior (e.g., in cases where you prefer .keep files to
bitmaps, but only when they are present).
Note that this option just disables the pack-objects
behavior. We still leave packs with a .keep in place, as we
do not necessarily know that we have duplicated all of their
objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
|
|
|
|
N_("repack objects in packs marked with .keep")),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
|
|
|
git_config(repack_config, NULL);
|
|
|
|
|
|
|
|
argc = parse_options(argc, argv, prefix, builtin_repack_options,
|
|
|
|
git_repack_usage, 0);
|
|
|
|
|
|
|
|
if (delete_redundant && repository_format_precious_objects)
|
|
|
|
die(_("cannot delete packs in a precious-objects repo"));
|
|
|
|
|
repack: add --keep-unreachable option
The usual way to do a full repack (and what is done by
git-gc) is to run "repack -Ad --unpack-unreachable=<when>",
which will loosen any unreachable objects newer than
"<when>", and drop any older ones.
This is a safer alternative to "repack -ad", because
"<when>" becomes a grace period during which we will not
drop any new objects that are about to be referenced.
However, it isn't perfectly safe. It's always possible that
a process is about to reference an old object. Even if that
process were to take care to update the timestamp on the
object, there is no atomicity with a simultaneously running
"repack" process.
So while unlikely, there is a small race wherein we may drop
an object that is in the process of being referenced. If you
do automated repacking on a large number of active
repositories, you may hit it eventually, and the result is a
corrupted repository.
It would be nice to fix that race in the long run, but it's
complicated. In the meantime, there is a much simpler
strategy for automated repository maintenance: do not drop
objects at all. We already have a "--keep-unreachable"
option in pack-objects; we just need to plumb it through
from git-repack.
Note that this _isn't_ plumbed through from git-gc, so at
this point it's strictly a tool for people doing their own
advanced repository maintenance strategy.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
if (keep_unreachable &&
|
|
|
|
(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE)))
|
|
|
|
die(_("--keep-unreachable and -A are incompatible"));
|
|
|
|
|
repack: add `repack.packKeptObjects` config var
The git-repack command always passes `--honor-pack-keep`
to pack-objects. This has traditionally been a good thing,
as we do not want to duplicate those objects in a new pack,
and we are not going to delete the old pack.
However, when bitmaps are in use, it is important for a full
repack to include all reachable objects, even if they may be
duplicated in a .keep pack. Otherwise, we cannot generate
the bitmaps, as the on-disk format requires the set of
objects in the pack to be fully closed.
Even if the repository does not generally have .keep files,
a simultaneous push could cause a race condition in which a
.keep file exists at the moment of a repack. The repack may
try to include those objects in one of two situations:
1. The pushed .keep pack contains objects that were
already in the repository (e.g., blobs due to a revert of
an old commit).
2. Receive-pack updates the refs, making the objects
reachable, but before it removes the .keep file, the
repack runs.
In either case, we may prefer to duplicate some objects in
the new, full pack, and let the next repack (after the .keep
file is cleaned up) take care of removing them.
This patch introduces both a command-line and config option
to disable the `--honor-pack-keep` option. By default, it
is triggered when pack.writeBitmaps (or `--write-bitmap-index`)
is turned on, but specifying it explicitly can override the
behavior (e.g., in cases where you prefer .keep files to
bitmaps, but only when they are present).
Note that this option just disables the pack-objects
behavior. We still leave packs with a .keep in place, as we
do not necessarily know that we have duplicated all of their
objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (pack_kept_objects < 0)
|
|
|
|
pack_kept_objects = write_bitmaps;
|
repack: add `repack.packKeptObjects` config var
The git-repack command always passes `--honor-pack-keep`
to pack-objects. This has traditionally been a good thing,
as we do not want to duplicate those objects in a new pack,
and we are not going to delete the old pack.
However, when bitmaps are in use, it is important for a full
repack to include all reachable objects, even if they may be
duplicated in a .keep pack. Otherwise, we cannot generate
the bitmaps, as the on-disk format requires the set of
objects in the pack to be fully closed.
Even if the repository does not generally have .keep files,
a simultaneous push could cause a race condition in which a
.keep file exists at the moment of a repack. The repack may
try to include those objects in one of two situations:
1. The pushed .keep pack contains objects that were
already in the repository (e.g., blobs due to a revert of
an old commit).
2. Receive-pack updates the refs, making the objects
reachable, but before it removes the .keep file, the
repack runs.
In either case, we may prefer to duplicate some objects in
the new, full pack, and let the next repack (after the .keep
file is cleaned up) take care of removing them.
This patch introduces both a command-line and config option
to disable the `--honor-pack-keep` option. By default, it
is triggered when pack.writeBitmaps (or `--write-bitmap-index`)
is turned on, but specifying it explicitly can override the
behavior (e.g., in cases where you prefer .keep files to
bitmaps, but only when they are present).
Note that this option just disables the pack-objects
behavior. We still leave packs with a .keep in place, as we
do not necessarily know that we have duplicated all of their
objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
|
|
|
|
if (write_bitmaps && !(pack_everything & ALL_INTO_ONE))
|
|
|
|
die(_(incremental_bitmap_conflict_error));
|
|
|
|
|
|
|
|
packdir = mkpathdup("%s/pack", get_object_directory());
|
|
|
|
packtmp = mkpathdup("%s/.tmp-%d-pack", packdir, (int)getpid());
|
|
|
|
|
|
|
|
sigchain_push_common(remove_pack_on_signal);
|
|
|
|
|
|
|
|
argv_array_push(&cmd.args, "pack-objects");
|
|
|
|
argv_array_push(&cmd.args, "--keep-true-parents");
|
repack: add `repack.packKeptObjects` config var
The git-repack command always passes `--honor-pack-keep`
to pack-objects. This has traditionally been a good thing,
as we do not want to duplicate those objects in a new pack,
and we are not going to delete the old pack.
However, when bitmaps are in use, it is important for a full
repack to include all reachable objects, even if they may be
duplicated in a .keep pack. Otherwise, we cannot generate
the bitmaps, as the on-disk format requires the set of
objects in the pack to be fully closed.
Even if the repository does not generally have .keep files,
a simultaneous push could cause a race condition in which a
.keep file exists at the moment of a repack. The repack may
try to include those objects in one of two situations:
1. The pushed .keep pack contains objects that were
already in the repository (e.g., blobs due to a revert of
an old commit).
2. Receive-pack updates the refs, making the objects
reachable, but before it removes the .keep file, the
repack runs.
In either case, we may prefer to duplicate some objects in
the new, full pack, and let the next repack (after the .keep
file is cleaned up) take care of removing them.
This patch introduces both a command-line and config option
to disable the `--honor-pack-keep` option. By default, it
is triggered when pack.writeBitmaps (or `--write-bitmap-index`)
is turned on, but specifying it explicitly can override the
behavior (e.g., in cases where you prefer .keep files to
bitmaps, but only when they are present).
Note that this option just disables the pack-objects
behavior. We still leave packs with a .keep in place, as we
do not necessarily know that we have duplicated all of their
objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (!pack_kept_objects)
|
|
|
|
argv_array_push(&cmd.args, "--honor-pack-keep");
|
|
|
|
argv_array_push(&cmd.args, "--non-empty");
|
|
|
|
argv_array_push(&cmd.args, "--all");
|
|
|
|
argv_array_push(&cmd.args, "--reflog");
|
|
|
|
argv_array_push(&cmd.args, "--indexed-objects");
|
|
|
|
if (repository_format_partial_clone)
|
|
|
|
argv_array_push(&cmd.args, "--exclude-promisor-objects");
|
|
|
|
if (window)
|
|
|
|
argv_array_pushf(&cmd.args, "--window=%s", window);
|
|
|
|
if (window_memory)
|
|
|
|
argv_array_pushf(&cmd.args, "--window-memory=%s", window_memory);
|
|
|
|
if (depth)
|
|
|
|
argv_array_pushf(&cmd.args, "--depth=%s", depth);
|
|
|
|
if (threads)
|
|
|
|
argv_array_pushf(&cmd.args, "--threads=%s", threads);
|
|
|
|
if (max_pack_size)
|
|
|
|
argv_array_pushf(&cmd.args, "--max-pack-size=%s", max_pack_size);
|
|
|
|
if (no_reuse_delta)
|
|
|
|
argv_array_pushf(&cmd.args, "--no-reuse-delta");
|
|
|
|
if (no_reuse_object)
|
|
|
|
argv_array_pushf(&cmd.args, "--no-reuse-object");
|
|
|
|
if (write_bitmaps)
|
|
|
|
argv_array_push(&cmd.args, "--write-bitmap-index");
|
|
|
|
|
|
|
|
if (pack_everything & ALL_INTO_ONE) {
|
|
|
|
get_non_kept_pack_filenames(&existing_packs);
|
|
|
|
|
|
|
|
if (existing_packs.nr && delete_redundant) {
|
|
|
|
if (unpack_unreachable) {
|
|
|
|
argv_array_pushf(&cmd.args,
|
|
|
|
"--unpack-unreachable=%s",
|
|
|
|
unpack_unreachable);
|
|
|
|
argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
|
|
|
|
} else if (pack_everything & LOOSEN_UNREACHABLE) {
|
|
|
|
argv_array_push(&cmd.args,
|
|
|
|
"--unpack-unreachable");
|
repack: add --keep-unreachable option
The usual way to do a full repack (and what is done by
git-gc) is to run "repack -Ad --unpack-unreachable=<when>",
which will loosen any unreachable objects newer than
"<when>", and drop any older ones.
This is a safer alternative to "repack -ad", because
"<when>" becomes a grace period during which we will not
drop any new objects that are about to be referenced.
However, it isn't perfectly safe. It's always possible that
a process is about to reference an old object. Even if that
process were to take care to update the timestamp on the
object, there is no atomicity with a simultaneously running
"repack" process.
So while unlikely, there is a small race wherein we may drop
an object that is in the process of being referenced. If you
do automated repacking on a large number of active
repositories, you may hit it eventually, and the result is a
corrupted repository.
It would be nice to fix that race in the long run, but it's
complicated. In the meantime, there is a much simpler
strategy for automated repository maintenance: do not drop
objects at all. We already have a "--keep-unreachable"
option in pack-objects; we just need to plumb it through
from git-repack.
Note that this _isn't_ plumbed through from git-gc, so at
this point it's strictly a tool for people doing their own
advanced repository maintenance strategy.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
} else if (keep_unreachable) {
|
|
|
|
argv_array_push(&cmd.args, "--keep-unreachable");
|
repack: extend --keep-unreachable to loose objects
If you use "repack -adk" currently, we will pack all objects
that are already packed into the new pack, and then drop the
old packs. However, loose unreachable objects will be left
as-is. In theory these are meant to expire eventually with
"git prune". But if you are using "repack -k", you probably
want to keep things forever and therefore do not run "git
prune" at all. Meaning those loose objects may build up over
time and end up fooling any object-count heuristics (such as
the one done by "gc --auto", though since git-gc does not
support "repack -k", this really applies to whatever custom
scripts people might have driving "repack -k").
With this patch, we instead stuff any loose unreachable
objects into the pack along with the already-packed
unreachable objects. This may seem wasteful, but it is
really no more so than using "repack -k" in the first place.
We are at a slight disadvantage, in that we have no useful
ordering for the result, or names to hand to the delta code.
However, this is again no worse than what "repack -k" is
already doing for the packed objects. The packing of these
objects doesn't matter much because they should not be
accessed frequently (unless they actually _do_ become
referenced, but then they would get moved to a different
part of the packfile during the next repack).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
argv_array_push(&cmd.args, "--pack-loose-unreachable");
|
|
|
|
} else {
|
|
|
|
argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
argv_array_push(&cmd.args, "--unpacked");
|
|
|
|
argv_array_push(&cmd.args, "--incremental");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (local)
|
|
|
|
argv_array_push(&cmd.args, "--local");
|
|
|
|
if (quiet)
|
|
|
|
argv_array_push(&cmd.args, "--quiet");
|
|
|
|
if (delta_base_offset)
|
|
|
|
argv_array_push(&cmd.args, "--delta-base-offset");
|
|
|
|
|
|
|
|
argv_array_push(&cmd.args, packtmp);
|
|
|
|
|
|
|
|
cmd.git_cmd = 1;
|
|
|
|
cmd.out = -1;
|
|
|
|
cmd.no_stdin = 1;
|
|
|
|
|
|
|
|
ret = start_command(&cmd);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
out = xfdopen(cmd.out, "r");
|
|
|
|
while (strbuf_getline_lf(&line, out) != EOF) {
|
|
|
|
if (line.len != 40)
|
|
|
|
die("repack: Expecting 40 character sha1 lines only from pack-objects.");
|
|
|
|
string_list_append(&names, line.buf);
|
|
|
|
}
|
|
|
|
fclose(out);
|
|
|
|
ret = finish_command(&cmd);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (!names.nr && !quiet)
|
|
|
|
printf("Nothing new to pack.\n");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ok we have prepared all new packfiles.
|
|
|
|
* First see if there are packs of the same name and if so
|
|
|
|
* if we can move them out of the way (this can happen if we
|
|
|
|
* repacked immediately after packing fully).
|
|
|
|
*/
|
|
|
|
failed = 0;
|
|
|
|
for_each_string_list_item(item, &names) {
|
|
|
|
for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
|
|
|
|
char *fname, *fname_old;
|
|
|
|
fname = mkpathdup("%s/pack-%s%s", packdir,
|
|
|
|
item->string, exts[ext].name);
|
|
|
|
if (!file_exists(fname)) {
|
|
|
|
free(fname);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
fname_old = mkpathdup("%s/old-%s%s", packdir,
|
|
|
|
item->string, exts[ext].name);
|
|
|
|
if (file_exists(fname_old))
|
|
|
|
if (unlink(fname_old))
|
|
|
|
failed = 1;
|
|
|
|
|
|
|
|
if (!failed && rename(fname, fname_old)) {
|
|
|
|
free(fname);
|
|
|
|
free(fname_old);
|
|
|
|
failed = 1;
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
string_list_append(&rollback, fname);
|
|
|
|
free(fname_old);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (failed)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (failed) {
|
|
|
|
struct string_list rollback_failure = STRING_LIST_INIT_DUP;
|
|
|
|
for_each_string_list_item(item, &rollback) {
|
|
|
|
char *fname, *fname_old;
|
|
|
|
fname = mkpathdup("%s/%s", packdir, item->string);
|
|
|
|
fname_old = mkpathdup("%s/old-%s", packdir, item->string);
|
|
|
|
if (rename(fname_old, fname))
|
|
|
|
string_list_append(&rollback_failure, fname);
|
|
|
|
free(fname);
|
|
|
|
free(fname_old);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rollback_failure.nr) {
|
|
|
|
int i;
|
|
|
|
fprintf(stderr,
|
|
|
|
"WARNING: Some packs in use have been renamed by\n"
|
|
|
|
"WARNING: prefixing old- to their name, in order to\n"
|
|
|
|
"WARNING: replace them with the new version of the\n"
|
|
|
|
"WARNING: file. But the operation failed, and the\n"
|
|
|
|
"WARNING: attempt to rename them back to their\n"
|
|
|
|
"WARNING: original names also failed.\n"
|
|
|
|
"WARNING: Please rename them in %s manually:\n", packdir);
|
|
|
|
for (i = 0; i < rollback_failure.nr; i++)
|
|
|
|
fprintf(stderr, "WARNING: old-%s -> %s\n",
|
|
|
|
rollback_failure.items[i].string,
|
|
|
|
rollback_failure.items[i].string);
|
|
|
|
}
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now the ones with the same name are out of the way... */
|
|
|
|
for_each_string_list_item(item, &names) {
|
|
|
|
for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
|
|
|
|
char *fname, *fname_old;
|
|
|
|
struct stat statbuffer;
|
|
|
|
int exists = 0;
|
|
|
|
fname = mkpathdup("%s/pack-%s%s",
|
|
|
|
packdir, item->string, exts[ext].name);
|
|
|
|
fname_old = mkpathdup("%s-%s%s",
|
|
|
|
packtmp, item->string, exts[ext].name);
|
|
|
|
if (!stat(fname_old, &statbuffer)) {
|
|
|
|
statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
|
|
|
|
chmod(fname_old, statbuffer.st_mode);
|
|
|
|
exists = 1;
|
|
|
|
}
|
|
|
|
if (exists || !exts[ext].optional) {
|
|
|
|
if (rename(fname_old, fname))
|
|
|
|
die_errno(_("renaming '%s' failed"), fname_old);
|
|
|
|
}
|
|
|
|
free(fname);
|
|
|
|
free(fname_old);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Remove the "old-" files */
|
|
|
|
for_each_string_list_item(item, &names) {
|
|
|
|
for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
|
|
|
|
char *fname;
|
|
|
|
fname = mkpathdup("%s/old-%s%s",
|
|
|
|
packdir,
|
|
|
|
item->string,
|
|
|
|
exts[ext].name);
|
|
|
|
if (remove_path(fname))
|
|
|
|
warning(_("failed to remove '%s'"), fname);
|
|
|
|
free(fname);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* End of pack replacement. */
|
|
|
|
|
|
|
|
if (delete_redundant) {
|
|
|
|
int opts = 0;
|
|
|
|
string_list_sort(&names);
|
|
|
|
for_each_string_list_item(item, &existing_packs) {
|
|
|
|
char *sha1;
|
|
|
|
size_t len = strlen(item->string);
|
|
|
|
if (len < 40)
|
|
|
|
continue;
|
|
|
|
sha1 = item->string + len - 40;
|
|
|
|
if (!string_list_has_string(&names, sha1))
|
|
|
|
remove_redundant_pack(packdir, item->string);
|
|
|
|
}
|
|
|
|
if (!quiet && isatty(2))
|
|
|
|
opts |= PRUNE_PACKED_VERBOSE;
|
|
|
|
prune_packed_objects(opts);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!no_update_server_info)
|
|
|
|
update_server_info(0);
|
|
|
|
remove_temporary_files();
|
|
|
|
string_list_clear(&names, 0);
|
|
|
|
string_list_clear(&rollback, 0);
|
|
|
|
string_list_clear(&existing_packs, 0);
|
|
|
|
strbuf_release(&line);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|