|
|
|
#include "builtin.h"
|
|
|
|
#include "config.h"
|
|
|
|
#include "delta.h"
|
|
|
|
#include "pack.h"
|
|
|
|
#include "csum-file.h"
|
|
|
|
#include "blob.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "tag.h"
|
|
|
|
#include "tree.h"
|
|
|
|
#include "progress.h"
|
|
|
|
#include "fsck.h"
|
|
|
|
#include "exec-cmd.h"
|
|
|
|
#include "streaming.h"
|
|
|
|
#include "thread-utils.h"
|
|
|
|
#include "packfile.h"
|
|
|
|
#include "object-store.h"
|
|
|
|
#include "promisor-remote.h"
|
|
|
|
|
|
|
|
static const char index_pack_usage[] =
|
|
|
|
"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
|
|
|
|
|
|
|
|
struct object_entry {
|
|
|
|
struct pack_idx_entry idx;
|
|
|
|
unsigned long size;
|
|
|
|
unsigned char hdr_size;
|
|
|
|
signed char type;
|
|
|
|
signed char real_type;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct object_stat {
|
|
|
|
unsigned delta_depth;
|
|
|
|
int base_object_no;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct base_data {
|
|
|
|
/* Initialized by make_base(). */
|
|
|
|
struct base_data *base;
|
|
|
|
struct object_entry *obj;
|
|
|
|
int ref_first, ref_last;
|
|
|
|
int ofs_first, ofs_last;
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
/*
|
|
|
|
* Threads should increment retain_data if they are about to call
|
|
|
|
* patch_delta() using this struct's data as a base, and decrement this
|
|
|
|
* when they are done. While retain_data is nonzero, this struct's data
|
|
|
|
* will not be freed even if the delta base cache limit is exceeded.
|
|
|
|
*/
|
|
|
|
int retain_data;
|
|
|
|
/*
|
|
|
|
* The number of direct children that have not been fully processed
|
|
|
|
* (entered work_head, entered done_head, left done_head). When this
|
|
|
|
* number reaches zero, this struct base_data can be freed.
|
|
|
|
*/
|
|
|
|
int children_remaining;
|
|
|
|
|
|
|
|
/* Not initialized by make_base(). */
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
struct list_head list;
|
|
|
|
void *data;
|
|
|
|
unsigned long size;
|
|
|
|
};
|
|
|
|
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
/*
|
|
|
|
* Stack of struct base_data that have unprocessed children.
|
|
|
|
* threaded_second_pass() uses this as a source of work (the other being the
|
|
|
|
* objects array).
|
|
|
|
*
|
|
|
|
* Guarded by work_mutex.
|
|
|
|
*/
|
|
|
|
static LIST_HEAD(work_head);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Stack of struct base_data that have children, all of whom have been
|
|
|
|
* processed or are being processed, and at least one child is being processed.
|
|
|
|
* These struct base_data must be kept around until the last child is
|
|
|
|
* processed.
|
|
|
|
*
|
|
|
|
* Guarded by work_mutex.
|
|
|
|
*/
|
|
|
|
static LIST_HEAD(done_head);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* All threads share one delta base cache.
|
|
|
|
*
|
|
|
|
* base_cache_used is guarded by work_mutex, and base_cache_limit is read-only
|
|
|
|
* in a thread.
|
|
|
|
*/
|
|
|
|
static size_t base_cache_used;
|
|
|
|
static size_t base_cache_limit;
|
|
|
|
|
|
|
|
struct thread_local {
|
|
|
|
pthread_t thread;
|
|
|
|
int pack_fd;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Remember to update object flag allocation in object.h */
|
|
|
|
#define FLAG_LINK (1u<<20)
|
|
|
|
#define FLAG_CHECKED (1u<<21)
|
|
|
|
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
struct ofs_delta_entry {
|
|
|
|
off_t offset;
|
|
|
|
int obj_no;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ref_delta_entry {
|
|
|
|
struct object_id oid;
|
|
|
|
int obj_no;
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct object_entry *objects;
|
|
|
|
static struct object_stat *obj_stat;
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
static struct ofs_delta_entry *ofs_deltas;
|
|
|
|
static struct ref_delta_entry *ref_deltas;
|
|
|
|
static struct thread_local nothread_data;
|
|
|
|
static int nr_objects;
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
static int nr_ofs_deltas;
|
|
|
|
static int nr_ref_deltas;
|
|
|
|
static int ref_deltas_alloc;
|
|
|
|
static int nr_resolved_deltas;
|
|
|
|
static int nr_threads;
|
|
|
|
|
|
|
|
static int from_stdin;
|
|
|
|
static int strict;
|
|
|
|
static int do_fsck_object;
|
|
|
|
static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
|
|
|
|
static int verbose;
|
index-pack: add flag for showing delta-resolution progress
The index-pack command has two progress meters: one for
"receiving objects", and one for "resolving deltas". You get
neither by default, or both with "-v".
But for a push through receive-pack, we would want only the
"resolving deltas" phase, _not_ the "receiving objects"
progress. There are two reasons for this.
One is simply that existing clients are already printing
"writing objects" progress at the same time. Arguably
"receiving" from the far end is more useful, because it
tells you what has actually gotten there, as opposed to what
might be stuck in a buffer somewhere between the client and
server. But that would require a protocol extension to tell
clients not to print their progress. Possible, but
complexity for little gain.
The second reason is much more important. In a full-duplex
connection like git-over-ssh, we can print progress while
the pack is incoming, and it will immediately get to the
client. But for a half-duplex connection like git-over-http,
we should not say anything until we have received the full
request. Anything we write is subject to being stuck in a
buffer by the webserver. Worse, we can end up in a deadlock
if that buffer fills up.
So our best bet is to avoid writing anything that isn't a
small fixed size until we've received the full pack.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
static int show_resolving_progress;
|
|
|
|
static int show_stat;
|
|
|
|
static int check_self_contained_and_connected;
|
|
|
|
|
|
|
|
static struct progress *progress;
|
|
|
|
|
|
|
|
/* We always read in 4kB chunks. */
|
|
|
|
static unsigned char input_buffer[4096];
|
|
|
|
static unsigned int input_offset, input_len;
|
|
|
|
static off_t consumed_bytes;
|
|
|
|
static off_t max_input_size;
|
|
|
|
static unsigned deepest_delta;
|
|
|
|
static git_hash_ctx input_ctx;
|
|
|
|
static uint32_t input_crc32;
|
|
|
|
static int input_fd, output_fd;
|
|
|
|
static const char *curr_pack;
|
|
|
|
|
|
|
|
static struct thread_local *thread_data;
|
|
|
|
static int nr_dispatched;
|
|
|
|
static int threads_active;
|
|
|
|
|
|
|
|
static pthread_mutex_t read_mutex;
|
|
|
|
#define read_lock() lock_mutex(&read_mutex)
|
|
|
|
#define read_unlock() unlock_mutex(&read_mutex)
|
|
|
|
|
|
|
|
static pthread_mutex_t counter_mutex;
|
|
|
|
#define counter_lock() lock_mutex(&counter_mutex)
|
|
|
|
#define counter_unlock() unlock_mutex(&counter_mutex)
|
|
|
|
|
|
|
|
static pthread_mutex_t work_mutex;
|
|
|
|
#define work_lock() lock_mutex(&work_mutex)
|
|
|
|
#define work_unlock() unlock_mutex(&work_mutex)
|
|
|
|
|
|
|
|
static pthread_mutex_t deepest_delta_mutex;
|
|
|
|
#define deepest_delta_lock() lock_mutex(&deepest_delta_mutex)
|
|
|
|
#define deepest_delta_unlock() unlock_mutex(&deepest_delta_mutex)
|
|
|
|
|
|
|
|
static pthread_key_t key;
|
|
|
|
|
|
|
|
static inline void lock_mutex(pthread_mutex_t *mutex)
|
|
|
|
{
|
|
|
|
if (threads_active)
|
|
|
|
pthread_mutex_lock(mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void unlock_mutex(pthread_mutex_t *mutex)
|
|
|
|
{
|
|
|
|
if (threads_active)
|
|
|
|
pthread_mutex_unlock(mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Mutex and conditional variable can't be statically-initialized on Windows.
|
|
|
|
*/
|
|
|
|
static void init_thread(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
init_recursive_mutex(&read_mutex);
|
|
|
|
pthread_mutex_init(&counter_mutex, NULL);
|
|
|
|
pthread_mutex_init(&work_mutex, NULL);
|
|
|
|
if (show_stat)
|
|
|
|
pthread_mutex_init(&deepest_delta_mutex, NULL);
|
|
|
|
pthread_key_create(&key, NULL);
|
|
|
|
thread_data = xcalloc(nr_threads, sizeof(*thread_data));
|
|
|
|
for (i = 0; i < nr_threads; i++) {
|
|
|
|
thread_data[i].pack_fd = open(curr_pack, O_RDONLY);
|
|
|
|
if (thread_data[i].pack_fd == -1)
|
|
|
|
die_errno(_("unable to open %s"), curr_pack);
|
|
|
|
}
|
|
|
|
|
|
|
|
threads_active = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void cleanup_thread(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
if (!threads_active)
|
|
|
|
return;
|
|
|
|
threads_active = 0;
|
|
|
|
pthread_mutex_destroy(&read_mutex);
|
|
|
|
pthread_mutex_destroy(&counter_mutex);
|
|
|
|
pthread_mutex_destroy(&work_mutex);
|
|
|
|
if (show_stat)
|
|
|
|
pthread_mutex_destroy(&deepest_delta_mutex);
|
|
|
|
for (i = 0; i < nr_threads; i++)
|
|
|
|
close(thread_data[i].pack_fd);
|
|
|
|
pthread_key_delete(key);
|
|
|
|
free(thread_data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mark_link(struct object *obj, int type, void *data, struct fsck_options *options)
|
|
|
|
{
|
|
|
|
if (!obj)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (type != OBJ_ANY && obj->type != type)
|
|
|
|
die(_("object type mismatch at %s"), oid_to_hex(&obj->oid));
|
|
|
|
|
|
|
|
obj->flags |= FLAG_LINK;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The content of each linked object must have been checked
|
|
|
|
or it must be already present in the object database */
|
|
|
|
static unsigned check_object(struct object *obj)
|
|
|
|
{
|
|
|
|
if (!obj)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!(obj->flags & FLAG_LINK))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!(obj->flags & FLAG_CHECKED)) {
|
|
|
|
unsigned long size;
|
|
|
|
int type = oid_object_info(the_repository, &obj->oid, &size);
|
|
|
|
if (type <= 0)
|
|
|
|
die(_("did not receive expected object %s"),
|
|
|
|
oid_to_hex(&obj->oid));
|
|
|
|
if (type != obj->type)
|
|
|
|
die(_("object %s: expected type %s, found %s"),
|
|
|
|
oid_to_hex(&obj->oid),
|
|
|
|
type_name(obj->type), type_name(type));
|
|
|
|
obj->flags |= FLAG_CHECKED;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned check_objects(void)
|
|
|
|
{
|
|
|
|
unsigned i, max, foreign_nr = 0;
|
|
|
|
|
|
|
|
max = get_max_object_index();
|
|
|
|
|
|
|
|
if (verbose)
|
|
|
|
progress = start_delayed_progress(_("Checking objects"), max);
|
|
|
|
|
|
|
|
for (i = 0; i < max; i++) {
|
|
|
|
foreign_nr += check_object(get_indexed_object(i));
|
|
|
|
display_progress(progress, i + 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
stop_progress(&progress);
|
|
|
|
return foreign_nr;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Discard current buffer used content. */
|
|
|
|
static void flush(void)
|
|
|
|
{
|
|
|
|
if (input_offset) {
|
|
|
|
if (output_fd >= 0)
|
|
|
|
write_or_die(output_fd, input_buffer, input_offset);
|
|
|
|
the_hash_algo->update_fn(&input_ctx, input_buffer, input_offset);
|
|
|
|
memmove(input_buffer, input_buffer + input_offset, input_len);
|
|
|
|
input_offset = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make sure at least "min" bytes are available in the buffer, and
|
|
|
|
* return the pointer to the buffer.
|
|
|
|
*/
|
|
|
|
static void *fill(int min)
|
|
|
|
{
|
|
|
|
if (min <= input_len)
|
|
|
|
return input_buffer + input_offset;
|
|
|
|
if (min > sizeof(input_buffer))
|
|
|
|
die(Q_("cannot fill %d byte",
|
|
|
|
"cannot fill %d bytes",
|
|
|
|
min),
|
|
|
|
min);
|
|
|
|
flush();
|
|
|
|
do {
|
|
|
|
ssize_t ret = xread(input_fd, input_buffer + input_len,
|
|
|
|
sizeof(input_buffer) - input_len);
|
|
|
|
if (ret <= 0) {
|
|
|
|
if (!ret)
|
|
|
|
die(_("early EOF"));
|
|
|
|
die_errno(_("read error on input"));
|
|
|
|
}
|
|
|
|
input_len += ret;
|
|
|
|
if (from_stdin)
|
|
|
|
display_throughput(progress, consumed_bytes + input_len);
|
|
|
|
} while (input_len < min);
|
|
|
|
return input_buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void use(int bytes)
|
|
|
|
{
|
|
|
|
if (bytes > input_len)
|
|
|
|
die(_("used more bytes than were available"));
|
|
|
|
input_crc32 = crc32(input_crc32, input_buffer + input_offset, bytes);
|
|
|
|
input_len -= bytes;
|
|
|
|
input_offset += bytes;
|
|
|
|
|
|
|
|
/* make sure off_t is sufficiently large not to wrap */
|
|
|
|
if (signed_add_overflows(consumed_bytes, bytes))
|
|
|
|
die(_("pack too large for current definition of off_t"));
|
|
|
|
consumed_bytes += bytes;
|
|
|
|
if (max_input_size && consumed_bytes > max_input_size)
|
|
|
|
die(_("pack exceeds maximum allowed size"));
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *open_pack_file(const char *pack_name)
|
|
|
|
{
|
|
|
|
if (from_stdin) {
|
|
|
|
input_fd = 0;
|
|
|
|
if (!pack_name) {
|
|
|
|
struct strbuf tmp_file = STRBUF_INIT;
|
|
|
|
output_fd = odb_mkstemp(&tmp_file,
|
|
|
|
"pack/tmp_pack_XXXXXX");
|
|
|
|
pack_name = strbuf_detach(&tmp_file, NULL);
|
|
|
|
} else {
|
|
|
|
output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
|
|
|
|
if (output_fd < 0)
|
|
|
|
die_errno(_("unable to create '%s'"), pack_name);
|
|
|
|
}
|
|
|
|
nothread_data.pack_fd = output_fd;
|
|
|
|
} else {
|
|
|
|
input_fd = open(pack_name, O_RDONLY);
|
|
|
|
if (input_fd < 0)
|
|
|
|
die_errno(_("cannot open packfile '%s'"), pack_name);
|
|
|
|
output_fd = -1;
|
|
|
|
nothread_data.pack_fd = input_fd;
|
|
|
|
}
|
|
|
|
the_hash_algo->init_fn(&input_ctx);
|
|
|
|
return pack_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void parse_pack_header(void)
|
|
|
|
{
|
|
|
|
struct pack_header *hdr = fill(sizeof(struct pack_header));
|
|
|
|
|
|
|
|
/* Header consistency check */
|
|
|
|
if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
|
|
|
|
die(_("pack signature mismatch"));
|
|
|
|
if (!pack_version_ok(hdr->hdr_version))
|
|
|
|
die(_("pack version %"PRIu32" unsupported"),
|
|
|
|
ntohl(hdr->hdr_version));
|
|
|
|
|
|
|
|
nr_objects = ntohl(hdr->hdr_entries);
|
|
|
|
use(sizeof(struct pack_header));
|
|
|
|
}
|
|
|
|
|
|
|
|
static NORETURN void bad_object(off_t offset, const char *format,
|
|
|
|
...) __attribute__((format (printf, 2, 3)));
|
|
|
|
|
|
|
|
static NORETURN void bad_object(off_t offset, const char *format, ...)
|
|
|
|
{
|
|
|
|
va_list params;
|
|
|
|
char buf[1024];
|
|
|
|
|
|
|
|
va_start(params, format);
|
|
|
|
vsnprintf(buf, sizeof(buf), format, params);
|
|
|
|
va_end(params);
|
|
|
|
die(_("pack has bad object at offset %"PRIuMAX": %s"),
|
|
|
|
(uintmax_t)offset, buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct thread_local *get_thread_data(void)
|
|
|
|
{
|
|
|
|
if (HAVE_THREADS) {
|
|
|
|
if (threads_active)
|
|
|
|
return pthread_getspecific(key);
|
|
|
|
assert(!threads_active &&
|
|
|
|
"This should only be reached when all threads are gone");
|
|
|
|
}
|
|
|
|
return ¬hread_data;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void set_thread_data(struct thread_local *data)
|
|
|
|
{
|
|
|
|
if (threads_active)
|
|
|
|
pthread_setspecific(key, data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void free_base_data(struct base_data *c)
|
|
|
|
{
|
|
|
|
if (c->data) {
|
|
|
|
FREE_AND_NULL(c->data);
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
base_cache_used -= c->size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void prune_base_data(struct base_data *retain)
|
|
|
|
{
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
struct list_head *pos;
|
|
|
|
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
if (base_cache_used <= base_cache_limit)
|
|
|
|
return;
|
|
|
|
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
list_for_each_prev(pos, &done_head) {
|
|
|
|
struct base_data *b = list_entry(pos, struct base_data, list);
|
|
|
|
if (b->retain_data || b == retain)
|
|
|
|
continue;
|
|
|
|
if (b->data) {
|
|
|
|
free_base_data(b);
|
|
|
|
if (base_cache_used <= base_cache_limit)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
list_for_each_prev(pos, &work_head) {
|
|
|
|
struct base_data *b = list_entry(pos, struct base_data, list);
|
|
|
|
if (b->retain_data || b == retain)
|
|
|
|
continue;
|
|
|
|
if (b->data) {
|
|
|
|
free_base_data(b);
|
|
|
|
if (base_cache_used <= base_cache_limit)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int is_delta_type(enum object_type type)
|
|
|
|
{
|
|
|
|
return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *unpack_entry_data(off_t offset, unsigned long size,
|
|
|
|
enum object_type type, struct object_id *oid)
|
|
|
|
{
|
|
|
|
static char fixed_buf[8192];
|
|
|
|
int status;
|
|
|
|
git_zstream stream;
|
|
|
|
void *buf;
|
|
|
|
git_hash_ctx c;
|
|
|
|
char hdr[32];
|
|
|
|
int hdrlen;
|
|
|
|
|
|
|
|
if (!is_delta_type(type)) {
|
|
|
|
hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX,
|
|
|
|
type_name(type),(uintmax_t)size) + 1;
|
|
|
|
the_hash_algo->init_fn(&c);
|
|
|
|
the_hash_algo->update_fn(&c, hdr, hdrlen);
|
|
|
|
} else
|
|
|
|
oid = NULL;
|
|
|
|
if (type == OBJ_BLOB && size > big_file_threshold)
|
|
|
|
buf = fixed_buf;
|
|
|
|
else
|
|
|
|
buf = xmallocz(size);
|
|
|
|
|
|
|
|
memset(&stream, 0, sizeof(stream));
|
|
|
|
git_inflate_init(&stream);
|
|
|
|
stream.next_out = buf;
|
|
|
|
stream.avail_out = buf == fixed_buf ? sizeof(fixed_buf) : size;
|
|
|
|
|
|
|
|
do {
|
|
|
|
unsigned char *last_out = stream.next_out;
|
|
|
|
stream.next_in = fill(1);
|
|
|
|
stream.avail_in = input_len;
|
|
|
|
status = git_inflate(&stream, 0);
|
|
|
|
use(input_len - stream.avail_in);
|
|
|
|
if (oid)
|
|
|
|
the_hash_algo->update_fn(&c, last_out, stream.next_out - last_out);
|
|
|
|
if (buf == fixed_buf) {
|
|
|
|
stream.next_out = buf;
|
|
|
|
stream.avail_out = sizeof(fixed_buf);
|
|
|
|
}
|
|
|
|
} while (status == Z_OK);
|
|
|
|
if (stream.total_out != size || status != Z_STREAM_END)
|
|
|
|
bad_object(offset, _("inflate returned %d"), status);
|
|
|
|
git_inflate_end(&stream);
|
|
|
|
if (oid)
|
|
|
|
the_hash_algo->final_fn(oid->hash, &c);
|
|
|
|
return buf == fixed_buf ? NULL : buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *unpack_raw_entry(struct object_entry *obj,
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
off_t *ofs_offset,
|
|
|
|
struct object_id *ref_oid,
|
|
|
|
struct object_id *oid)
|
|
|
|
{
|
Fix big left-shifts of unsigned char
Shifting 'unsigned char' or 'unsigned short' left can result in sign
extension errors, since the C integer promotion rules means that the
unsigned char/short will get implicitly promoted to a signed 'int' due to
the shift (or due to other operations).
This normally doesn't matter, but if you shift things up sufficiently, it
will now set the sign bit in 'int', and a subsequent cast to a bigger type
(eg 'long' or 'unsigned long') will now sign-extend the value despite the
original expression being unsigned.
One example of this would be something like
unsigned long size;
unsigned char c;
size += c << 24;
where despite all the variables being unsigned, 'c << 24' ends up being a
signed entity, and will get sign-extended when then doing the addition in
an 'unsigned long' type.
Since git uses 'unsigned char' pointers extensively, we actually have this
bug in a couple of places.
I may have missed some, but this is the result of looking at
git grep '[^0-9 ][ ]*<<[ ][a-z]' -- '*.c' '*.h'
git grep '<<[ ]*24'
which catches at least the common byte cases (shifting variables by a
variable amount, and shifting by 24 bits).
I also grepped for just 'unsigned char' variables in general, and
converted the ones that most obviously ended up getting implicitly cast
immediately anyway (eg hash_name(), encode_85()).
In addition to just avoiding 'unsigned char', this patch also tries to use
a common idiom for the delta header size thing. We had three different
variations on it: "& 0x7fUL" in one place (getting the sign extension
right), and "& ~0x80" and "& 0x7f" in two other places (not getting it
right). Apart from making them all just avoid using "unsigned char" at
all, I also unified them to then use a simple "& 0x7f".
I considered making a sparse extension which warns about doing implicit
casts from unsigned types to signed types, but it gets rather complex very
quickly, so this is just a hack.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
unsigned char *p;
|
|
|
|
unsigned long size, c;
|
|
|
|
off_t base_offset;
|
|
|
|
unsigned shift;
|
|
|
|
void *data;
|
|
|
|
|
|
|
|
obj->idx.offset = consumed_bytes;
|
|
|
|
input_crc32 = crc32(0, NULL, 0);
|
|
|
|
|
|
|
|
p = fill(1);
|
|
|
|
c = *p;
|
|
|
|
use(1);
|
|
|
|
obj->type = (c >> 4) & 7;
|
|
|
|
size = (c & 15);
|
|
|
|
shift = 4;
|
|
|
|
while (c & 0x80) {
|
|
|
|
p = fill(1);
|
|
|
|
c = *p;
|
|
|
|
use(1);
|
Fix big left-shifts of unsigned char
Shifting 'unsigned char' or 'unsigned short' left can result in sign
extension errors, since the C integer promotion rules means that the
unsigned char/short will get implicitly promoted to a signed 'int' due to
the shift (or due to other operations).
This normally doesn't matter, but if you shift things up sufficiently, it
will now set the sign bit in 'int', and a subsequent cast to a bigger type
(eg 'long' or 'unsigned long') will now sign-extend the value despite the
original expression being unsigned.
One example of this would be something like
unsigned long size;
unsigned char c;
size += c << 24;
where despite all the variables being unsigned, 'c << 24' ends up being a
signed entity, and will get sign-extended when then doing the addition in
an 'unsigned long' type.
Since git uses 'unsigned char' pointers extensively, we actually have this
bug in a couple of places.
I may have missed some, but this is the result of looking at
git grep '[^0-9 ][ ]*<<[ ][a-z]' -- '*.c' '*.h'
git grep '<<[ ]*24'
which catches at least the common byte cases (shifting variables by a
variable amount, and shifting by 24 bits).
I also grepped for just 'unsigned char' variables in general, and
converted the ones that most obviously ended up getting implicitly cast
immediately anyway (eg hash_name(), encode_85()).
In addition to just avoiding 'unsigned char', this patch also tries to use
a common idiom for the delta header size thing. We had three different
variations on it: "& 0x7fUL" in one place (getting the sign extension
right), and "& ~0x80" and "& 0x7f" in two other places (not getting it
right). Apart from making them all just avoid using "unsigned char" at
all, I also unified them to then use a simple "& 0x7f".
I considered making a sparse extension which warns about doing implicit
casts from unsigned types to signed types, but it gets rather complex very
quickly, so this is just a hack.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
size += (c & 0x7f) << shift;
|
|
|
|
shift += 7;
|
|
|
|
}
|
|
|
|
obj->size = size;
|
|
|
|
|
|
|
|
switch (obj->type) {
|
|
|
|
case OBJ_REF_DELTA:
|
|
|
|
hashcpy(ref_oid->hash, fill(the_hash_algo->rawsz));
|
|
|
|
use(the_hash_algo->rawsz);
|
|
|
|
break;
|
|
|
|
case OBJ_OFS_DELTA:
|
|
|
|
p = fill(1);
|
|
|
|
c = *p;
|
|
|
|
use(1);
|
|
|
|
base_offset = c & 127;
|
|
|
|
while (c & 128) {
|
|
|
|
base_offset += 1;
|
|
|
|
if (!base_offset || MSB(base_offset, 7))
|
|
|
|
bad_object(obj->idx.offset, _("offset value overflow for delta base object"));
|
|
|
|
p = fill(1);
|
|
|
|
c = *p;
|
|
|
|
use(1);
|
|
|
|
base_offset = (base_offset << 7) + (c & 127);
|
|
|
|
}
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
*ofs_offset = obj->idx.offset - base_offset;
|
|
|
|
if (*ofs_offset <= 0 || *ofs_offset >= obj->idx.offset)
|
|
|
|
bad_object(obj->idx.offset, _("delta base offset is out of bound"));
|
|
|
|
break;
|
|
|
|
case OBJ_COMMIT:
|
|
|
|
case OBJ_TREE:
|
|
|
|
case OBJ_BLOB:
|
|
|
|
case OBJ_TAG:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
bad_object(obj->idx.offset, _("unknown object type %d"), obj->type);
|
|
|
|
}
|
|
|
|
obj->hdr_size = consumed_bytes - obj->idx.offset;
|
|
|
|
|
|
|
|
data = unpack_entry_data(obj->idx.offset, obj->size, obj->type, oid);
|
|
|
|
obj->idx.crc32 = input_crc32;
|
|
|
|
return data;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *unpack_data(struct object_entry *obj,
|
|
|
|
int (*consume)(const unsigned char *, unsigned long, void *),
|
|
|
|
void *cb_data)
|
|
|
|
{
|
|
|
|
off_t from = obj[0].idx.offset + obj[0].hdr_size;
|
|
|
|
off_t len = obj[1].idx.offset - from;
|
|
|
|
unsigned char *data, *inbuf;
|
|
|
|
git_zstream stream;
|
|
|
|
int status;
|
|
|
|
|
|
|
|
data = xmallocz(consume ? 64*1024 : obj->size);
|
|
|
|
inbuf = xmalloc((len < 64*1024) ? (int)len : 64*1024);
|
|
|
|
|
|
|
|
memset(&stream, 0, sizeof(stream));
|
|
|
|
git_inflate_init(&stream);
|
|
|
|
stream.next_out = data;
|
|
|
|
stream.avail_out = consume ? 64*1024 : obj->size;
|
|
|
|
|
|
|
|
do {
|
|
|
|
ssize_t n = (len < 64*1024) ? (ssize_t)len : 64*1024;
|
|
|
|
n = xpread(get_thread_data()->pack_fd, inbuf, n, from);
|
|
|
|
if (n < 0)
|
|
|
|
die_errno(_("cannot pread pack file"));
|
|
|
|
if (!n)
|
|
|
|
die(Q_("premature end of pack file, %"PRIuMAX" byte missing",
|
|
|
|
"premature end of pack file, %"PRIuMAX" bytes missing",
|
|
|
|
(unsigned int)len),
|
|
|
|
(uintmax_t)len);
|
|
|
|
from += n;
|
|
|
|
len -= n;
|
|
|
|
stream.next_in = inbuf;
|
|
|
|
stream.avail_in = n;
|
index-pack: loop while inflating objects in unpack_data
When the unpack_data function is given a consume() callback,
it unpacks only 64K of the input at a time, feeding it to
git_inflate along with a 64K output buffer. However,
because we are inflating, there is a good chance that the
output buffer will fill before consuming all of the input.
In this case, we need to loop on git_inflate until we have
fed the whole input buffer, feeding each chunk of output to
the consume buffer.
The current code does not do this, and as a result, will
fail the loop condition and trigger a fatal "serious inflate
inconsistency" error in this case.
While we're rearranging the loop, let's get rid of the
extra last_out pointer. It is meant to point to the
beginning of the buffer that we feed to git_inflate, but in
practice this is always the beginning of our same 64K
buffer, because:
1. At the beginning of the loop, we are feeding the
buffer.
2. At the end of the loop, if we are using a consume()
function, we reset git_inflate's pointer to the
beginning of the buffer. If we are not using a
consume() function, then we do not care about the value
of last_out at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
if (!consume)
|
|
|
|
status = git_inflate(&stream, 0);
|
|
|
|
else {
|
|
|
|
do {
|
|
|
|
status = git_inflate(&stream, 0);
|
|
|
|
if (consume(data, stream.next_out - data, cb_data)) {
|
|
|
|
free(inbuf);
|
|
|
|
free(data);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
stream.next_out = data;
|
|
|
|
stream.avail_out = 64*1024;
|
|
|
|
} while (status == Z_OK && stream.avail_in);
|
|
|
|
}
|
|
|
|
} while (len && status == Z_OK && !stream.avail_in);
|
|
|
|
|
|
|
|
/* This has been inflated OK when first encountered, so... */
|
|
|
|
if (status != Z_STREAM_END || stream.total_out != obj->size)
|
|
|
|
die(_("serious inflate inconsistency"));
|
|
|
|
|
|
|
|
git_inflate_end(&stream);
|
|
|
|
free(inbuf);
|
|
|
|
if (consume) {
|
|
|
|
FREE_AND_NULL(data);
|
|
|
|
}
|
|
|
|
return data;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *get_data_from_pack(struct object_entry *obj)
|
|
|
|
{
|
|
|
|
return unpack_data(obj, NULL, NULL);
|
|
|
|
}
|
|
|
|
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
static int compare_ofs_delta_bases(off_t offset1, off_t offset2,
|
|
|
|
enum object_type type1,
|
|
|
|
enum object_type type2)
|
|
|
|
{
|
|
|
|
int cmp = type1 - type2;
|
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
index-pack: fix truncation of off_t in comparison
Commit c6458e6 (index-pack: kill union delta_base to save
memory, 2015-04-18) refactored the comparison functions used
in sorting and binary searching our delta list. The
resulting code does something like:
int cmp_offsets(off_t a, off_t b)
{
return a - b;
}
This works most of the time, but produces nonsensical
results when the difference between the two offsets is
larger than what can be stored in an "int". This can lead to
unresolved deltas if the packsize is larger than 2G (even on
64-bit systems, an int is still typically 32 bits):
$ git clone git://github.com/mozilla/gecko-dev
Cloning into 'gecko-dev'...
remote: Counting objects: 4800161, done.
remote: Compressing objects: 100% (178/178), done.
remote: Total 4800161 (delta 88), reused 0 (delta 0), pack-reused 4799978
Receiving objects: 100% (4800161/4800161), 2.21 GiB | 3.26 MiB/s, done.
Resolving deltas: 99% (3808820/3811944), completed with 0 local objects.
fatal: pack has 3124 unresolved deltas
fatal: index-pack failed
We can fix it by doing direct comparisons between the
offsets and returning constants; the callers only care about
the sign of the comparison, not the magnitude.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
return offset1 < offset2 ? -1 :
|
|
|
|
offset1 > offset2 ? 1 :
|
|
|
|
0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int find_ofs_delta(const off_t offset)
|
|
|
|
{
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
int first = 0, last = nr_ofs_deltas;
|
|
|
|
|
|
|
|
while (first < last) {
|
|
|
|
int next = first + (last - first) / 2;
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
struct ofs_delta_entry *delta = &ofs_deltas[next];
|
|
|
|
int cmp;
|
|
|
|
|
|
|
|
cmp = compare_ofs_delta_bases(offset, delta->offset,
|
|
|
|
OBJ_OFS_DELTA,
|
|
|
|
objects[delta->obj_no].type);
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
if (!cmp)
|
|
|
|
return next;
|
|
|
|
if (cmp < 0) {
|
|
|
|
last = next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
first = next+1;
|
|
|
|
}
|
|
|
|
return -first-1;
|
|
|
|
}
|
|
|
|
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
static void find_ofs_delta_children(off_t offset,
|
|
|
|
int *first_index, int *last_index)
|
|
|
|
{
|
|
|
|
int first = find_ofs_delta(offset);
|
|
|
|
int last = first;
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
int end = nr_ofs_deltas - 1;
|
|
|
|
|
|
|
|
if (first < 0) {
|
|
|
|
*first_index = 0;
|
|
|
|
*last_index = -1;
|
|
|
|
return;
|
|
|
|
}
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
while (first > 0 && ofs_deltas[first - 1].offset == offset)
|
|
|
|
--first;
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
while (last < end && ofs_deltas[last + 1].offset == offset)
|
|
|
|
++last;
|
|
|
|
*first_index = first;
|
|
|
|
*last_index = last;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int compare_ref_delta_bases(const struct object_id *oid1,
|
|
|
|
const struct object_id *oid2,
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
enum object_type type1,
|
|
|
|
enum object_type type2)
|
|
|
|
{
|
|
|
|
int cmp = type1 - type2;
|
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
|
|
|
return oidcmp(oid1, oid2);
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
}
|
|
|
|
|
|
|
|
static int find_ref_delta(const struct object_id *oid)
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
{
|
|
|
|
int first = 0, last = nr_ref_deltas;
|
|
|
|
|
|
|
|
while (first < last) {
|
|
|
|
int next = first + (last - first) / 2;
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
struct ref_delta_entry *delta = &ref_deltas[next];
|
|
|
|
int cmp;
|
|
|
|
|
|
|
|
cmp = compare_ref_delta_bases(oid, &delta->oid,
|
|
|
|
OBJ_REF_DELTA,
|
|
|
|
objects[delta->obj_no].type);
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
if (!cmp)
|
|
|
|
return next;
|
|
|
|
if (cmp < 0) {
|
|
|
|
last = next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
first = next+1;
|
|
|
|
}
|
|
|
|
return -first-1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void find_ref_delta_children(const struct object_id *oid,
|
|
|
|
int *first_index, int *last_index)
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
{
|
|
|
|
int first = find_ref_delta(oid);
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
int last = first;
|
|
|
|
int end = nr_ref_deltas - 1;
|
|
|
|
|
|
|
|
if (first < 0) {
|
|
|
|
*first_index = 0;
|
|
|
|
*last_index = -1;
|
|
|
|
return;
|
|
|
|
}
|
convert "oidcmp() == 0" to oideq()
Using the more restrictive oideq() should, in the long run,
give the compiler more opportunities to optimize these
callsites. For now, this conversion should be a complete
noop with respect to the generated code.
The result is also perhaps a little more readable, as it
avoids the "zero is equal" idiom. Since it's so prevalent in
C, I think seasoned programmers tend not to even notice it
anymore, but it can sometimes make for awkward double
negations (e.g., we can drop a few !!oidcmp() instances
here).
This patch was generated almost entirely by the included
coccinelle patch. This mechanical conversion should be
completely safe, because we check explicitly for cases where
oidcmp() is compared to 0, which is what oideq() is doing
under the hood. Note that we don't have to catch "!oidcmp()"
separately; coccinelle's standard isomorphisms make sure the
two are treated equivalently.
I say "almost" because I did hand-edit the coccinelle output
to fix up a few style violations (it mostly keeps the
original formatting, but sometimes unwraps long lines).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
while (first > 0 && oideq(&ref_deltas[first - 1].oid, oid))
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
--first;
|
convert "oidcmp() == 0" to oideq()
Using the more restrictive oideq() should, in the long run,
give the compiler more opportunities to optimize these
callsites. For now, this conversion should be a complete
noop with respect to the generated code.
The result is also perhaps a little more readable, as it
avoids the "zero is equal" idiom. Since it's so prevalent in
C, I think seasoned programmers tend not to even notice it
anymore, but it can sometimes make for awkward double
negations (e.g., we can drop a few !!oidcmp() instances
here).
This patch was generated almost entirely by the included
coccinelle patch. This mechanical conversion should be
completely safe, because we check explicitly for cases where
oidcmp() is compared to 0, which is what oideq() is doing
under the hood. Note that we don't have to catch "!oidcmp()"
separately; coccinelle's standard isomorphisms make sure the
two are treated equivalently.
I say "almost" because I did hand-edit the coccinelle output
to fix up a few style violations (it mostly keeps the
original formatting, but sometimes unwraps long lines).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
while (last < end && oideq(&ref_deltas[last + 1].oid, oid))
|
|
|
|
++last;
|
|
|
|
*first_index = first;
|
|
|
|
*last_index = last;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct compare_data {
|
|
|
|
struct object_entry *entry;
|
|
|
|
struct git_istream *st;
|
|
|
|
unsigned char *buf;
|
|
|
|
unsigned long buf_size;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int compare_objects(const unsigned char *buf, unsigned long size,
|
|
|
|
void *cb_data)
|
|
|
|
{
|
|
|
|
struct compare_data *data = cb_data;
|
|
|
|
|
|
|
|
if (data->buf_size < size) {
|
|
|
|
free(data->buf);
|
|
|
|
data->buf = xmalloc(size);
|
|
|
|
data->buf_size = size;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (size) {
|
|
|
|
ssize_t len = read_istream(data->st, data->buf, size);
|
|
|
|
if (len == 0)
|
|
|
|
die(_("SHA1 COLLISION FOUND WITH %s !"),
|
|
|
|
oid_to_hex(&data->entry->idx.oid));
|
|
|
|
if (len < 0)
|
|
|
|
die(_("unable to read %s"),
|
|
|
|
oid_to_hex(&data->entry->idx.oid));
|
|
|
|
if (memcmp(buf, data->buf, len))
|
|
|
|
die(_("SHA1 COLLISION FOUND WITH %s !"),
|
|
|
|
oid_to_hex(&data->entry->idx.oid));
|
|
|
|
size -= len;
|
|
|
|
buf += len;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int check_collison(struct object_entry *entry)
|
|
|
|
{
|
|
|
|
struct compare_data data;
|
|
|
|
enum object_type type;
|
|
|
|
unsigned long size;
|
|
|
|
|
|
|
|
if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
memset(&data, 0, sizeof(data));
|
|
|
|
data.entry = entry;
|
|
|
|
data.st = open_istream(the_repository, &entry->idx.oid, &type, &size,
|
|
|
|
NULL);
|
|
|
|
if (!data.st)
|
|
|
|
return -1;
|
|
|
|
if (size != entry->size || type != entry->type)
|
|
|
|
die(_("SHA1 COLLISION FOUND WITH %s !"),
|
|
|
|
oid_to_hex(&entry->idx.oid));
|
|
|
|
unpack_data(entry, compare_objects, &data);
|
|
|
|
close_istream(data.st);
|
|
|
|
free(data.buf);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sha1_object(const void *data, struct object_entry *obj_entry,
|
|
|
|
unsigned long size, enum object_type type,
|
|
|
|
const struct object_id *oid)
|
|
|
|
{
|
|
|
|
void *new_data = NULL;
|
|
|
|
int collision_test_needed = 0;
|
|
|
|
|
|
|
|
assert(data || obj_entry);
|
|
|
|
|
|
|
|
if (startup_info->have_repository) {
|
|
|
|
read_lock();
|
|
|
|
collision_test_needed =
|
|
|
|
has_object_file_with_flags(oid, OBJECT_INFO_QUICK);
|
|
|
|
read_unlock();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (collision_test_needed && !data) {
|
|
|
|
read_lock();
|
|
|
|
if (!check_collison(obj_entry))
|
|
|
|
collision_test_needed = 0;
|
|
|
|
read_unlock();
|
|
|
|
}
|
|
|
|
if (collision_test_needed) {
|
|
|
|
void *has_data;
|
|
|
|
enum object_type has_type;
|
|
|
|
unsigned long has_size;
|
|
|
|
read_lock();
|
|
|
|
has_type = oid_object_info(the_repository, oid, &has_size);
|
index-pack: detect local corruption in collision check
When we notice that we have a local copy of an incoming
object, we compare the two objects to make sure we haven't
found a collision. Before we get to the actual object
bytes, though, we compare the type and size from
sha1_object_info().
If our local object is corrupted, then the type will be
OBJ_BAD, which obviously will not match the incoming type,
and we'll report "SHA1 COLLISION FOUND" (with capital
letters and everything). This is confusing, as the problem
is not a collision but rather local corruption. We should
report that instead (just like we do if reading the rest of
the object content fails a few lines later).
Note that we _could_ just ignore the error and mark it as a
non-collision. That would let you "git fetch" to replace a
corrupted object. But it's not a very reliable method for
repairing a repository. The earlier want/have negotiation
tries to get the other side to omit objects we already have,
and it would not realize that we are "missing" this
corrupted object. So we're better off complaining loudly
when we see corruption, and letting the user take more
drastic measures to repair (like making a full clone
elsewhere and copying the pack into place).
Note that the test sets transfer.unpackLimit in the
receiving repository so that we use index-pack (which is
what does the collision check). Normally for such a small
push we'd use unpack-objects, which would simply try to
write the loose object, and discard the new one when we see
that there's already an old one.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
if (has_type < 0)
|
|
|
|
die(_("cannot read existing object info %s"), oid_to_hex(oid));
|
|
|
|
if (has_type != type || has_size != size)
|
|
|
|
die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(oid));
|
sha1_file: convert read_sha1_file to struct object_id
Convert read_sha1_file to take a pointer to struct object_id and rename
it read_object_file. Do the same for read_sha1_file_extended.
Convert one use in grep.c to use the new function without any other code
change, since the pointer being passed is a void pointer that is already
initialized with a pointer to struct object_id. Update the declaration
and definitions of the modified functions, and apply the following
semantic patch to convert the remaining callers:
@@
expression E1, E2, E3;
@@
- read_sha1_file(E1.hash, E2, E3)
+ read_object_file(&E1, E2, E3)
@@
expression E1, E2, E3;
@@
- read_sha1_file(E1->hash, E2, E3)
+ read_object_file(E1, E2, E3)
@@
expression E1, E2, E3, E4;
@@
- read_sha1_file_extended(E1.hash, E2, E3, E4)
+ read_object_file_extended(&E1, E2, E3, E4)
@@
expression E1, E2, E3, E4;
@@
- read_sha1_file_extended(E1->hash, E2, E3, E4)
+ read_object_file_extended(E1, E2, E3, E4)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
has_data = read_object_file(oid, &has_type, &has_size);
|
|
|
|
read_unlock();
|
|
|
|
if (!data)
|
|
|
|
data = new_data = get_data_from_pack(obj_entry);
|
|
|
|
if (!has_data)
|
|
|
|
die(_("cannot read existing object %s"), oid_to_hex(oid));
|
|
|
|
if (size != has_size || type != has_type ||
|
|
|
|
memcmp(data, has_data, size) != 0)
|
|
|
|
die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(oid));
|
|
|
|
free(has_data);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strict || do_fsck_object) {
|
|
|
|
read_lock();
|
|
|
|
if (type == OBJ_BLOB) {
|
|
|
|
struct blob *blob = lookup_blob(the_repository, oid);
|
|
|
|
if (blob)
|
|
|
|
blob->object.flags |= FLAG_CHECKED;
|
|
|
|
else
|
|
|
|
die(_("invalid blob object %s"), oid_to_hex(oid));
|
index-pack: check .gitmodules files with --strict
Now that the internal fsck code has all of the plumbing we
need, we can start checking incoming .gitmodules files.
Naively, it seems like we would just need to add a call to
fsck_finish() after we've processed all of the objects. And
that would be enough to cover the initial test included
here. But there are two extra bits:
1. We currently don't bother calling fsck_object() at all
for blobs, since it has traditionally been a noop. We'd
actually catch these blobs in fsck_finish() at the end,
but it's more efficient to check them when we already
have the object loaded in memory.
2. The second pass done by fsck_finish() needs to access
the objects, but we're actually indexing the pack in
this process. In theory we could give the fsck code a
special callback for accessing the in-pack data, but
it's actually quite tricky:
a. We don't have an internal efficient index mapping
oids to packfile offsets. We only generate it on
the fly as part of writing out the .idx file.
b. We'd still have to reconstruct deltas, which means
we'd basically have to replicate all of the
reading logic in packfile.c.
Instead, let's avoid running fsck_finish() until after
we've written out the .idx file, and then just add it
to our internal packed_git list.
This does mean that the objects are "in the repository"
before we finish our fsck checks. But unpack-objects
already exhibits this same behavior, and it's an
acceptable tradeoff here for the same reason: the
quarantine mechanism means that pushes will be
fully protected.
In addition to a basic push test in t7415, we add a sneaky
pack that reverses the usual object order in the pack,
requiring that index-pack access the tree and blob during
the "finish" step.
This already works for unpack-objects (since it will have
written out loose objects), but we'll check it with this
sneaky pack for good measure.
Signed-off-by: Jeff King <peff@peff.net>
7 years ago
|
|
|
if (do_fsck_object &&
|
|
|
|
fsck_object(&blob->object, (void *)data, size, &fsck_options))
|
|
|
|
die(_("fsck error in packed object"));
|
|
|
|
} else {
|
|
|
|
struct object *obj;
|
|
|
|
int eaten;
|
|
|
|
void *buf = (void *) data;
|
|
|
|
|
|
|
|
assert(data && "data can only be NULL for large _blobs_");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* we do not need to free the memory here, as the
|
|
|
|
* buf is deleted by the caller.
|
|
|
|
*/
|
|
|
|
obj = parse_object_buffer(the_repository, oid, type,
|
|
|
|
size, buf,
|
object: convert parse_object* to take struct object_id
Make parse_object, parse_object_or_die, and parse_object_buffer take a
pointer to struct object_id. Remove the temporary variables inserted
earlier, since they are no longer necessary. Transform all of the
callers using the following semantic patch:
@@
expression E1;
@@
- parse_object(E1.hash)
+ parse_object(&E1)
@@
expression E1;
@@
- parse_object(E1->hash)
+ parse_object(E1)
@@
expression E1, E2;
@@
- parse_object_or_die(E1.hash, E2)
+ parse_object_or_die(&E1, E2)
@@
expression E1, E2;
@@
- parse_object_or_die(E1->hash, E2)
+ parse_object_or_die(E1, E2)
@@
expression E1, E2, E3, E4, E5;
@@
- parse_object_buffer(E1.hash, E2, E3, E4, E5)
+ parse_object_buffer(&E1, E2, E3, E4, E5)
@@
expression E1, E2, E3, E4, E5;
@@
- parse_object_buffer(E1->hash, E2, E3, E4, E5)
+ parse_object_buffer(E1, E2, E3, E4, E5)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
&eaten);
|
|
|
|
if (!obj)
|
|
|
|
die(_("invalid %s"), type_name(type));
|
|
|
|
if (do_fsck_object &&
|
|
|
|
fsck_object(obj, buf, size, &fsck_options))
|
|
|
|
die(_("fsck error in packed object"));
|
|
|
|
if (strict && fsck_walk(obj, NULL, &fsck_options))
|
|
|
|
die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
|
|
|
|
|
|
|
|
if (obj->type == OBJ_TREE) {
|
|
|
|
struct tree *item = (struct tree *) obj;
|
|
|
|
item->buffer = NULL;
|
|
|
|
obj->parsed = 0;
|
|
|
|
}
|
|
|
|
if (obj->type == OBJ_COMMIT) {
|
|
|
|
struct commit *commit = (struct commit *) obj;
|
|
|
|
if (detach_commit_buffer(commit, NULL) != data)
|
|
|
|
BUG("parse_object_buffer transmogrified our buffer");
|
|
|
|
}
|
|
|
|
obj->flags |= FLAG_CHECKED;
|
|
|
|
}
|
|
|
|
read_unlock();
|
|
|
|
}
|
|
|
|
|
|
|
|
free(new_data);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ensure that this node has been reconstructed and return its contents.
|
|
|
|
*
|
|
|
|
* In the typical and best case, this node would already be reconstructed
|
|
|
|
* (through the invocation to resolve_delta() in threaded_second_pass()) and it
|
|
|
|
* would not be pruned. However, if pruning of this node was necessary due to
|
|
|
|
* reaching delta_base_cache_limit, this function will find the closest
|
|
|
|
* ancestor with reconstructed data that has not been pruned (or if there is
|
|
|
|
* none, the ultimate base object), and reconstruct each node in the delta
|
|
|
|
* chain in order to generate the reconstructed data for this node.
|
|
|
|
*/
|
|
|
|
static void *get_base_data(struct base_data *c)
|
|
|
|
{
|
|
|
|
if (!c->data) {
|
|
|
|
struct object_entry *obj = c->obj;
|
|
|
|
struct base_data **delta = NULL;
|
|
|
|
int delta_nr = 0, delta_alloc = 0;
|
|
|
|
|
|
|
|
while (is_delta_type(c->obj->type) && !c->data) {
|
|
|
|
ALLOC_GROW(delta, delta_nr + 1, delta_alloc);
|
|
|
|
delta[delta_nr++] = c;
|
|
|
|
c = c->base;
|
|
|
|
}
|
|
|
|
if (!delta_nr) {
|
|
|
|
c->data = get_data_from_pack(obj);
|
|
|
|
c->size = obj->size;
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
base_cache_used += c->size;
|
|
|
|
prune_base_data(c);
|
|
|
|
}
|
|
|
|
for (; delta_nr > 0; delta_nr--) {
|
|
|
|
void *base, *raw;
|
|
|
|
c = delta[delta_nr - 1];
|
|
|
|
obj = c->obj;
|
|
|
|
base = get_base_data(c->base);
|
|
|
|
raw = get_data_from_pack(obj);
|
|
|
|
c->data = patch_delta(
|
|
|
|
base, c->base->size,
|
|
|
|
raw, obj->size,
|
|
|
|
&c->size);
|
|
|
|
free(raw);
|
|
|
|
if (!c->data)
|
|
|
|
bad_object(obj->idx.offset, _("failed to apply delta"));
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
base_cache_used += c->size;
|
|
|
|
prune_base_data(c);
|
|
|
|
}
|
|
|
|
free(delta);
|
|
|
|
}
|
|
|
|
return c->data;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct base_data *make_base(struct object_entry *obj,
|
|
|
|
struct base_data *parent)
|
|
|
|
{
|
|
|
|
struct base_data *base = xcalloc(1, sizeof(struct base_data));
|
|
|
|
base->base = parent;
|
|
|
|
base->obj = obj;
|
|
|
|
find_ref_delta_children(&obj->idx.oid,
|
|
|
|
&base->ref_first, &base->ref_last);
|
|
|
|
find_ofs_delta_children(obj->idx.offset,
|
|
|
|
&base->ofs_first, &base->ofs_last);
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
base->children_remaining = base->ref_last - base->ref_first +
|
|
|
|
base->ofs_last - base->ofs_first + 2;
|
|
|
|
return base;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct base_data *resolve_delta(struct object_entry *delta_obj,
|
|
|
|
struct base_data *base)
|
|
|
|
{
|
|
|
|
void *delta_data, *result_data;
|
|
|
|
struct base_data *result;
|
|
|
|
unsigned long result_size;
|
|
|
|
|
|
|
|
if (show_stat) {
|
|
|
|
int i = delta_obj - objects;
|
|
|
|
int j = base->obj - objects;
|
|
|
|
obj_stat[i].delta_depth = obj_stat[j].delta_depth + 1;
|
|
|
|
deepest_delta_lock();
|
|
|
|
if (deepest_delta < obj_stat[i].delta_depth)
|
|
|
|
deepest_delta = obj_stat[i].delta_depth;
|
|
|
|
deepest_delta_unlock();
|
|
|
|
obj_stat[i].base_object_no = j;
|
|
|
|
}
|
|
|
|
delta_data = get_data_from_pack(delta_obj);
|
|
|
|
assert(base->data);
|
|
|
|
result_data = patch_delta(base->data, base->size,
|
|
|
|
delta_data, delta_obj->size, &result_size);
|
|
|
|
free(delta_data);
|
|
|
|
if (!result_data)
|
|
|
|
bad_object(delta_obj->idx.offset, _("failed to apply delta"));
|
|
|
|
hash_object_file(the_hash_algo, result_data, result_size,
|
|
|
|
type_name(delta_obj->real_type), &delta_obj->idx.oid);
|
|
|
|
sha1_object(result_data, NULL, result_size, delta_obj->real_type,
|
|
|
|
&delta_obj->idx.oid);
|
|
|
|
|
|
|
|
result = make_base(delta_obj, base);
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
result->data = result_data;
|
|
|
|
result->size = result_size;
|
|
|
|
|
|
|
|
counter_lock();
|
|
|
|
nr_resolved_deltas++;
|
|
|
|
counter_unlock();
|
index-pack: fix race condition with duplicate bases
When we are resolving deltas in an indexed pack, we do it by
first selecting a potential base (either one stored in full
in the pack, or one created by resolving another delta), and
then resolving any deltas that use that base. When we
resolve a particular delta, we flip its "real_type" field
from OBJ_{REF,OFS}_DELTA to whatever the real type is.
We assume that traversing the objects this way will visit
each delta only once. This is correct for most packs; we
visit the delta only when we process its base, and each
object (and thus each base) appears only once. However, if a
base object appears multiple times in the pack, we will try
to resolve any deltas based on it once for each instance.
We can detect this case by noting that a delta we are about
to resolve has already had its real_type field flipped, and
we already do so with an assert(). However, if multiple
threads are in use, we may race with another thread on
comparing and flipping the field. We need to synchronize the
access.
The right mechanism for doing this is a compare-and-swap (we
atomically "claim" the delta for our own and find out
whether our claim was successful). We can implement this
in C by using a pthread mutex to protect the operation. This
is not the fastest way of doing a compare-and-swap; many
processors provide instructions for this, and gcc and other
compilers provide builtins to access them. However, some
experiments showed that lock contention does not cause a
significant slowdown here. Adding c-a-s support for many
compilers would increase the maintenance burden (and we
would still end up including the pthread version as a
fallback).
Note that we only need to touch the OBJ_REF_DELTA codepath
here. An OBJ_OFS_DELTA object points to its base using an
offset, and therefore has only one base, even if another
copy of that base object appears in the pack (we do still
touch it briefly because the setting of real_type is
factored out of resolve_data).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
static int compare_ofs_delta_entry(const void *a, const void *b)
|
|
|
|
{
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
const struct ofs_delta_entry *delta_a = a;
|
|
|
|
const struct ofs_delta_entry *delta_b = b;
|
|
|
|
|
index-pack: fix truncation of off_t in comparison
Commit c6458e6 (index-pack: kill union delta_base to save
memory, 2015-04-18) refactored the comparison functions used
in sorting and binary searching our delta list. The
resulting code does something like:
int cmp_offsets(off_t a, off_t b)
{
return a - b;
}
This works most of the time, but produces nonsensical
results when the difference between the two offsets is
larger than what can be stored in an "int". This can lead to
unresolved deltas if the packsize is larger than 2G (even on
64-bit systems, an int is still typically 32 bits):
$ git clone git://github.com/mozilla/gecko-dev
Cloning into 'gecko-dev'...
remote: Counting objects: 4800161, done.
remote: Compressing objects: 100% (178/178), done.
remote: Total 4800161 (delta 88), reused 0 (delta 0), pack-reused 4799978
Receiving objects: 100% (4800161/4800161), 2.21 GiB | 3.26 MiB/s, done.
Resolving deltas: 99% (3808820/3811944), completed with 0 local objects.
fatal: pack has 3124 unresolved deltas
fatal: index-pack failed
We can fix it by doing direct comparisons between the
offsets and returning constants; the callers only care about
the sign of the comparison, not the magnitude.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
return delta_a->offset < delta_b->offset ? -1 :
|
|
|
|
delta_a->offset > delta_b->offset ? 1 :
|
|
|
|
0;
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
}
|
|
|
|
|
|
|
|
static int compare_ref_delta_entry(const void *a, const void *b)
|
|
|
|
{
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
const struct ref_delta_entry *delta_a = a;
|
|
|
|
const struct ref_delta_entry *delta_b = b;
|
|
|
|
|
|
|
|
return oidcmp(&delta_a->oid, &delta_b->oid);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *threaded_second_pass(void *data)
|
|
|
|
{
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
if (data)
|
|
|
|
set_thread_data(data);
|
|
|
|
for (;;) {
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
struct base_data *parent = NULL;
|
|
|
|
struct object_entry *child_obj;
|
|
|
|
struct base_data *child;
|
|
|
|
|
|
|
|
counter_lock();
|
|
|
|
display_progress(progress, nr_resolved_deltas);
|
|
|
|
counter_unlock();
|
|
|
|
|
|
|
|
work_lock();
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
if (list_empty(&work_head)) {
|
|
|
|
/*
|
|
|
|
* Take an object from the object array.
|
|
|
|
*/
|
|
|
|
while (nr_dispatched < nr_objects &&
|
|
|
|
is_delta_type(objects[nr_dispatched].type))
|
|
|
|
nr_dispatched++;
|
|
|
|
if (nr_dispatched >= nr_objects) {
|
|
|
|
work_unlock();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
child_obj = &objects[nr_dispatched++];
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Peek at the top of the stack, and take a child from
|
|
|
|
* it.
|
|
|
|
*/
|
|
|
|
parent = list_first_entry(&work_head, struct base_data,
|
|
|
|
list);
|
|
|
|
|
|
|
|
if (parent->ref_first <= parent->ref_last) {
|
|
|
|
int offset = ref_deltas[parent->ref_first++].obj_no;
|
|
|
|
child_obj = objects + offset;
|
|
|
|
if (child_obj->real_type != OBJ_REF_DELTA)
|
|
|
|
die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)",
|
|
|
|
(uintmax_t) child_obj->idx.offset,
|
|
|
|
oid_to_hex(&parent->obj->idx.oid));
|
|
|
|
child_obj->real_type = parent->obj->real_type;
|
|
|
|
} else {
|
|
|
|
child_obj = objects +
|
|
|
|
ofs_deltas[parent->ofs_first++].obj_no;
|
|
|
|
assert(child_obj->real_type == OBJ_OFS_DELTA);
|
|
|
|
child_obj->real_type = parent->obj->real_type;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (parent->ref_first > parent->ref_last &&
|
|
|
|
parent->ofs_first > parent->ofs_last) {
|
|
|
|
/*
|
|
|
|
* This parent has run out of children, so move
|
|
|
|
* it to done_head.
|
|
|
|
*/
|
|
|
|
list_del(&parent->list);
|
|
|
|
list_add(&parent->list, &done_head);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ensure that the parent has data, since we will need
|
|
|
|
* it later.
|
|
|
|
*
|
|
|
|
* NEEDSWORK: If parent data needs to be reloaded, this
|
|
|
|
* prolongs the time that the current thread spends in
|
|
|
|
* the mutex. A mitigating factor is that parent data
|
|
|
|
* needs to be reloaded only if the delta base cache
|
|
|
|
* limit is exceeded, so in the typical case, this does
|
|
|
|
* not happen.
|
|
|
|
*/
|
|
|
|
get_base_data(parent);
|
|
|
|
parent->retain_data++;
|
|
|
|
}
|
|
|
|
work_unlock();
|
|
|
|
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
if (parent) {
|
|
|
|
child = resolve_delta(child_obj, parent);
|
|
|
|
if (!child->children_remaining)
|
|
|
|
FREE_AND_NULL(child->data);
|
|
|
|
} else {
|
|
|
|
child = make_base(child_obj, NULL);
|
|
|
|
if (child->children_remaining) {
|
|
|
|
/*
|
|
|
|
* Since this child has its own delta children,
|
|
|
|
* we will need this data in the future.
|
|
|
|
* Inflate now so that future iterations will
|
|
|
|
* have access to this object's data while
|
|
|
|
* outside the work mutex.
|
|
|
|
*/
|
|
|
|
child->data = get_data_from_pack(child_obj);
|
|
|
|
child->size = child_obj->size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
work_lock();
|
|
|
|
if (parent)
|
|
|
|
parent->retain_data--;
|
|
|
|
if (child->data) {
|
|
|
|
/*
|
|
|
|
* This child has its own children, so add it to
|
|
|
|
* work_head.
|
|
|
|
*/
|
|
|
|
list_add(&child->list, &work_head);
|
|
|
|
base_cache_used += child->size;
|
|
|
|
prune_base_data(NULL);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* This child does not have its own children. It may be
|
|
|
|
* the last descendant of its ancestors; free those
|
|
|
|
* that we can.
|
|
|
|
*/
|
|
|
|
struct base_data *p = parent;
|
|
|
|
|
|
|
|
while (p) {
|
|
|
|
struct base_data *next_p;
|
|
|
|
|
|
|
|
p->children_remaining--;
|
|
|
|
if (p->children_remaining)
|
|
|
|
break;
|
|
|
|
|
|
|
|
next_p = p->base;
|
|
|
|
free_base_data(p);
|
|
|
|
list_del(&p->list);
|
|
|
|
free(p);
|
|
|
|
|
|
|
|
p = next_p;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
work_unlock();
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First pass:
|
|
|
|
* - find locations of all objects;
|
|
|
|
* - calculate SHA1 of all non-delta objects;
|
|
|
|
* - remember base (SHA1 or offset) for all deltas.
|
|
|
|
*/
|
|
|
|
static void parse_pack_objects(unsigned char *hash)
|
|
|
|
{
|
|
|
|
int i, nr_delays = 0;
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
struct ofs_delta_entry *ofs_delta = ofs_deltas;
|
|
|
|
struct object_id ref_delta_oid;
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (verbose)
|
|
|
|
progress = start_progress(
|
|
|
|
from_stdin ? _("Receiving objects") : _("Indexing objects"),
|
|
|
|
nr_objects);
|
|
|
|
for (i = 0; i < nr_objects; i++) {
|
|
|
|
struct object_entry *obj = &objects[i];
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
void *data = unpack_raw_entry(obj, &ofs_delta->offset,
|
|
|
|
&ref_delta_oid,
|
|
|
|
&obj->idx.oid);
|
|
|
|
obj->real_type = obj->type;
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
if (obj->type == OBJ_OFS_DELTA) {
|
|
|
|
nr_ofs_deltas++;
|
|
|
|
ofs_delta->obj_no = i;
|
|
|
|
ofs_delta++;
|
|
|
|
} else if (obj->type == OBJ_REF_DELTA) {
|
|
|
|
ALLOC_GROW(ref_deltas, nr_ref_deltas + 1, ref_deltas_alloc);
|
|
|
|
oidcpy(&ref_deltas[nr_ref_deltas].oid, &ref_delta_oid);
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
ref_deltas[nr_ref_deltas].obj_no = i;
|
|
|
|
nr_ref_deltas++;
|
|
|
|
} else if (!data) {
|
|
|
|
/* large blobs, check later */
|
|
|
|
obj->real_type = OBJ_BAD;
|
|
|
|
nr_delays++;
|
|
|
|
} else
|
|
|
|
sha1_object(data, NULL, obj->size, obj->type,
|
|
|
|
&obj->idx.oid);
|
|
|
|
free(data);
|
|
|
|
display_progress(progress, i+1);
|
|
|
|
}
|
|
|
|
objects[i].idx.offset = consumed_bytes;
|
|
|
|
stop_progress(&progress);
|
|
|
|
|
|
|
|
/* Check pack integrity */
|
|
|
|
flush();
|
|
|
|
the_hash_algo->final_fn(hash, &input_ctx);
|
|
|
|
if (!hasheq(fill(the_hash_algo->rawsz), hash))
|
|
|
|
die(_("pack is corrupted (SHA1 mismatch)"));
|
|
|
|
use(the_hash_algo->rawsz);
|
|
|
|
|
|
|
|
/* If input_fd is a file, we should have reached its end now. */
|
|
|
|
if (fstat(input_fd, &st))
|
|
|
|
die_errno(_("cannot fstat packfile"));
|
git-bundle: assorted fixes
This patch fixes issues mentioned by Junio, Nico and Simon:
- I forgot to convert the usage string when removing the "--" from
the subcommands,
- a style fix in the bundle_header,
- use xread() instead of read(),
- use write_or_die() instead of write(),
- make the bundle header extensible,
- fail if the whitespace after a sha1 of a reference is missing,
- close() the fds passed to a subprocess,
- in verify_bundle(), do not use "rev-list --stdin", but rather
pass the revs directly (avoiding a fork()),
- fix a corrupted comment in show_object(), and
- fix the size check in index_pack.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <junkio@cox.net>
18 years ago
|
|
|
if (S_ISREG(st.st_mode) &&
|
|
|
|
lseek(input_fd, 0, SEEK_CUR) - input_len != st.st_size)
|
|
|
|
die(_("pack has junk at the end"));
|
|
|
|
|
|
|
|
for (i = 0; i < nr_objects; i++) {
|
|
|
|
struct object_entry *obj = &objects[i];
|
|
|
|
if (obj->real_type != OBJ_BAD)
|
|
|
|
continue;
|
|
|
|
obj->real_type = obj->type;
|
|
|
|
sha1_object(NULL, obj, obj->size, obj->type,
|
|
|
|
&obj->idx.oid);
|
|
|
|
nr_delays--;
|
|
|
|
}
|
|
|
|
if (nr_delays)
|
|
|
|
die(_("confusion beyond insanity in parse_pack_objects()"));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Second pass:
|
|
|
|
* - for all non-delta objects, look if it is used as a base for
|
|
|
|
* deltas;
|
|
|
|
* - if used as a base, uncompress the object and apply all deltas,
|
|
|
|
* recursively checking if the resulting object is used as a base
|
|
|
|
* for some more deltas.
|
|
|
|
*/
|
|
|
|
static void resolve_deltas(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
if (!nr_ofs_deltas && !nr_ref_deltas)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Sort deltas by base SHA1/offset for fast searching */
|
|
|
|
QSORT(ofs_deltas, nr_ofs_deltas, compare_ofs_delta_entry);
|
|
|
|
QSORT(ref_deltas, nr_ref_deltas, compare_ref_delta_entry);
|
|
|
|
|
index-pack: add flag for showing delta-resolution progress
The index-pack command has two progress meters: one for
"receiving objects", and one for "resolving deltas". You get
neither by default, or both with "-v".
But for a push through receive-pack, we would want only the
"resolving deltas" phase, _not_ the "receiving objects"
progress. There are two reasons for this.
One is simply that existing clients are already printing
"writing objects" progress at the same time. Arguably
"receiving" from the far end is more useful, because it
tells you what has actually gotten there, as opposed to what
might be stuck in a buffer somewhere between the client and
server. But that would require a protocol extension to tell
clients not to print their progress. Possible, but
complexity for little gain.
The second reason is much more important. In a full-duplex
connection like git-over-ssh, we can print progress while
the pack is incoming, and it will immediately get to the
client. But for a half-duplex connection like git-over-http,
we should not say anything until we have received the full
request. Anything we write is subject to being stuck in a
buffer by the webserver. Worse, we can end up in a deadlock
if that buffer fills up.
So our best bet is to avoid writing anything that isn't a
small fixed size until we've received the full pack.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
if (verbose || show_resolving_progress)
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
progress = start_progress(_("Resolving deltas"),
|
|
|
|
nr_ref_deltas + nr_ofs_deltas);
|
|
|
|
|
|
|
|
nr_dispatched = 0;
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
base_cache_limit = delta_base_cache_limit * nr_threads;
|
|
|
|
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
|
|
|
|
init_thread();
|
|
|
|
for (i = 0; i < nr_threads; i++) {
|
|
|
|
int ret = pthread_create(&thread_data[i].thread, NULL,
|
|
|
|
threaded_second_pass, thread_data + i);
|
|
|
|
if (ret)
|
|
|
|
die(_("unable to create thread: %s"),
|
|
|
|
strerror(ret));
|
|
|
|
}
|
|
|
|
for (i = 0; i < nr_threads; i++)
|
|
|
|
pthread_join(thread_data[i].thread, NULL);
|
|
|
|
cleanup_thread();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
threaded_second_pass(¬hread_data);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Third pass:
|
|
|
|
* - append objects to convert thin pack to full pack if required
|
|
|
|
* - write the final pack hash
|
|
|
|
*/
|
|
|
|
static void fix_unresolved_deltas(struct hashfile *f);
|
|
|
|
static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned char *pack_hash)
|
|
|
|
{
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
if (nr_ref_deltas + nr_ofs_deltas == nr_resolved_deltas) {
|
|
|
|
stop_progress(&progress);
|
|
|
|
/* Flush remaining pack final hash. */
|
|
|
|
flush();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fix_thin_pack) {
|
|
|
|
struct hashfile *f;
|
|
|
|
unsigned char read_hash[GIT_MAX_RAWSZ], tail_hash[GIT_MAX_RAWSZ];
|
|
|
|
struct strbuf msg = STRBUF_INIT;
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
int nr_unresolved = nr_ofs_deltas + nr_ref_deltas - nr_resolved_deltas;
|
|
|
|
int nr_objects_initial = nr_objects;
|
|
|
|
if (nr_unresolved <= 0)
|
|
|
|
die(_("confusion beyond insanity"));
|
|
|
|
REALLOC_ARRAY(objects, nr_objects + nr_unresolved + 1);
|
|
|
|
memset(objects + nr_objects + 1, 0,
|
|
|
|
nr_unresolved * sizeof(*objects));
|
|
|
|
f = hashfd(output_fd, curr_pack);
|
index-pack: fix allocation of sorted_by_pos array
When c6458e60 (index-pack: kill union delta_base to save memory,
2015-04-18) attempted to reduce the memory footprint of index-pack,
one of the key thing it did was to keep track of ref-deltas and
ofs-deltas separately.
In fix_unresolved_deltas(), however it forgot that it now wants to
look only at ref deltas in one place. The code allocated an array
for nr_unresolved, which is sum of number of ref- and ofs-deltas
minus nr_resolved, which may be larger or smaller than the number
ref-deltas. Depending on nr_resolved, this was either under or over
allocating.
Also, the old code before this change had to use 'i' and 'n' because
some of the things we see in the (old) deltas[] array we scanned
with 'i' would not make it into the sorted_by_pos[] array in the old
world order, but now because you have only ref delta in a separate
ref_deltas[] array, they increment lock&step. We no longer need
separate variables. And most importantly, we shouldn't pass the
nr_unresolved parameter, as this number does not play a role in the
working of this helper function.
Helped-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
fix_unresolved_deltas(f);
|
|
|
|
strbuf_addf(&msg, Q_("completed with %d local object",
|
|
|
|
"completed with %d local objects",
|
|
|
|
nr_objects - nr_objects_initial),
|
|
|
|
nr_objects - nr_objects_initial);
|
|
|
|
stop_progress_msg(&progress, msg.buf);
|
|
|
|
strbuf_release(&msg);
|
|
|
|
finalize_hashfile(f, tail_hash, 0);
|
|
|
|
hashcpy(read_hash, pack_hash);
|
|
|
|
fixup_pack_header_footer(output_fd, pack_hash,
|
|
|
|
curr_pack, nr_objects,
|
|
|
|
read_hash, consumed_bytes-the_hash_algo->rawsz);
|
|
|
|
if (!hasheq(read_hash, tail_hash))
|
|
|
|
die(_("Unexpected tail checksum for %s "
|
|
|
|
"(disk corruption?)"), curr_pack);
|
|
|
|
}
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
if (nr_ofs_deltas + nr_ref_deltas != nr_resolved_deltas)
|
|
|
|
die(Q_("pack has %d unresolved delta",
|
|
|
|
"pack has %d unresolved deltas",
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
nr_ofs_deltas + nr_ref_deltas - nr_resolved_deltas),
|
|
|
|
nr_ofs_deltas + nr_ref_deltas - nr_resolved_deltas);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int write_compressed(struct hashfile *f, void *in, unsigned int size)
|
|
|
|
{
|
|
|
|
git_zstream stream;
|
|
|
|
int status;
|
|
|
|
unsigned char outbuf[4096];
|
|
|
|
|
|
|
|
git_deflate_init(&stream, zlib_compression_level);
|
|
|
|
stream.next_in = in;
|
|
|
|
stream.avail_in = size;
|
|
|
|
|
|
|
|
do {
|
|
|
|
stream.next_out = outbuf;
|
|
|
|
stream.avail_out = sizeof(outbuf);
|
|
|
|
status = git_deflate(&stream, Z_FINISH);
|
|
|
|
hashwrite(f, outbuf, sizeof(outbuf) - stream.avail_out);
|
|
|
|
} while (status == Z_OK);
|
|
|
|
|
|
|
|
if (status != Z_STREAM_END)
|
|
|
|
die(_("unable to deflate appended object (%d)"), status);
|
|
|
|
size = stream.total_out;
|
|
|
|
git_deflate_end(&stream);
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct object_entry *append_obj_to_pack(struct hashfile *f,
|
|
|
|
const unsigned char *sha1, void *buf,
|
|
|
|
unsigned long size, enum object_type type)
|
|
|
|
{
|
|
|
|
struct object_entry *obj = &objects[nr_objects++];
|
|
|
|
unsigned char header[10];
|
|
|
|
unsigned long s = size;
|
|
|
|
int n = 0;
|
|
|
|
unsigned char c = (type << 4) | (s & 15);
|
|
|
|
s >>= 4;
|
|
|
|
while (s) {
|
|
|
|
header[n++] = c | 0x80;
|
|
|
|
c = s & 0x7f;
|
|
|
|
s >>= 7;
|
|
|
|
}
|
|
|
|
header[n++] = c;
|
|
|
|
crc32_begin(f);
|
|
|
|
hashwrite(f, header, n);
|
|
|
|
obj[0].size = size;
|
|
|
|
obj[0].hdr_size = n;
|
|
|
|
obj[0].type = type;
|
|
|
|
obj[0].real_type = type;
|
|
|
|
obj[1].idx.offset = obj[0].idx.offset + n;
|
|
|
|
obj[1].idx.offset += write_compressed(f, buf, size);
|
|
|
|
obj[0].idx.crc32 = crc32_end(f);
|
|
|
|
hashflush(f);
|
|
|
|
hashcpy(obj->idx.oid.hash, sha1);
|
|
|
|
return obj;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int delta_pos_compare(const void *_a, const void *_b)
|
|
|
|
{
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
struct ref_delta_entry *a = *(struct ref_delta_entry **)_a;
|
|
|
|
struct ref_delta_entry *b = *(struct ref_delta_entry **)_b;
|
|
|
|
return a->obj_no - b->obj_no;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void fix_unresolved_deltas(struct hashfile *f)
|
|
|
|
{
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
struct ref_delta_entry **sorted_by_pos;
|
index-pack: fix allocation of sorted_by_pos array
When c6458e60 (index-pack: kill union delta_base to save memory,
2015-04-18) attempted to reduce the memory footprint of index-pack,
one of the key thing it did was to keep track of ref-deltas and
ofs-deltas separately.
In fix_unresolved_deltas(), however it forgot that it now wants to
look only at ref deltas in one place. The code allocated an array
for nr_unresolved, which is sum of number of ref- and ofs-deltas
minus nr_resolved, which may be larger or smaller than the number
ref-deltas. Depending on nr_resolved, this was either under or over
allocating.
Also, the old code before this change had to use 'i' and 'n' because
some of the things we see in the (old) deltas[] array we scanned
with 'i' would not make it into the sorted_by_pos[] array in the old
world order, but now because you have only ref delta in a separate
ref_deltas[] array, they increment lock&step. We no longer need
separate variables. And most importantly, we shouldn't pass the
nr_unresolved parameter, as this number does not play a role in the
working of this helper function.
Helped-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
int i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Since many unresolved deltas may well be themselves base objects
|
|
|
|
* for more unresolved deltas, we really want to include the
|
|
|
|
* smallest number of base objects that would cover as much delta
|
|
|
|
* as possible by picking the
|
|
|
|
* trunc deltas first, allowing for other deltas to resolve without
|
|
|
|
* additional base objects. Since most base objects are to be found
|
|
|
|
* before deltas depending on them, a good heuristic is to start
|
|
|
|
* resolving deltas in the same order as their position in the pack.
|
|
|
|
*/
|
|
|
|
ALLOC_ARRAY(sorted_by_pos, nr_ref_deltas);
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
for (i = 0; i < nr_ref_deltas; i++)
|
index-pack: fix allocation of sorted_by_pos array
When c6458e60 (index-pack: kill union delta_base to save memory,
2015-04-18) attempted to reduce the memory footprint of index-pack,
one of the key thing it did was to keep track of ref-deltas and
ofs-deltas separately.
In fix_unresolved_deltas(), however it forgot that it now wants to
look only at ref deltas in one place. The code allocated an array
for nr_unresolved, which is sum of number of ref- and ofs-deltas
minus nr_resolved, which may be larger or smaller than the number
ref-deltas. Depending on nr_resolved, this was either under or over
allocating.
Also, the old code before this change had to use 'i' and 'n' because
some of the things we see in the (old) deltas[] array we scanned
with 'i' would not make it into the sorted_by_pos[] array in the old
world order, but now because you have only ref delta in a separate
ref_deltas[] array, they increment lock&step. We no longer need
separate variables. And most importantly, we shouldn't pass the
nr_unresolved parameter, as this number does not play a role in the
working of this helper function.
Helped-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
sorted_by_pos[i] = &ref_deltas[i];
|
|
|
|
QSORT(sorted_by_pos, nr_ref_deltas, delta_pos_compare);
|
|
|
|
|
|
|
|
if (has_promisor_remote()) {
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
/*
|
|
|
|
* Prefetch the delta bases.
|
|
|
|
*/
|
|
|
|
struct oid_array to_fetch = OID_ARRAY_INIT;
|
|
|
|
for (i = 0; i < nr_ref_deltas; i++) {
|
|
|
|
struct ref_delta_entry *d = sorted_by_pos[i];
|
|
|
|
if (!oid_object_info_extended(the_repository, &d->oid,
|
|
|
|
NULL,
|
|
|
|
OBJECT_INFO_FOR_PREFETCH))
|
|
|
|
continue;
|
|
|
|
oid_array_append(&to_fetch, &d->oid);
|
|
|
|
}
|
|
|
|
promisor_remote_get_direct(the_repository,
|
|
|
|
to_fetch.oid, to_fetch.nr);
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
oid_array_clear(&to_fetch);
|
|
|
|
}
|
|
|
|
|
index-pack: fix allocation of sorted_by_pos array
When c6458e60 (index-pack: kill union delta_base to save memory,
2015-04-18) attempted to reduce the memory footprint of index-pack,
one of the key thing it did was to keep track of ref-deltas and
ofs-deltas separately.
In fix_unresolved_deltas(), however it forgot that it now wants to
look only at ref deltas in one place. The code allocated an array
for nr_unresolved, which is sum of number of ref- and ofs-deltas
minus nr_resolved, which may be larger or smaller than the number
ref-deltas. Depending on nr_resolved, this was either under or over
allocating.
Also, the old code before this change had to use 'i' and 'n' because
some of the things we see in the (old) deltas[] array we scanned
with 'i' would not make it into the sorted_by_pos[] array in the old
world order, but now because you have only ref delta in a separate
ref_deltas[] array, they increment lock&step. We no longer need
separate variables. And most importantly, we shouldn't pass the
nr_unresolved parameter, as this number does not play a role in the
working of this helper function.
Helped-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
for (i = 0; i < nr_ref_deltas; i++) {
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
struct ref_delta_entry *d = sorted_by_pos[i];
|
|
|
|
enum object_type type;
|
|
|
|
void *data;
|
|
|
|
unsigned long size;
|
|
|
|
|
|
|
|
if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
|
|
|
|
continue;
|
|
|
|
data = read_object_file(&d->oid, &type, &size);
|
|
|
|
if (!data)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (check_object_signature(the_repository, &d->oid,
|
|
|
|
data, size,
|
|
|
|
type_name(type)))
|
|
|
|
die(_("local object %s is corrupt"), oid_to_hex(&d->oid));
|
index-pack: make quantum of work smaller
Currently, when index-pack resolves deltas, it does not split up delta
trees into threads: each delta base root (an object that is not a
REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on
that root (direct or indirect) are processed in the same thread.
This is a problem when a repository contains a large text file (thus,
delta-able) that is modified many times - delta resolution time during
fetching is dominated by processing the deltas corresponding to that
text file.
This patch contains a solution to that. When cloning using
git -c core.deltabasecachelimit=1g clone \
https://fuchsia.googlesource.com/third_party/vulkan-cts
on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads,
which is the default).
The solution is to have a global work stack. This stack contains delta
bases (objects, whether appearing directly in the packfile or generated
by delta resolution, that themselves have delta children) that need to
be processed; whenever a thread needs work, it peeks at the top of the
stack and processes its next unprocessed child. If a thread finds the
stack empty, it will look for more delta base roots to push on the stack
instead.
The main weakness of having a global work stack is that more time is
spent in the mutex, but profiling has shown that most time is spent in
the resolution of the deltas themselves, so this shouldn't be an issue
in practice. In any case, experimentation (as described in the clone
command above) shows that this patch is a net improvement.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
|
|
|
|
/*
|
|
|
|
* Add this as an object to the objects array and call
|
|
|
|
* threaded_second_pass() (which will pick up the added
|
|
|
|
* object).
|
|
|
|
*/
|
|
|
|
append_obj_to_pack(f, d->oid.hash, data, size, type);
|
|
|
|
threaded_second_pass(NULL);
|
|
|
|
|
|
|
|
display_progress(progress, nr_resolved_deltas);
|
|
|
|
}
|
|
|
|
free(sorted_by_pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *derive_filename(const char *pack_name, const char *suffix,
|
|
|
|
struct strbuf *buf)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
if (!strip_suffix(pack_name, ".pack", &len))
|
|
|
|
die(_("packfile name '%s' does not end with '.pack'"),
|
|
|
|
pack_name);
|
|
|
|
strbuf_add(buf, pack_name, len);
|
|
|
|
strbuf_addch(buf, '.');
|
|
|
|
strbuf_addstr(buf, suffix);
|
|
|
|
return buf->buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void write_special_file(const char *suffix, const char *msg,
|
|
|
|
const char *pack_name, const unsigned char *hash,
|
|
|
|
const char **report)
|
|
|
|
{
|
|
|
|
struct strbuf name_buf = STRBUF_INIT;
|
|
|
|
const char *filename;
|
|
|
|
int fd;
|
|
|
|
int msg_len = strlen(msg);
|
|
|
|
|
|
|
|
if (pack_name)
|
|
|
|
filename = derive_filename(pack_name, suffix, &name_buf);
|
|
|
|
else
|
|
|
|
filename = odb_pack_name(&name_buf, hash, suffix);
|
|
|
|
|
|
|
|
fd = odb_pack_keep(filename);
|
|
|
|
if (fd < 0) {
|
|
|
|
if (errno != EEXIST)
|
|
|
|
die_errno(_("cannot write %s file '%s'"),
|
|
|
|
suffix, filename);
|
|
|
|
} else {
|
|
|
|
if (msg_len > 0) {
|
|
|
|
write_or_die(fd, msg, msg_len);
|
|
|
|
write_or_die(fd, "\n", 1);
|
|
|
|
}
|
|
|
|
if (close(fd) != 0)
|
|
|
|
die_errno(_("cannot close written %s file '%s'"),
|
|
|
|
suffix, filename);
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
if (report)
|
|
|
|
*report = suffix;
|
|
|
|
}
|
|
|
|
strbuf_release(&name_buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void final(const char *final_pack_name, const char *curr_pack_name,
|
|
|
|
const char *final_index_name, const char *curr_index_name,
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
const char *keep_msg, const char *promisor_msg,
|
|
|
|
unsigned char *hash)
|
|
|
|
{
|
|
|
|
const char *report = "pack";
|
index-pack: make pointer-alias fallbacks safer
The final() function accepts a NULL value for certain
parameters, and falls back to writing into a reusable "name"
buffer, and then either:
1. For "keep_name", requiring all uses to do "keep_name ?
keep_name : name.buf". This is awkward, and it's easy
to accidentally look at the maybe-NULL keep_name.
2. For "final_index_name" and "final_pack_name", aliasing
those pointers to the "name" buffer. This is easier to
use, but the aliased pointers become invalid after the
buffer is reused (this isn't a bug now, but it's a
potential pitfall).
One way to make this safer would be to introduce an extra
pointer to do the aliasing, and have its lifetime match the
validity of the "name" buffer. But it's still easy to
accidentally use the wrong name (i.e., to use
"final_pack_name" instead of the aliased pointer).
Instead, let's use three separate buffers that will remain
valid through the function. That makes it safe to alias the
pointers and use them consistently. The extra allocations
shouldn't matter, as this function is not performance
sensitive.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
struct strbuf pack_name = STRBUF_INIT;
|
|
|
|
struct strbuf index_name = STRBUF_INIT;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!from_stdin) {
|
|
|
|
close(input_fd);
|
|
|
|
} else {
|
|
|
|
fsync_or_die(output_fd, curr_pack_name);
|
|
|
|
err = close(output_fd);
|
|
|
|
if (err)
|
|
|
|
die_errno(_("error while closing pack file"));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (keep_msg)
|
|
|
|
write_special_file("keep", keep_msg, final_pack_name, hash,
|
|
|
|
&report);
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
if (promisor_msg)
|
|
|
|
write_special_file("promisor", promisor_msg, final_pack_name,
|
|
|
|
hash, NULL);
|
|
|
|
|
|
|
|
if (final_pack_name != curr_pack_name) {
|
|
|
|
if (!final_pack_name)
|
|
|
|
final_pack_name = odb_pack_name(&pack_name, hash, "pack");
|
|
|
|
if (finalize_object_file(curr_pack_name, final_pack_name))
|
|
|
|
die(_("cannot store pack file"));
|
|
|
|
} else if (from_stdin)
|
|
|
|
chmod(final_pack_name, 0444);
|
|
|
|
|
|
|
|
if (final_index_name != curr_index_name) {
|
|
|
|
if (!final_index_name)
|
|
|
|
final_index_name = odb_pack_name(&index_name, hash, "idx");
|
|
|
|
if (finalize_object_file(curr_index_name, final_index_name))
|
|
|
|
die(_("cannot store index file"));
|
|
|
|
} else
|
|
|
|
chmod(final_index_name, 0444);
|
|
|
|
|
index-pack: handle --strict checks of non-repo packs
Commit 73c3f0f704 (index-pack: check .gitmodules files with
--strict, 2018-05-04) added a call to add_packed_git(), with
the intent that the newly-indexed objects would be available
to the process when we run fsck_finish(). But that's not
what add_packed_git() does. It only allocates the struct,
and you must install_packed_git() on the result. So that
call was effectively doing nothing (except leaking a
struct).
But wait, we passed all of the tests! Does that mean we
don't need the call at all?
For normal cases, no. When we run "index-pack --stdin"
inside a repository, we write the new pack into the object
directory. If fsck_finish() needs to access one of the new
objects, then our initial lookup will fail to find it, but
we'll follow up by running reprepare_packed_git() and
looking again. That logic was meant to handle somebody else
repacking simultaneously, but it ends up working for us
here.
But there is a case that does need this, that we were not
testing. You can run "git index-pack foo.pack" on any file,
even when it is not inside the object directory. Or you may
not even be in a repository at all! This case fails without
doing the proper install_packed_git() call.
We can make this work by adding the install call.
Note that we should be prepared to handle add_packed_git()
failing. We can just silently ignore this case, though. If
fsck_finish() later needs the objects and they're not
available, it will complain itself. And if it doesn't
(because we were able to resolve the whole fsck in the first
pass), then it actually isn't an interesting error at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
if (do_fsck_object) {
|
|
|
|
struct packed_git *p;
|
|
|
|
p = add_packed_git(final_index_name, strlen(final_index_name), 0);
|
|
|
|
if (p)
|
|
|
|
install_packed_git(the_repository, p);
|
index-pack: handle --strict checks of non-repo packs
Commit 73c3f0f704 (index-pack: check .gitmodules files with
--strict, 2018-05-04) added a call to add_packed_git(), with
the intent that the newly-indexed objects would be available
to the process when we run fsck_finish(). But that's not
what add_packed_git() does. It only allocates the struct,
and you must install_packed_git() on the result. So that
call was effectively doing nothing (except leaking a
struct).
But wait, we passed all of the tests! Does that mean we
don't need the call at all?
For normal cases, no. When we run "index-pack --stdin"
inside a repository, we write the new pack into the object
directory. If fsck_finish() needs to access one of the new
objects, then our initial lookup will fail to find it, but
we'll follow up by running reprepare_packed_git() and
looking again. That logic was meant to handle somebody else
repacking simultaneously, but it ends up working for us
here.
But there is a case that does need this, that we were not
testing. You can run "git index-pack foo.pack" on any file,
even when it is not inside the object directory. Or you may
not even be in a repository at all! This case fails without
doing the proper install_packed_git() call.
We can make this work by adding the install call.
Note that we should be prepared to handle add_packed_git()
failing. We can just silently ignore this case, though. If
fsck_finish() later needs the objects and they're not
available, it will complain itself. And if it doesn't
(because we were able to resolve the whole fsck in the first
pass), then it actually isn't an interesting error at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
}
|
index-pack: check .gitmodules files with --strict
Now that the internal fsck code has all of the plumbing we
need, we can start checking incoming .gitmodules files.
Naively, it seems like we would just need to add a call to
fsck_finish() after we've processed all of the objects. And
that would be enough to cover the initial test included
here. But there are two extra bits:
1. We currently don't bother calling fsck_object() at all
for blobs, since it has traditionally been a noop. We'd
actually catch these blobs in fsck_finish() at the end,
but it's more efficient to check them when we already
have the object loaded in memory.
2. The second pass done by fsck_finish() needs to access
the objects, but we're actually indexing the pack in
this process. In theory we could give the fsck code a
special callback for accessing the in-pack data, but
it's actually quite tricky:
a. We don't have an internal efficient index mapping
oids to packfile offsets. We only generate it on
the fly as part of writing out the .idx file.
b. We'd still have to reconstruct deltas, which means
we'd basically have to replicate all of the
reading logic in packfile.c.
Instead, let's avoid running fsck_finish() until after
we've written out the .idx file, and then just add it
to our internal packed_git list.
This does mean that the objects are "in the repository"
before we finish our fsck checks. But unpack-objects
already exhibits this same behavior, and it's an
acceptable tradeoff here for the same reason: the
quarantine mechanism means that pushes will be
fully protected.
In addition to a basic push test in t7415, we add a sneaky
pack that reverses the usual object order in the pack,
requiring that index-pack access the tree and blob during
the "finish" step.
This already works for unpack-objects (since it will have
written out loose objects), but we'll check it with this
sneaky pack for good measure.
Signed-off-by: Jeff King <peff@peff.net>
7 years ago
|
|
|
|
|
|
|
if (!from_stdin) {
|
|
|
|
printf("%s\n", hash_to_hex(hash));
|
|
|
|
} else {
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
|
|
|
|
strbuf_addf(&buf, "%s\t%s\n", report, hash_to_hex(hash));
|
|
|
|
write_or_die(1, buf.buf, buf.len);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Let's just mimic git-unpack-objects here and write
|
|
|
|
* the last part of the input buffer to stdout.
|
|
|
|
*/
|
|
|
|
while (input_len) {
|
|
|
|
err = xwrite(1, input_buffer + input_offset, input_len);
|
|
|
|
if (err <= 0)
|
|
|
|
break;
|
|
|
|
input_len -= err;
|
|
|
|
input_offset += err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
index-pack: make pointer-alias fallbacks safer
The final() function accepts a NULL value for certain
parameters, and falls back to writing into a reusable "name"
buffer, and then either:
1. For "keep_name", requiring all uses to do "keep_name ?
keep_name : name.buf". This is awkward, and it's easy
to accidentally look at the maybe-NULL keep_name.
2. For "final_index_name" and "final_pack_name", aliasing
those pointers to the "name" buffer. This is easier to
use, but the aliased pointers become invalid after the
buffer is reused (this isn't a bug now, but it's a
potential pitfall).
One way to make this safer would be to introduce an extra
pointer to do the aliasing, and have its lifetime match the
validity of the "name" buffer. But it's still easy to
accidentally use the wrong name (i.e., to use
"final_pack_name" instead of the aliased pointer).
Instead, let's use three separate buffers that will remain
valid through the function. That makes it safe to alias the
pointers and use them consistently. The extra allocations
shouldn't matter, as this function is not performance
sensitive.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
strbuf_release(&index_name);
|
|
|
|
strbuf_release(&pack_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int git_index_pack_config(const char *k, const char *v, void *cb)
|
|
|
|
{
|
|
|
|
struct pack_idx_option *opts = cb;
|
|
|
|
|
|
|
|
if (!strcmp(k, "pack.indexversion")) {
|
|
|
|
opts->version = git_config_int(k, v);
|
|
|
|
if (opts->version > 2)
|
|
|
|
die(_("bad pack.indexversion=%"PRIu32), opts->version);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(k, "pack.threads")) {
|
|
|
|
nr_threads = git_config_int(k, v);
|
|
|
|
if (nr_threads < 0)
|
|
|
|
die(_("invalid number of threads specified (%d)"),
|
|
|
|
nr_threads);
|
|
|
|
if (!HAVE_THREADS && nr_threads != 1) {
|
|
|
|
warning(_("no threads support, ignoring %s"), k);
|
|
|
|
nr_threads = 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return git_default_config(k, v, cb);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cmp_uint32(const void *a_, const void *b_)
|
|
|
|
{
|
|
|
|
uint32_t a = *((uint32_t *)a_);
|
|
|
|
uint32_t b = *((uint32_t *)b_);
|
|
|
|
|
|
|
|
return (a < b) ? -1 : (a != b);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void read_v2_anomalous_offsets(struct packed_git *p,
|
|
|
|
struct pack_idx_option *opts)
|
|
|
|
{
|
|
|
|
const uint32_t *idx1, *idx2;
|
|
|
|
uint32_t i;
|
|
|
|
|
|
|
|
/* The address of the 4-byte offset table */
|
|
|
|
idx1 = (((const uint32_t *)((const uint8_t *)p->index_data + p->crc_offset))
|
compute pack .idx byte offsets using size_t
A pack and its matching .idx file are limited to 2^32 objects, because
the pack format contains a 32-bit field to store the number of objects.
Hence we use uint32_t in the code.
But the byte count of even a .idx file can be much larger than that,
because it stores at least a hash and an offset for each object. So
using SHA-1, a v2 .idx file will cross the 4GB boundary at 153,391,650
objects. This confuses load_idx(), which computes the minimum size like
this:
unsigned long min_size = 8 + 4*256 + nr*(hashsz + 4 + 4) + hashsz + hashsz;
Even though min_size will be big enough on most 64-bit platforms, the
actual arithmetic is done as a uint32_t, resulting in a truncation. We
actually exceed that min_size, but then we do:
unsigned long max_size = min_size;
if (nr)
max_size += (nr - 1)*8;
to account for the variable-sized table. That computation doesn't
overflow quite so low, but with the truncation for min_size, we end up
with a max_size that is much smaller than our actual size. So we
complain that the idx is invalid, and can't find any of its objects.
We can fix this case by casting "nr" to a size_t, which will do the
multiplication in 64-bits (assuming you're on a 64-bit platform; this
will never work on a 32-bit system since we couldn't map the whole .idx
anyway). Likewise, we don't have to worry about further additions,
because adding a smaller number to a size_t will convert the other side
to a size_t.
A few notes:
- obviously we could just declare "nr" as a size_t in the first place
(and likewise, packed_git.num_objects). But it's conceptually a
uint32_t because of the on-disk format, and we correctly treat it
that way in other contexts that don't need to compute byte offsets
(e.g., iterating over the set of objects should and generally does
use a uint32_t). Switching to size_t would make all of those other
cases look wrong.
- it could be argued that the proper type is off_t to represent the
file offset. But in practice the .idx file must fit within memory,
because we mmap the whole thing. And the rest of the code (including
the idx_size variable we're comparing against) uses size_t.
- we'll add the same cast to the max_size arithmetic line. Even though
we're adding to a larger type, which will convert our result, the
multiplication is still done as a 32-bit value and can itself
overflow. I didn't check this with my test case, since it would need
an even larger pack (~530M objects), but looking at compiler output
shows that it works this way. The standard should agree, but I
couldn't find anything explicit in 6.3.1.8 ("usual arithmetic
conversions").
The case in load_idx() was the most immediate one that I was able to
trigger. After fixing it, looking up actual objects (including the very
last one in sha1 order) works in a test repo with 153,725,110 objects.
That's because bsearch_hash() works with uint32_t entry indices, and the
actual byte access:
int cmp = hashcmp(table + mi * stride, sha1);
is done with "stride" as a size_t, causing the uint32_t "mi" to be
promoted to a size_t. This is the way most code will access the index
data.
However, I audited all of the other byte-wise accesses of
packed_git.index_data, and many of the others are suspect (they are
similar to the max_size one, where we are adding to a properly sized
offset or directly to a pointer, but the multiplication in the
sub-expression can overflow). I didn't trigger any of these in practice,
but I believe they're potential problems, and certainly adding in the
cast is not going to hurt anything here.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
+ (size_t)p->num_objects /* CRC32 table */
|
|
|
|
);
|
|
|
|
|
|
|
|
/* The address of the 8-byte offset table */
|
|
|
|
idx2 = idx1 + p->num_objects;
|
|
|
|
|
|
|
|
for (i = 0; i < p->num_objects; i++) {
|
|
|
|
uint32_t off = ntohl(idx1[i]);
|
|
|
|
if (!(off & 0x80000000))
|
|
|
|
continue;
|
|
|
|
off = off & 0x7fffffff;
|
|
|
|
check_pack_index_ptr(p, &idx2[off * 2]);
|
|
|
|
if (idx2[off * 2])
|
|
|
|
continue;
|
|
|
|
/*
|
|
|
|
* The real offset is ntohl(idx2[off * 2]) in high 4
|
|
|
|
* octets, and ntohl(idx2[off * 2 + 1]) in low 4
|
|
|
|
* octets. But idx2[off * 2] is Zero!!!
|
|
|
|
*/
|
|
|
|
ALLOC_GROW(opts->anomaly, opts->anomaly_nr + 1, opts->anomaly_alloc);
|
|
|
|
opts->anomaly[opts->anomaly_nr++] = ntohl(idx2[off * 2 + 1]);
|
|
|
|
}
|
|
|
|
|
|
|
|
QSORT(opts->anomaly, opts->anomaly_nr, cmp_uint32);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
|
|
|
|
{
|
|
|
|
struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
|
|
|
|
|
|
|
|
if (!p)
|
|
|
|
die(_("Cannot open existing pack file '%s'"), pack_name);
|
|
|
|
if (open_pack_index(p))
|
|
|
|
die(_("Cannot open existing pack idx file for '%s'"), pack_name);
|
|
|
|
|
|
|
|
/* Read the attributes from the existing idx file */
|
|
|
|
opts->version = p->index_version;
|
|
|
|
|
|
|
|
if (opts->version == 2)
|
|
|
|
read_v2_anomalous_offsets(p, opts);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get rid of the idx file as we do not need it anymore.
|
|
|
|
* NEEDSWORK: extract this bit from free_pack_by_name() in
|
|
|
|
* sha1-file.c, perhaps? It shouldn't matter very much as we
|
|
|
|
* know we haven't installed this pack (hence we never have
|
|
|
|
* read anything from it).
|
|
|
|
*/
|
|
|
|
close_pack_index(p);
|
|
|
|
free(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void show_pack_info(int stat_only)
|
|
|
|
{
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
int i, baseobjects = nr_objects - nr_ref_deltas - nr_ofs_deltas;
|
|
|
|
unsigned long *chain_histogram = NULL;
|
|
|
|
|
|
|
|
if (deepest_delta)
|
|
|
|
chain_histogram = xcalloc(deepest_delta, sizeof(unsigned long));
|
|
|
|
|
|
|
|
for (i = 0; i < nr_objects; i++) {
|
|
|
|
struct object_entry *obj = &objects[i];
|
|
|
|
|
|
|
|
if (is_delta_type(obj->type))
|
|
|
|
chain_histogram[obj_stat[i].delta_depth - 1]++;
|
|
|
|
if (stat_only)
|
|
|
|
continue;
|
|
|
|
printf("%s %-6s %"PRIuMAX" %"PRIuMAX" %"PRIuMAX,
|
|
|
|
oid_to_hex(&obj->idx.oid),
|
|
|
|
type_name(obj->real_type), (uintmax_t)obj->size,
|
|
|
|
(uintmax_t)(obj[1].idx.offset - obj->idx.offset),
|
|
|
|
(uintmax_t)obj->idx.offset);
|
|
|
|
if (is_delta_type(obj->type)) {
|
|
|
|
struct object_entry *bobj = &objects[obj_stat[i].base_object_no];
|
|
|
|
printf(" %u %s", obj_stat[i].delta_depth,
|
|
|
|
oid_to_hex(&bobj->idx.oid));
|
|
|
|
}
|
|
|
|
putchar('\n');
|
|
|
|
}
|
|
|
|
|
|
|
|
if (baseobjects)
|
|
|
|
printf_ln(Q_("non delta: %d object",
|
|
|
|
"non delta: %d objects",
|
|
|
|
baseobjects),
|
|
|
|
baseobjects);
|
|
|
|
for (i = 0; i < deepest_delta; i++) {
|
|
|
|
if (!chain_histogram[i])
|
|
|
|
continue;
|
|
|
|
printf_ln(Q_("chain length = %d: %lu object",
|
|
|
|
"chain length = %d: %lu objects",
|
|
|
|
chain_histogram[i]),
|
|
|
|
i + 1,
|
|
|
|
chain_histogram[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int cmd_index_pack(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
|
|
|
int i, fix_thin_pack = 0, verify = 0, stat_only = 0;
|
|
|
|
const char *curr_index;
|
|
|
|
const char *index_name = NULL, *pack_name = NULL;
|
|
|
|
const char *keep_msg = NULL;
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
const char *promisor_msg = NULL;
|
|
|
|
struct strbuf index_name_buf = STRBUF_INIT;
|
|
|
|
struct pack_idx_entry **idx_objects;
|
|
|
|
struct pack_idx_option opts;
|
|
|
|
unsigned char pack_hash[GIT_MAX_RAWSZ];
|
|
|
|
unsigned foreign_nr = 1; /* zero is a "good" value, assume bad */
|
receive-pack: send keepalives during quiet periods
After a client has sent us the complete pack, we may spend
some time processing the data and running hooks. If the
client asked us to be quiet, receive-pack won't send any
progress data during the index-pack or connectivity-check
steps. And hooks may or may not produce their own progress
output. In these cases, the network connection is totally
silent from both ends.
Git itself doesn't care about this (it will wait forever),
but other parts of the system (e.g., firewalls,
load-balancers, etc) might hang up the connection. So we'd
like to send some sort of keepalive to let the network and
the client side know that we're still alive and processing.
We can use the same trick we did in 05e9515 (upload-pack:
send keepalive packets during pack computation, 2013-09-08).
Namely, we will send an empty sideband data packet every `N`
seconds that we do not relay any stderr data over the
sideband channel. As with 05e9515, this means that we won't
bother sending keepalives when there's actual progress data,
but will kick in when it has been disabled (or if there is a
lull in the progress data).
The concept is simple, but the details are subtle enough
that they need discussing here.
Before the client sends us the pack, we don't want to do any
keepalives. We'll have sent our ref advertisement, and we're
waiting for them to send us the pack (and tell us that they
support sidebands at all).
While we're receiving the pack from the client (or waiting
for it to start), there's no need for keepalives; it's up to
them to keep the connection active by sending data.
Moreover, it would be wrong for us to do so. When we are the
server in the smart-http protocol, we must treat our
connection as half-duplex. So any keepalives we send while
receiving the pack would potentially be buffered by the
webserver. Not only does this make them useless (since they
would not be delivered in a timely manner), but it could
actually cause a deadlock if we fill up the buffer with
keepalives. (It wouldn't be wrong to send keepalives in this
phase for a full-duplex connection like ssh; it's simply
pointless, as it is the client's responsibility to speak).
As soon as we've gotten all of the pack data, then the
client is waiting for us to speak, and we should start
keepalives immediately. From here until the end of the
connection, we send one any time we are not otherwise
sending data.
But there's a catch. Receive-pack doesn't know the moment
we've gotten all the data. It passes the descriptor to
index-pack, who reads all of the data, and then starts
resolving the deltas. We have to communicate that back.
To make this work, we instruct the sideband muxer to enable
keepalives in three phases:
1. In the beginning, not at all.
2. While reading from index-pack, wait for a signal
indicating end-of-input, and then start them.
3. Afterwards, always.
The signal from index-pack in phase 2 has to come over the
stderr channel which the muxer is reading. We can't use an
extra pipe because the portable run-command interface only
gives us stderr and stdout.
Stdout is already used to pass the .keep filename back to
receive-pack. We could also send a signal there, but then we
would find out about it in the main thread. And the
keepalive needs to be done by the async muxer thread (since
it's the one writing sideband data back to the client). And
we can't reliably signal the async thread from the main
thread, because the async code sometimes uses threads and
sometimes uses forked processes.
Therefore the signal must come over the stderr channel,
where it may be interspersed with other random
human-readable messages from index-pack. This patch makes
the signal a single NUL byte. This is easy to parse, should
not appear in any normal stderr output, and we don't have to
worry about any timing issues (like seeing half the signal
bytes in one read(), and half in a subsequent one).
This is a bit ugly, but it's simple to code and should work
reliably.
Another option would be to stop using an async thread for
muxing entirely, and just poll() both stderr and stdout of
index-pack from the main thread. This would work for
index-pack (because we aren't doing anything useful in the
main thread while it runs anyway). But it would make the
connectivity check and the hook muxers much more
complicated, as they need to simultaneously feed the
sub-programs while reading their stderr.
The index-pack phase is the only one that needs this
signaling, so it could simply behave differently than the
other two. That would mean having two separate
implementations of copy_to_sideband (and the keepalive
code), though. And it still doesn't get rid of the
signaling; it just means we can write a nicer message like
"END_OF_INPUT" or something on stdout, since we don't have
to worry about separating it from the stderr cruft.
One final note: this signaling trick is only done with
index-pack, not with unpack-objects. There's no point in
doing it for the latter, because by definition it only kicks
in for a small number of objects, where keepalives are not
as useful (and this conveniently lets us avoid duplicating
the implementation).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
int report_end_of_input = 0;
|
|
|
|
int hash_algo = 0;
|
|
|
|
|
|
|
|
/*
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
* index-pack never needs to fetch missing objects except when
|
|
|
|
* REF_DELTA bases are missing (which are explicitly handled). It only
|
|
|
|
* accesses the repo to do hash collision checks and to check which
|
|
|
|
* REF_DELTA bases need to be fetched.
|
|
|
|
*/
|
|
|
|
fetch_if_missing = 0;
|
|
|
|
|
|
|
|
if (argc == 2 && !strcmp(argv[1], "-h"))
|
|
|
|
usage(index_pack_usage);
|
|
|
|
|
|
|
|
read_replace_refs = 0;
|
|
|
|
fsck_options.walk = mark_link;
|
|
|
|
|
|
|
|
reset_pack_idx_option(&opts);
|
|
|
|
git_config(git_index_pack_config, &opts);
|
|
|
|
if (prefix && chdir(prefix))
|
|
|
|
die(_("Cannot come back to cwd"));
|
|
|
|
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
const char *arg = argv[i];
|
|
|
|
|
|
|
|
if (*arg == '-') {
|
|
|
|
if (!strcmp(arg, "--stdin")) {
|
|
|
|
from_stdin = 1;
|
|
|
|
} else if (!strcmp(arg, "--fix-thin")) {
|
|
|
|
fix_thin_pack = 1;
|
|
|
|
} else if (skip_to_optional_arg(arg, "--strict", &arg)) {
|
|
|
|
strict = 1;
|
|
|
|
do_fsck_object = 1;
|
|
|
|
fsck_set_msg_types(&fsck_options, arg);
|
|
|
|
} else if (!strcmp(arg, "--check-self-contained-and-connected")) {
|
|
|
|
strict = 1;
|
|
|
|
check_self_contained_and_connected = 1;
|
|
|
|
} else if (!strcmp(arg, "--fsck-objects")) {
|
|
|
|
do_fsck_object = 1;
|
|
|
|
} else if (!strcmp(arg, "--verify")) {
|
|
|
|
verify = 1;
|
|
|
|
} else if (!strcmp(arg, "--verify-stat")) {
|
|
|
|
verify = 1;
|
|
|
|
show_stat = 1;
|
|
|
|
} else if (!strcmp(arg, "--verify-stat-only")) {
|
|
|
|
verify = 1;
|
|
|
|
show_stat = 1;
|
|
|
|
stat_only = 1;
|
|
|
|
} else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) {
|
|
|
|
; /* nothing to do */
|
|
|
|
} else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) {
|
|
|
|
; /* already parsed */
|
|
|
|
} else if (starts_with(arg, "--threads=")) {
|
|
|
|
char *end;
|
|
|
|
nr_threads = strtoul(arg+10, &end, 0);
|
|
|
|
if (!arg[10] || *end || nr_threads < 0)
|
|
|
|
usage(index_pack_usage);
|
|
|
|
if (!HAVE_THREADS && nr_threads != 1) {
|
|
|
|
warning(_("no threads support, ignoring %s"), arg);
|
|
|
|
nr_threads = 1;
|
|
|
|
}
|
|
|
|
} else if (starts_with(arg, "--pack_header=")) {
|
|
|
|
struct pack_header *hdr;
|
|
|
|
char *c;
|
|
|
|
|
|
|
|
hdr = (struct pack_header *)input_buffer;
|
|
|
|
hdr->hdr_signature = htonl(PACK_SIGNATURE);
|
|
|
|
hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
|
|
|
|
if (*c != ',')
|
|
|
|
die(_("bad %s"), arg);
|
|
|
|
hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
|
|
|
|
if (*c)
|
|
|
|
die(_("bad %s"), arg);
|
|
|
|
input_len = sizeof(*hdr);
|
|
|
|
} else if (!strcmp(arg, "-v")) {
|
|
|
|
verbose = 1;
|
index-pack: add flag for showing delta-resolution progress
The index-pack command has two progress meters: one for
"receiving objects", and one for "resolving deltas". You get
neither by default, or both with "-v".
But for a push through receive-pack, we would want only the
"resolving deltas" phase, _not_ the "receiving objects"
progress. There are two reasons for this.
One is simply that existing clients are already printing
"writing objects" progress at the same time. Arguably
"receiving" from the far end is more useful, because it
tells you what has actually gotten there, as opposed to what
might be stuck in a buffer somewhere between the client and
server. But that would require a protocol extension to tell
clients not to print their progress. Possible, but
complexity for little gain.
The second reason is much more important. In a full-duplex
connection like git-over-ssh, we can print progress while
the pack is incoming, and it will immediately get to the
client. But for a half-duplex connection like git-over-http,
we should not say anything until we have received the full
request. Anything we write is subject to being stuck in a
buffer by the webserver. Worse, we can end up in a deadlock
if that buffer fills up.
So our best bet is to avoid writing anything that isn't a
small fixed size until we've received the full pack.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
} else if (!strcmp(arg, "--show-resolving-progress")) {
|
|
|
|
show_resolving_progress = 1;
|
receive-pack: send keepalives during quiet periods
After a client has sent us the complete pack, we may spend
some time processing the data and running hooks. If the
client asked us to be quiet, receive-pack won't send any
progress data during the index-pack or connectivity-check
steps. And hooks may or may not produce their own progress
output. In these cases, the network connection is totally
silent from both ends.
Git itself doesn't care about this (it will wait forever),
but other parts of the system (e.g., firewalls,
load-balancers, etc) might hang up the connection. So we'd
like to send some sort of keepalive to let the network and
the client side know that we're still alive and processing.
We can use the same trick we did in 05e9515 (upload-pack:
send keepalive packets during pack computation, 2013-09-08).
Namely, we will send an empty sideband data packet every `N`
seconds that we do not relay any stderr data over the
sideband channel. As with 05e9515, this means that we won't
bother sending keepalives when there's actual progress data,
but will kick in when it has been disabled (or if there is a
lull in the progress data).
The concept is simple, but the details are subtle enough
that they need discussing here.
Before the client sends us the pack, we don't want to do any
keepalives. We'll have sent our ref advertisement, and we're
waiting for them to send us the pack (and tell us that they
support sidebands at all).
While we're receiving the pack from the client (or waiting
for it to start), there's no need for keepalives; it's up to
them to keep the connection active by sending data.
Moreover, it would be wrong for us to do so. When we are the
server in the smart-http protocol, we must treat our
connection as half-duplex. So any keepalives we send while
receiving the pack would potentially be buffered by the
webserver. Not only does this make them useless (since they
would not be delivered in a timely manner), but it could
actually cause a deadlock if we fill up the buffer with
keepalives. (It wouldn't be wrong to send keepalives in this
phase for a full-duplex connection like ssh; it's simply
pointless, as it is the client's responsibility to speak).
As soon as we've gotten all of the pack data, then the
client is waiting for us to speak, and we should start
keepalives immediately. From here until the end of the
connection, we send one any time we are not otherwise
sending data.
But there's a catch. Receive-pack doesn't know the moment
we've gotten all the data. It passes the descriptor to
index-pack, who reads all of the data, and then starts
resolving the deltas. We have to communicate that back.
To make this work, we instruct the sideband muxer to enable
keepalives in three phases:
1. In the beginning, not at all.
2. While reading from index-pack, wait for a signal
indicating end-of-input, and then start them.
3. Afterwards, always.
The signal from index-pack in phase 2 has to come over the
stderr channel which the muxer is reading. We can't use an
extra pipe because the portable run-command interface only
gives us stderr and stdout.
Stdout is already used to pass the .keep filename back to
receive-pack. We could also send a signal there, but then we
would find out about it in the main thread. And the
keepalive needs to be done by the async muxer thread (since
it's the one writing sideband data back to the client). And
we can't reliably signal the async thread from the main
thread, because the async code sometimes uses threads and
sometimes uses forked processes.
Therefore the signal must come over the stderr channel,
where it may be interspersed with other random
human-readable messages from index-pack. This patch makes
the signal a single NUL byte. This is easy to parse, should
not appear in any normal stderr output, and we don't have to
worry about any timing issues (like seeing half the signal
bytes in one read(), and half in a subsequent one).
This is a bit ugly, but it's simple to code and should work
reliably.
Another option would be to stop using an async thread for
muxing entirely, and just poll() both stderr and stdout of
index-pack from the main thread. This would work for
index-pack (because we aren't doing anything useful in the
main thread while it runs anyway). But it would make the
connectivity check and the hook muxers much more
complicated, as they need to simultaneously feed the
sub-programs while reading their stderr.
The index-pack phase is the only one that needs this
signaling, so it could simply behave differently than the
other two. That would mean having two separate
implementations of copy_to_sideband (and the keepalive
code), though. And it still doesn't get rid of the
signaling; it just means we can write a nicer message like
"END_OF_INPUT" or something on stdout, since we don't have
to worry about separating it from the stderr cruft.
One final note: this signaling trick is only done with
index-pack, not with unpack-objects. There's no point in
doing it for the latter, because by definition it only kicks
in for a small number of objects, where keepalives are not
as useful (and this conveniently lets us avoid duplicating
the implementation).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
} else if (!strcmp(arg, "--report-end-of-input")) {
|
|
|
|
report_end_of_input = 1;
|
|
|
|
} else if (!strcmp(arg, "-o")) {
|
|
|
|
if (index_name || (i+1) >= argc)
|
|
|
|
usage(index_pack_usage);
|
|
|
|
index_name = argv[++i];
|
|
|
|
} else if (starts_with(arg, "--index-version=")) {
|
|
|
|
char *c;
|
|
|
|
opts.version = strtoul(arg + 16, &c, 10);
|
|
|
|
if (opts.version > 2)
|
|
|
|
die(_("bad %s"), arg);
|
|
|
|
if (*c == ',')
|
|
|
|
opts.off32_limit = strtoul(c+1, &c, 0);
|
|
|
|
if (*c || opts.off32_limit & 0x80000000)
|
|
|
|
die(_("bad %s"), arg);
|
|
|
|
} else if (skip_prefix(arg, "--max-input-size=", &arg)) {
|
|
|
|
max_input_size = strtoumax(arg, NULL, 10);
|
|
|
|
} else if (skip_prefix(arg, "--object-format=", &arg)) {
|
|
|
|
hash_algo = hash_algo_by_name(arg);
|
|
|
|
if (hash_algo == GIT_HASH_UNKNOWN)
|
|
|
|
die(_("unknown hash algorithm '%s'"), arg);
|
|
|
|
repo_set_hash_algo(the_repository, hash_algo);
|
|
|
|
} else
|
|
|
|
usage(index_pack_usage);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pack_name)
|
|
|
|
usage(index_pack_usage);
|
|
|
|
pack_name = arg;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!pack_name && !from_stdin)
|
|
|
|
usage(index_pack_usage);
|
|
|
|
if (fix_thin_pack && !from_stdin)
|
|
|
|
die(_("--fix-thin cannot be used without --stdin"));
|
|
|
|
if (from_stdin && !startup_info->have_repository)
|
|
|
|
die(_("--stdin requires a git repository"));
|
|
|
|
if (from_stdin && hash_algo)
|
|
|
|
die(_("--object-format cannot be used with --stdin"));
|
|
|
|
if (!index_name && pack_name)
|
|
|
|
index_name = derive_filename(pack_name, "idx", &index_name_buf);
|
|
|
|
|
|
|
|
if (verify) {
|
|
|
|
if (!index_name)
|
|
|
|
die(_("--verify with no packfile name given"));
|
|
|
|
read_idx_option(&opts, index_name);
|
|
|
|
opts.flags |= WRITE_IDX_VERIFY | WRITE_IDX_STRICT;
|
|
|
|
}
|
|
|
|
if (strict)
|
|
|
|
opts.flags |= WRITE_IDX_STRICT;
|
|
|
|
|
|
|
|
if (HAVE_THREADS && !nr_threads) {
|
|
|
|
nr_threads = online_cpus();
|
index-pack: adjust default threading cap
Commit b8a2486f15 (index-pack: support multithreaded delta resolving,
2012-05-06) describes an experiment that shows that setting the number
of threads for index-pack higher than 3 does not help.
I repeated that experiment using a more modern version of Git and a more
modern CPU and got different results.
Here are timings for p5302 against linux.git run on my laptop, a Core
i9-9880H with 8 cores plus hyperthreading (so online-cpus returns 16):
5302.3: index-pack 0 threads 256.28(253.41+2.79)
5302.4: index-pack 1 threads 257.03(254.03+2.91)
5302.5: index-pack 2 threads 149.39(268.34+3.06)
5302.6: index-pack 4 threads 94.96(294.10+3.23)
5302.7: index-pack 8 threads 68.12(339.26+3.89)
5302.8: index-pack 16 threads 70.90(655.03+7.21)
5302.9: index-pack default number of threads 116.91(290.05+3.21)
You can see that wall-clock times continue to improve dramatically up to
the number of cores, but bumping beyond that (into hyperthreading
territory) does not help (and in fact hurts a little).
Here's the same experiment on a machine with dual Xeon 6230's, totaling
40 cores (80 with hyperthreading):
5302.3: index-pack 0 threads 310.04(302.73+6.90)
5302.4: index-pack 1 threads 310.55(302.68+7.40)
5302.5: index-pack 2 threads 178.17(304.89+8.20)
5302.6: index-pack 5 threads 99.53(315.54+9.56)
5302.7: index-pack 10 threads 72.80(327.37+12.79)
5302.8: index-pack 20 threads 60.68(357.74+21.66)
5302.9: index-pack 40 threads 58.07(454.44+67.96)
5302.10: index-pack 80 threads 59.81(720.45+334.52)
5302.11: index-pack default number of threads 134.18(309.32+7.98)
The results are similar; things stop improving at 40 threads. Curiously,
going from 20 to 40 really doesn't help much, either (and increases CPU
time considerably). So that may represent an actual barrier to
parallelism, where we lose out due to context-switching and loss of
cache locality, but don't reap the wall-clock benefits due to contention
of our coarse-grained locks.
So what's a good default value? It's clear that the current cap of 3 is
too low; our default values are 42% and 57% slower than the best times
on each machine. The results on the 40-core machine imply that 20
threads is an actual barrier regardless of the number of cores, so we'll
take that as a maximum. We get the best results on these machines at
half of the online-cpus value. That's presumably a result of the
hyperthreading. That's common on multi-core Intel processors, but not
necessarily elsewhere. But if we take it as an assumption, we can
perform optimally on hyperthreaded machines and still do much better
than the status quo on other machines, as long as we never half below
the current value of 3.
So that's what this patch does.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 years ago
|
|
|
/*
|
|
|
|
* Experiments show that going above 20 threads doesn't help,
|
|
|
|
* no matter how many cores you have. Below that, we tend to
|
|
|
|
* max at half the number of online_cpus(), presumably because
|
|
|
|
* half of those are hyperthreads rather than full cores. We'll
|
|
|
|
* never reduce the level below "3", though, to match a
|
|
|
|
* historical value that nobody complained about.
|
|
|
|
*/
|
|
|
|
if (nr_threads < 4)
|
|
|
|
; /* too few cores to consider capping */
|
|
|
|
else if (nr_threads < 6)
|
|
|
|
nr_threads = 3; /* historic cap */
|
|
|
|
else if (nr_threads < 40)
|
|
|
|
nr_threads /= 2;
|
|
|
|
else
|
|
|
|
nr_threads = 20; /* hard cap */
|
|
|
|
}
|
|
|
|
|
|
|
|
curr_pack = open_pack_file(pack_name);
|
|
|
|
parse_pack_header();
|
|
|
|
objects = xcalloc(st_add(nr_objects, 1), sizeof(struct object_entry));
|
|
|
|
if (show_stat)
|
|
|
|
obj_stat = xcalloc(st_add(nr_objects, 1), sizeof(struct object_stat));
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
ofs_deltas = xcalloc(nr_objects, sizeof(struct ofs_delta_entry));
|
|
|
|
parse_pack_objects(pack_hash);
|
receive-pack: send keepalives during quiet periods
After a client has sent us the complete pack, we may spend
some time processing the data and running hooks. If the
client asked us to be quiet, receive-pack won't send any
progress data during the index-pack or connectivity-check
steps. And hooks may or may not produce their own progress
output. In these cases, the network connection is totally
silent from both ends.
Git itself doesn't care about this (it will wait forever),
but other parts of the system (e.g., firewalls,
load-balancers, etc) might hang up the connection. So we'd
like to send some sort of keepalive to let the network and
the client side know that we're still alive and processing.
We can use the same trick we did in 05e9515 (upload-pack:
send keepalive packets during pack computation, 2013-09-08).
Namely, we will send an empty sideband data packet every `N`
seconds that we do not relay any stderr data over the
sideband channel. As with 05e9515, this means that we won't
bother sending keepalives when there's actual progress data,
but will kick in when it has been disabled (or if there is a
lull in the progress data).
The concept is simple, but the details are subtle enough
that they need discussing here.
Before the client sends us the pack, we don't want to do any
keepalives. We'll have sent our ref advertisement, and we're
waiting for them to send us the pack (and tell us that they
support sidebands at all).
While we're receiving the pack from the client (or waiting
for it to start), there's no need for keepalives; it's up to
them to keep the connection active by sending data.
Moreover, it would be wrong for us to do so. When we are the
server in the smart-http protocol, we must treat our
connection as half-duplex. So any keepalives we send while
receiving the pack would potentially be buffered by the
webserver. Not only does this make them useless (since they
would not be delivered in a timely manner), but it could
actually cause a deadlock if we fill up the buffer with
keepalives. (It wouldn't be wrong to send keepalives in this
phase for a full-duplex connection like ssh; it's simply
pointless, as it is the client's responsibility to speak).
As soon as we've gotten all of the pack data, then the
client is waiting for us to speak, and we should start
keepalives immediately. From here until the end of the
connection, we send one any time we are not otherwise
sending data.
But there's a catch. Receive-pack doesn't know the moment
we've gotten all the data. It passes the descriptor to
index-pack, who reads all of the data, and then starts
resolving the deltas. We have to communicate that back.
To make this work, we instruct the sideband muxer to enable
keepalives in three phases:
1. In the beginning, not at all.
2. While reading from index-pack, wait for a signal
indicating end-of-input, and then start them.
3. Afterwards, always.
The signal from index-pack in phase 2 has to come over the
stderr channel which the muxer is reading. We can't use an
extra pipe because the portable run-command interface only
gives us stderr and stdout.
Stdout is already used to pass the .keep filename back to
receive-pack. We could also send a signal there, but then we
would find out about it in the main thread. And the
keepalive needs to be done by the async muxer thread (since
it's the one writing sideband data back to the client). And
we can't reliably signal the async thread from the main
thread, because the async code sometimes uses threads and
sometimes uses forked processes.
Therefore the signal must come over the stderr channel,
where it may be interspersed with other random
human-readable messages from index-pack. This patch makes
the signal a single NUL byte. This is easy to parse, should
not appear in any normal stderr output, and we don't have to
worry about any timing issues (like seeing half the signal
bytes in one read(), and half in a subsequent one).
This is a bit ugly, but it's simple to code and should work
reliably.
Another option would be to stop using an async thread for
muxing entirely, and just poll() both stderr and stdout of
index-pack from the main thread. This would work for
index-pack (because we aren't doing anything useful in the
main thread while it runs anyway). But it would make the
connectivity check and the hook muxers much more
complicated, as they need to simultaneously feed the
sub-programs while reading their stderr.
The index-pack phase is the only one that needs this
signaling, so it could simply behave differently than the
other two. That would mean having two separate
implementations of copy_to_sideband (and the keepalive
code), though. And it still doesn't get rid of the
signaling; it just means we can write a nicer message like
"END_OF_INPUT" or something on stdout, since we don't have
to worry about separating it from the stderr cruft.
One final note: this signaling trick is only done with
index-pack, not with unpack-objects. There's no point in
doing it for the latter, because by definition it only kicks
in for a small number of objects, where keepalives are not
as useful (and this conveniently lets us avoid duplicating
the implementation).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
if (report_end_of_input)
|
|
|
|
write_in_full(2, "\0", 1);
|
|
|
|
resolve_deltas();
|
|
|
|
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
|
index-pack: kill union delta_base to save memory
Once we know the number of objects in the input pack, we allocate an
array of nr_objects of struct delta_entry. On x86-64, this struct is
32 bytes long. The union delta_base, which is part of struct
delta_entry, provides enough space to store either ofs-delta (8 bytes)
or ref-delta (20 bytes).
Because ofs-delta encoding is more efficient space-wise and more
performant at runtime than ref-delta encoding, Git packers try to use
ofs-delta whenever possible, and it is expected that objects encoded
as ref-delta are minority.
In the best clone case where no ref-delta object is present, we waste
(20-8) * nr_objects bytes because of this union. That's about 38MB out
of 100MB for deltas[] with 3.4M objects, or 38%. deltas[] would be
around 62MB without the waste.
This patch attempts to eliminate that. deltas[] array is split into
two: one for ofs-delta and one for ref-delta. Many functions are also
duplicated because of this split. With this patch, ofs_deltas[] array
takes 51MB. ref_deltas[] should remain unallocated in clone case (0
bytes). This array grows as we see ref-delta. We save about half in
this case, or 25% of total bookkeeping.
The saving is more than the calculation above because some padding in
the old delta_entry struct is removed. ofs_delta_entry is 16 bytes,
including the 4 bytes padding. That's 13MB for padding, but packing
the struct could break platforms that do not support unaligned
access. If someone on 32-bit is really low on memory and only deals
with packs smaller than 2G, using 32-bit off_t would eliminate the
padding and save 27MB on top.
A note about ofs_deltas allocation. We could use ref_deltas memory
allocation strategy for ofs_deltas. But that probably just adds more
overhead on top. ofs-deltas are generally the majority (1/2 to 2/3) in
any pack. Incremental realloc may lead to too many memcpy. And if we
preallocate, say 1/2 or 2/3 of nr_objects initially, the growth rate
of ALLOC_GROW() could make this array larger than nr_objects, wasting
more memory.
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
free(ofs_deltas);
|
|
|
|
free(ref_deltas);
|
|
|
|
if (strict)
|
|
|
|
foreign_nr = check_objects();
|
|
|
|
|
|
|
|
if (show_stat)
|
|
|
|
show_pack_info(stat_only);
|
|
|
|
|
|
|
|
ALLOC_ARRAY(idx_objects, nr_objects);
|
|
|
|
for (i = 0; i < nr_objects; i++)
|
|
|
|
idx_objects[i] = &objects[i].idx;
|
|
|
|
curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_hash);
|
|
|
|
free(idx_objects);
|
|
|
|
|
|
|
|
if (!verify)
|
|
|
|
final(pack_name, curr_pack,
|
|
|
|
index_name, curr_index,
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
7 years ago
|
|
|
keep_msg, promisor_msg,
|
|
|
|
pack_hash);
|
|
|
|
else
|
|
|
|
close(input_fd);
|
index-pack: check .gitmodules files with --strict
Now that the internal fsck code has all of the plumbing we
need, we can start checking incoming .gitmodules files.
Naively, it seems like we would just need to add a call to
fsck_finish() after we've processed all of the objects. And
that would be enough to cover the initial test included
here. But there are two extra bits:
1. We currently don't bother calling fsck_object() at all
for blobs, since it has traditionally been a noop. We'd
actually catch these blobs in fsck_finish() at the end,
but it's more efficient to check them when we already
have the object loaded in memory.
2. The second pass done by fsck_finish() needs to access
the objects, but we're actually indexing the pack in
this process. In theory we could give the fsck code a
special callback for accessing the in-pack data, but
it's actually quite tricky:
a. We don't have an internal efficient index mapping
oids to packfile offsets. We only generate it on
the fly as part of writing out the .idx file.
b. We'd still have to reconstruct deltas, which means
we'd basically have to replicate all of the
reading logic in packfile.c.
Instead, let's avoid running fsck_finish() until after
we've written out the .idx file, and then just add it
to our internal packed_git list.
This does mean that the objects are "in the repository"
before we finish our fsck checks. But unpack-objects
already exhibits this same behavior, and it's an
acceptable tradeoff here for the same reason: the
quarantine mechanism means that pushes will be
fully protected.
In addition to a basic push test in t7415, we add a sneaky
pack that reverses the usual object order in the pack,
requiring that index-pack access the tree and blob during
the "finish" step.
This already works for unpack-objects (since it will have
written out loose objects), but we'll check it with this
sneaky pack for good measure.
Signed-off-by: Jeff King <peff@peff.net>
7 years ago
|
|
|
|
|
|
|
if (do_fsck_object && fsck_finish(&fsck_options))
|
|
|
|
die(_("fsck error in pack objects"));
|
|
|
|
|
|
|
|
free(objects);
|
|
|
|
strbuf_release(&index_name_buf);
|
|
|
|
if (pack_name == NULL)
|
|
|
|
free((void *) curr_pack);
|
|
|
|
if (index_name == NULL)
|
|
|
|
free((void *) curr_index);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Let the caller know this pack is not self contained
|
|
|
|
*/
|
|
|
|
if (check_self_contained_and_connected && foreign_nr)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|