Browse Source
"git pack-objects" has a few options that tell it not to pack objects found in certain packfiles, which require it to scan .idx files of all available packs. The codepaths involved in these operations have been optimized for a common case of not having any non-local pack and/or any .kept pack.

* jk/pack-objects-optim:
  pack-objects: compute local/ignore_pack_keep early
  pack-objects: break out of want_object loop early
  find_pack_entry: replace last_found_pack with MRU cache
  add generic most-recently-used list
  sha1_file: drop free_pack_by_name
  t/perf: add tests for many-pack scenarios

maint

8 changed files with 248 additions and 50 deletions
@ -0,0 +1,50 @@
@@ -0,0 +1,50 @@
|
||||
#include "cache.h" |
||||
#include "mru.h" |
||||
|
||||
void mru_append(struct mru *mru, void *item) |
||||
{ |
||||
struct mru_entry *cur = xmalloc(sizeof(*cur)); |
||||
cur->item = item; |
||||
cur->prev = mru->tail; |
||||
cur->next = NULL; |
||||
|
||||
if (mru->tail) |
||||
mru->tail->next = cur; |
||||
else |
||||
mru->head = cur; |
||||
mru->tail = cur; |
||||
} |
||||
|
||||
void mru_mark(struct mru *mru, struct mru_entry *entry) |
||||
{ |
||||
/* If we're already at the front of the list, nothing to do */ |
||||
if (mru->head == entry) |
||||
return; |
||||
|
||||
/* Otherwise, remove us from our current slot... */ |
||||
if (entry->prev) |
||||
entry->prev->next = entry->next; |
||||
if (entry->next) |
||||
entry->next->prev = entry->prev; |
||||
else |
||||
mru->tail = entry->prev; |
||||
|
||||
/* And insert us at the beginning. */ |
||||
entry->prev = NULL; |
||||
entry->next = mru->head; |
||||
if (mru->head) |
||||
mru->head->prev = entry; |
||||
mru->head = entry; |
||||
} |
||||
|
||||
void mru_clear(struct mru *mru) |
||||
{ |
||||
struct mru_entry *p = mru->head; |
||||
|
||||
while (p) { |
||||
struct mru_entry *to_free = p; |
||||
p = p->next; |
||||
free(to_free); |
||||
} |
||||
mru->head = mru->tail = NULL; |
||||
} |
@ -0,0 +1,45 @@
@@ -0,0 +1,45 @@
|
||||
#ifndef MRU_H |
||||
#define MRU_H |
||||
|
||||
/** |
||||
* A simple most-recently-used cache, backed by a doubly-linked list. |
||||
* |
||||
* Usage is roughly: |
||||
* |
||||
* // Create a list. Zero-initialization is required. |
||||
* static struct mru cache; |
||||
* mru_append(&cache, item); |
||||
* ... |
||||
* |
||||
* // Iterate in MRU order. |
||||
* struct mru_entry *p; |
||||
* for (p = cache.head; p; p = p->next) { |
||||
* if (matches(p->item)) |
||||
* break; |
||||
* } |
||||
* |
||||
* // Mark an item as used, moving it to the front of the list. |
||||
* mru_mark(&cache, p); |
||||
* |
||||
* // Reset the list to empty, cleaning up all resources. |
||||
* mru_clear(&cache); |
||||
* |
||||
* Note that you SHOULD NOT call mru_mark() and then continue traversing the |
||||
* list; it reorders the marked item to the front of the list, and therefore |
||||
* you will begin traversing the whole list again. |
||||
*/ |
||||
|
||||
/* A single node of the doubly-linked list behind `struct mru`. */
struct mru_entry {
	/* Caller-supplied payload; stored as an opaque pointer. */
	void *item;
	/* Neighbour toward the head of the list; NULL for the head entry. */
	struct mru_entry *prev;
	/* Neighbour toward the tail of the list; NULL for the tail entry. */
	struct mru_entry *next;
};
||||
|
||||
/*
 * The list itself.  A zero-initialized struct is a valid empty list;
 * both pointers are NULL while the list has no entries.
 */
struct mru {
	/* Most recently used entry; iteration starts here. */
	struct mru_entry *head;
	/* Entry at the far end of the list; mru_append() adds after it. */
	struct mru_entry *tail;
};
||||
|
||||
/* Append `item` as a new entry at the tail of `mru`. */
void mru_append(struct mru *mru, void *item); |
||||
/* Move `entry` to the head of `mru`; a no-op if it is already the head. */
void mru_mark(struct mru *mru, struct mru_entry *entry); |
||||
/* Free every entry in `mru` (not the items) and leave the list empty. */
void mru_clear(struct mru *mru); |
||||
|
||||
#endif /* MRU_H */ |
@ -0,0 +1,87 @@
@@ -0,0 +1,87 @@
|
||||
#!/bin/sh |
||||
|
||||
test_description='performance with large numbers of packs' |
||||
. ./perf-lib.sh |
||||
|
||||
test_perf_large_repo |
||||
|
||||
# A real many-pack situation would probably come from having a lot of pushes |
||||
# over time. We don't know how big each push would be, but we can fake it by |
||||
# just walking the first-parent chain and having every 5 commits be their own |
||||
# "push". This isn't _entirely_ accurate, as real pushes would have some |
||||
# duplicate objects due to thin-pack fixing, but it's a reasonable |
||||
# approximation. |
||||
# |
||||
# And then all of the rest of the objects can go in a single packfile that |
||||
# represents the state before any of those pushes (actually, we'll generate |
||||
# that first because in such a setup it would be the oldest pack, and we sort |
||||
# the packs by reverse mtime inside git). |
||||
# Replace the contents of .git/objects/pack with up to $1 packs built in
# a scratch "staging" directory: one base pack for the oldest selected
# commit, plus one incremental pack per subsequent selected commit.
#
# $1 - maximum number of commits to select, and therefore of packs.
#
# Every step is joined with "&&" so that a failure anywhere makes the
# function return non-zero instead of being silently ignored.
repack_into_n () {
	rm -rf staging &&
	mkdir staging &&

	# Select every 5th commit of the first-parent chain (at most $1 of
	# them) and list them oldest first.  The selection is done in perl
	# because sed's "first~step" address form is a GNU extension.
	git rev-list --first-parent HEAD |
	perl -ne 'print if $. % 5 == 1' |
	head -n "$1" |
	perl -e 'print reverse <>' \
		>pushes &&

	# create base packfile
	head -n 1 pushes |
	git pack-objects --delta-base-offset --revs staging/pack &&

	# and then incrementals between each pair of commits
	last= &&
	while read rev
	do
		if test -n "$last"
		then
			{
				echo "$rev" &&
				echo "^$last"
			} |
			git pack-objects --delta-base-offset --revs \
				staging/pack || return 1
		fi
		last=$rev
	done <pushes &&

	# and install the whole thing
	rm -f .git/objects/pack/* &&
	mv staging/* .git/objects/pack/
}
||||
|
||||
# Pretend we just have a single branch and no reflogs, and that everything is |
||||
# in objects/pack; that makes our fake pack-building via repack_into_n() |
||||
# much simpler. |
||||
# Remember the current HEAD commit, delete every ref reported by
# for-each-ref (via "update-ref --stdin" with no-deref), remove .git/logs,
# then recreate refs/heads/master at the remembered commit, point HEAD at
# it, and finish with "git repack -ad".
test_expect_success 'simplify reachability' ' |
||||
tip=$(git rev-parse --verify HEAD) && |
||||
git for-each-ref --format="option no-deref%0adelete %(refname)" | |
||||
git update-ref --stdin && |
||||
rm -rf .git/logs && |
||||
git update-ref refs/heads/master $tip && |
||||
git symbolic-ref HEAD refs/heads/master && |
||||
git repack -ad |
||||
' |
||||
|
||||
# For each pack count, rebuild the pack directory with repack_into_n and
# then time two operations against that layout: a "rev-list --objects --all"
# walk and a pack-objects run whose output is discarded.
for nr_packs in 1 50 1000 |
||||
do |
||||
test_expect_success "create $nr_packs-pack scenario" ' |
||||
repack_into_n $nr_packs |
||||
' |
||||
|
||||
test_perf "rev-list ($nr_packs)" ' |
||||
git rev-list --objects --all >/dev/null |
||||
' |
||||
|
||||
# This simulates the interesting part of the repack, which is the |
||||
# actual pack generation, without smudging the on-disk setup |
||||
# between trials. |
||||
test_perf "repack ($nr_packs)" ' |
||||
git pack-objects --keep-true-parents \ |
||||
--honor-pack-keep --non-empty --all \ |
||||
--reflog --indexed-objects --delta-base-offset \ |
||||
--stdout </dev/null >/dev/null |
||||
' |
||||
done |
||||
|
||||
test_done |
Loading…
Reference in new issue