|
|
|
#ifndef UNPACK_TREES_H
|
|
|
|
#define UNPACK_TREES_H
|
|
|
|
|
|
|
|
#define MAX_UNPACK_TREES 8
|
|
|
|
|
|
|
|
struct unpack_trees_options;
|
|
|
|
|
|
|
|
typedef int (*merge_fn_t)(struct cache_entry **src,
|
|
|
|
struct unpack_trees_options *options);
|
|
|
|
|
|
|
|
struct unpack_trees_error_msgs {
|
|
|
|
const char *would_overwrite;
|
|
|
|
const char *not_uptodate_file;
|
|
|
|
const char *not_uptodate_dir;
|
|
|
|
const char *would_lose_untracked;
|
|
|
|
const char *bind_overlap;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct unpack_trees_options {
|
|
|
|
unsigned int reset:1,
|
|
|
|
merge:1,
|
|
|
|
update:1,
|
|
|
|
index_only:1,
|
|
|
|
nontrivial_merge:1,
|
|
|
|
trivial_merges_only:1,
|
|
|
|
verbose_update:1,
|
|
|
|
aggressive:1,
|
|
|
|
skip_unmerged:1,
|
checkout: do not lose staged removal
The logic to checkout a different commit implements the safety to never
lose user's local changes. For example, switching from a commit to
another commit, when you have changed a path that is different between
them, need to merge your changes to the version from the switched-to
commit, which you may not necessarily be able to resolve easily. By
default, "git checkout" refused to switch branches, to give you a chance
to stash your local changes (or use "-m" to merge, accepting the risks of
getting conflicts).
This safety, however, had one deliberate hole since early June 2005. When
your local change was to remove a path (and optionally to stage that
removal), the command checked out the path from the switched-to commit
nevertheless.
This was to allow an initial checkout to happen smoothly (e.g. an initial
checkout is done by starting with an empty index and switching from the
commit at the HEAD to the same commit). We can tighten the rule slightly
to allow this special case to pass, without losing sight of removal
explicitly done by the user, by noticing if the index is truly empty when
the operation begins.
For historical background, see:
http://thread.gmane.org/gmane.comp.version-control.git/4641/focus=4646
This case is marked as *0* in the message, which both Linus and I said "it
feels somewhat wrong but otherwise we cannot start from an empty index".
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
initial_checkout:1,
|
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway. Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.
As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:
(without patch, hot cache)
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9407minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2446minor)pagefaults 0swaps
Cold cache numbers are very impressive, but it does not matter very much
in practice:
(without patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps
(with patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
18440inputs+0outputs (79major+2369minor)pagefaults 0swaps
This of course helps "git status" as well.
(without patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+10970minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+3921minor)pagefaults 0swaps
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
diff_index_cached:1,
|
|
|
|
gently:1;
|
|
|
|
const char *prefix;
|
|
|
|
int pos;
|
|
|
|
struct dir_struct *dir;
|
|
|
|
merge_fn_t fn;
|
|
|
|
struct unpack_trees_error_msgs msgs;
|
|
|
|
|
|
|
|
int head_idx;
|
|
|
|
int merge_size;
|
|
|
|
|
|
|
|
struct cache_entry *df_conflict_entry;
|
Make run_diff_index() use unpack_trees(), not read_tree()
A plain "git commit" would still run lstat() a lot more than necessary,
because wt_status_print() would cause the index to be repeatedly flushed
and re-read by wt_read_cache(), and that would cause the CE_UPTODATE bit
to be lost, resulting in the files in the index being lstat'ed three
times each.
The reason why wt-status.c ended up invalidating and re-reading the
cache multiple times was that it uses "run_diff_index()", which in turn
uses "read_tree()" to populate the index with *both* the old index and
the tree we want to compare against.
So this patch re-writes run_diff_index() to not use read_tree(), but
instead use "unpack_trees()" to diff the index to a tree. That, in
turn, means that we don't need to modify the index itself, which then
means that we don't need to invalidate it and re-read it!
This, together with the lstat() optimizations, means that "git commit"
on the kernel tree really only needs to lstat() the index entries once.
That noticeably cuts down on the cached timings.
Best time before:
[torvalds@woody linux]$ time git commit > /dev/null
real 0m0.399s
user 0m0.232s
sys 0m0.164s
Best time after:
[torvalds@woody linux]$ time git commit > /dev/null
real 0m0.254s
user 0m0.140s
sys 0m0.112s
so it's a noticeable improvement in addition to being a nice conceptual
cleanup (it's really not that pretty that "run_diff_index()" dirties the
index!)
Doing an "strace -c" on it also shows that as it cuts the number of
lstat() calls by two thirds, it goes from being lstat()-limited to being
limited by getdents() (which is the readdir system call):
Before:
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
60.69 0.000704 0 69230 31 lstat
23.62 0.000274 0 5522 getdents
8.36 0.000097 0 5508 2638 open
2.59 0.000030 0 2869 close
2.50 0.000029 0 274 write
1.47 0.000017 0 2844 fstat
After:
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
45.17 0.000276 0 5522 getdents
26.51 0.000162 0 23112 31 lstat
19.80 0.000121 0 5503 2638 open
4.91 0.000030 0 2864 close
1.48 0.000020 0 274 write
1.34 0.000018 0 2844 fstat
...
It passes the test-suite for me, but this is another of one of those
really core functions, and certainly pretty subtle, so..
NOTE! The Linux lstat() system call is really quite cheap when everything
is cached, so the fact that this is quite noticeable on Linux is likely to
mean that it is *much* more noticeable on other operating systems. I bet
you'll see a much bigger performance improvement from this on Windows in
particular.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
17 years ago
|
|
|
void *unpack_data;
|
|
|
|
|
|
|
|
struct index_state *dst_index;
|
|
|
|
struct index_state *src_index;
|
|
|
|
struct index_state result;
|
|
|
|
};
|
|
|
|
|
|
|
|
extern int unpack_trees(unsigned n, struct tree_desc *t,
|
|
|
|
struct unpack_trees_options *options);
|
|
|
|
|
|
|
|
int threeway_merge(struct cache_entry **stages, struct unpack_trees_options *o);
|
|
|
|
int twoway_merge(struct cache_entry **src, struct unpack_trees_options *o);
|
|
|
|
int bind_merge(struct cache_entry **src, struct unpack_trees_options *o);
|
|
|
|
int oneway_merge(struct cache_entry **src, struct unpack_trees_options *o);
|
|
|
|
|
|
|
|
#endif
|