|
|
|
#include "cache.h"
|
|
|
|
#include "tag.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "tree.h"
|
|
|
|
#include "blob.h"
|
|
|
|
#include "diff.h"
|
|
|
|
#include "tree-walk.h"
|
|
|
|
#include "revision.h"
|
|
|
|
#include "list-objects.h"
|
|
|
|
|
|
|
|
static void process_blob(struct rev_info *revs,
|
|
|
|
struct blob *blob,
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
show_object_fn show,
|
|
|
|
struct name_path *path,
|
|
|
|
const char *name)
|
|
|
|
{
|
|
|
|
struct object *obj = &blob->object;
|
|
|
|
|
|
|
|
if (!revs->blob_objects)
|
|
|
|
return;
|
|
|
|
if (!obj)
|
|
|
|
die("bad blob object");
|
|
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
|
|
return;
|
|
|
|
obj->flags |= SEEN;
|
show_object(): push path_name() call further down
In particular, pushing the "path_name()" call _into_ the show() function
would seem to allow
- more clarity into who "owns" the name (ie now when we free the name in
the show_object callback, it's because we generated it ourselves by
calling path_name())
- not calling path_name() at all, either because we don't care about the
name in the first place, or because we are actually happy walking the
linked list of "struct name_path *" and the last component.
Now, I didn't do that latter optimization, because it would require some
more coding, but especially looking at "builtin-pack-objects.c", we really
don't even want the whole pathname, we really would be better off with the
list of path components.
Why? We use that name for two things:
- add_preferred_base_object(), which actually _wants_ to traverse the
path, and now does it by looking for '/' characters!
- for 'name_hash()', which only cares about the last 16 characters of a
name, so again, generating the full name seems to be just unnecessary
work.
Anyway, so I didn't look any closer at those things, but it did convince
me that the "show_object()" calling convention was crazy, and we're
actually better off doing _less_ in list-objects.c, and giving people
access to the internal data structures so that they can decide whether
they want to generate a path-name or not.
This patch does that, and then for people who did use the name (even if
they might do something more clever in the future), it just does the
straightforward "name = path_name(path, component); .. free(name);" thing.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
show(obj, path, name);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Processing a gitlink entry currently does nothing, since
|
|
|
|
* we do not recurse into the subproject.
|
|
|
|
*
|
|
|
|
* We *could* eventually add a flag that actually does that,
|
|
|
|
* which would involve:
|
|
|
|
* - is the subproject actually checked out?
|
|
|
|
* - if so, see if the subproject has already been added
|
|
|
|
* to the alternates list, and add it if not.
|
|
|
|
* - process the commit (or tag) the gitlink points to
|
|
|
|
* recursively.
|
|
|
|
*
|
|
|
|
* However, it's unclear whether there is really ever any
|
|
|
|
* reason to see superprojects and subprojects as such a
|
|
|
|
* "unified" object pool (potentially resulting in a totally
|
|
|
|
* humongous pack - avoiding which was the whole point of
|
|
|
|
* having gitlinks in the first place!).
|
|
|
|
*
|
|
|
|
* So for now, there is just a note that we *could* follow
|
|
|
|
* the link, and how to do it. Whether it necessarily makes
|
|
|
|
* any sense what-so-ever to ever do that is another issue.
|
|
|
|
*/
|
|
|
|
static void process_gitlink(struct rev_info *revs,
|
|
|
|
const unsigned char *sha1,
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
show_object_fn show,
|
|
|
|
struct name_path *path,
|
|
|
|
const char *name)
|
|
|
|
{
|
|
|
|
/* Nothing to do */
|
|
|
|
}
|
|
|
|
|
|
|
|
static void process_tree(struct rev_info *revs,
|
|
|
|
struct tree *tree,
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
show_object_fn show,
|
|
|
|
struct name_path *path,
|
|
|
|
struct strbuf *base,
|
|
|
|
const char *name)
|
|
|
|
{
|
|
|
|
struct object *obj = &tree->object;
|
|
|
|
struct tree_desc desc;
|
|
|
|
struct name_entry entry;
|
|
|
|
struct name_path me;
|
|
|
|
int all_interesting = (revs->diffopt.pathspec.nr == 0);
|
|
|
|
int baselen = base->len;
|
|
|
|
|
|
|
|
if (!revs->tree_objects)
|
|
|
|
return;
|
|
|
|
if (!obj)
|
|
|
|
die("bad tree object");
|
|
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
|
|
return;
|
|
|
|
if (parse_tree(tree) < 0)
|
|
|
|
die("bad tree object %s", sha1_to_hex(obj->sha1));
|
|
|
|
obj->flags |= SEEN;
|
show_object(): push path_name() call further down
In particular, pushing the "path_name()" call _into_ the show() function
would seem to allow
- more clarity into who "owns" the name (ie now when we free the name in
the show_object callback, it's because we generated it ourselves by
calling path_name())
- not calling path_name() at all, either because we don't care about the
name in the first place, or because we are actually happy walking the
linked list of "struct name_path *" and the last component.
Now, I didn't do that latter optimization, because it would require some
more coding, but especially looking at "builtin-pack-objects.c", we really
don't even want the whole pathname, we really would be better off with the
list of path components.
Why? We use that name for two things:
- add_preferred_base_object(), which actually _wants_ to traverse the
path, and now does it by looking for '/' characters!
- for 'name_hash()', which only cares about the last 16 characters of a
name, so again, generating the full name seems to be just unnecessary
work.
Anyway, so I didn't look any closer at those things, but it did convince
me that the "show_object()" calling convention was crazy, and we're
actually better off doing _less_ in list-objects.c, and giving people
access to the internal data structures so that they can decide whether
they want to generate a path-name or not.
This patch does that, and then for people who did use the name (even if
they might do something more clever in the future), it just does the
straightforward "name = path_name(path, component); .. free(name);" thing.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
show(obj, path, name);
|
|
|
|
me.up = path;
|
|
|
|
me.elem = name;
|
|
|
|
me.elem_len = strlen(name);
|
|
|
|
|
|
|
|
if (!all_interesting) {
|
|
|
|
strbuf_addstr(base, name);
|
|
|
|
if (base->len)
|
|
|
|
strbuf_addch(base, '/');
|
|
|
|
}
|
|
|
|
|
|
|
|
init_tree_desc(&desc, tree->buffer, tree->size);
|
|
|
|
|
|
|
|
while (tree_entry(&desc, &entry)) {
|
|
|
|
if (!all_interesting) {
|
|
|
|
int showit = tree_entry_interesting(&entry,
|
|
|
|
base, 0,
|
|
|
|
&revs->diffopt.pathspec);
|
|
|
|
|
|
|
|
if (showit < 0)
|
|
|
|
break;
|
|
|
|
else if (!showit)
|
|
|
|
continue;
|
|
|
|
else if (showit == 2)
|
|
|
|
all_interesting = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (S_ISDIR(entry.mode))
|
|
|
|
process_tree(revs,
|
|
|
|
lookup_tree(entry.sha1),
|
|
|
|
show, &me, base, entry.path);
|
|
|
|
else if (S_ISGITLINK(entry.mode))
|
|
|
|
process_gitlink(revs, entry.sha1,
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
show, &me, entry.path);
|
|
|
|
else
|
|
|
|
process_blob(revs,
|
|
|
|
lookup_blob(entry.sha1),
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
show, &me, entry.path);
|
|
|
|
}
|
|
|
|
strbuf_setlen(base, baselen);
|
|
|
|
free(tree->buffer);
|
|
|
|
tree->buffer = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mark_edge_parents_uninteresting(struct commit *commit,
|
|
|
|
struct rev_info *revs,
|
|
|
|
show_edge_fn show_edge)
|
|
|
|
{
|
|
|
|
struct commit_list *parents;
|
|
|
|
|
|
|
|
for (parents = commit->parents; parents; parents = parents->next) {
|
|
|
|
struct commit *parent = parents->item;
|
|
|
|
if (!(parent->object.flags & UNINTERESTING))
|
|
|
|
continue;
|
|
|
|
mark_tree_uninteresting(parent->tree);
|
|
|
|
if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
|
|
|
|
parent->object.flags |= SHOWN;
|
|
|
|
show_edge(parent);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void mark_edges_uninteresting(struct commit_list *list,
|
|
|
|
struct rev_info *revs,
|
|
|
|
show_edge_fn show_edge)
|
|
|
|
{
|
|
|
|
for ( ; list; list = list->next) {
|
|
|
|
struct commit *commit = list->item;
|
|
|
|
|
|
|
|
if (commit->object.flags & UNINTERESTING) {
|
|
|
|
mark_tree_uninteresting(commit->tree);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
mark_edge_parents_uninteresting(commit, revs, show_edge);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
static void add_pending_tree(struct rev_info *revs, struct tree *tree)
|
|
|
|
{
|
|
|
|
add_pending_object(revs, &tree->object, "");
|
|
|
|
}
|
|
|
|
|
|
|
|
void traverse_commit_list(struct rev_info *revs,
|
|
|
|
show_commit_fn show_commit,
|
|
|
|
show_object_fn show_object,
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct commit *commit;
|
|
|
|
struct strbuf base;
|
|
|
|
|
|
|
|
strbuf_init(&base, PATH_MAX);
|
|
|
|
while ((commit = get_revision(revs)) != NULL) {
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
add_pending_tree(revs, commit->tree);
|
|
|
|
show_commit(commit, data);
|
|
|
|
}
|
|
|
|
for (i = 0; i < revs->pending.nr; i++) {
|
|
|
|
struct object_array_entry *pending = revs->pending.objects + i;
|
|
|
|
struct object *obj = pending->item;
|
|
|
|
const char *name = pending->name;
|
|
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
|
|
continue;
|
|
|
|
if (obj->type == OBJ_TAG) {
|
|
|
|
obj->flags |= SEEN;
|
show_object(): push path_name() call further down
In particular, pushing the "path_name()" call _into_ the show() function
would seem to allow
- more clarity into who "owns" the name (ie now when we free the name in
the show_object callback, it's because we generated it ourselves by
calling path_name())
- not calling path_name() at all, either because we don't care about the
name in the first place, or because we are actually happy walking the
linked list of "struct name_path *" and the last component.
Now, I didn't do that latter optimization, because it would require some
more coding, but especially looking at "builtin-pack-objects.c", we really
don't even want the whole pathname, we really would be better off with the
list of path components.
Why? We use that name for two things:
- add_preferred_base_object(), which actually _wants_ to traverse the
path, and now does it by looking for '/' characters!
- for 'name_hash()', which only cares about the last 16 characters of a
name, so again, generating the full name seems to be just unnecessary
work.
Anyway, so I didn't look any closer at those things, but it did convince
me that the "show_object()" calling convention was crazy, and we're
actually better off doing _less_ in list-objects.c, and giving people
access to the internal data structures so that they can decide whether
they want to generate a path-name or not.
This patch does that, and then for people who did use the name (even if
they might do something more clever in the future), it just does the
straightforward "name = path_name(path, component); .. free(name);" thing.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
show_object(obj, NULL, name);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (obj->type == OBJ_TREE) {
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
process_tree(revs, (struct tree *)obj, show_object,
|
|
|
|
NULL, &base, name);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (obj->type == OBJ_BLOB) {
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 years ago
|
|
|
process_blob(revs, (struct blob *)obj, show_object,
|
|
|
|
NULL, name);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
die("unknown pending object %s (%s)",
|
|
|
|
sha1_to_hex(obj->sha1), name);
|
|
|
|
}
|
|
|
|
if (revs->pending.nr) {
|
|
|
|
free(revs->pending.objects);
|
|
|
|
revs->pending.nr = 0;
|
|
|
|
revs->pending.alloc = 0;
|
|
|
|
revs->pending.objects = NULL;
|
|
|
|
}
|
|
|
|
strbuf_release(&base);
|
|
|
|
}
|