Merge branch 'kn/bundle-dedup-optim'

Optimize the code to dedup references recorded in a bundle file.

* kn/bundle-dedup-optim:
  bundle: fix non-linear performance scaling with refs
  t6020: test for duplicate refnames in bundle creation
main
Junio C Hamano 2025-04-23 13:58:50 -07:00
commit bb74c0abbc
4 changed files with 61 additions and 41 deletions

View File

@ -384,6 +384,7 @@ static int write_bundle_refs(int bundle_fd, struct rev_info *revs)
{ {
int i; int i;
int ref_count = 0; int ref_count = 0;
struct strset objects = STRSET_INIT;


for (i = 0; i < revs->pending.nr; i++) { for (i = 0; i < revs->pending.nr; i++) {
struct object_array_entry *e = revs->pending.objects + i; struct object_array_entry *e = revs->pending.objects + i;
@ -401,6 +402,9 @@ static int write_bundle_refs(int bundle_fd, struct rev_info *revs)
flag = 0; flag = 0;
display_ref = (flag & REF_ISSYMREF) ? e->name : ref; display_ref = (flag & REF_ISSYMREF) ? e->name : ref;


if (strset_contains(&objects, display_ref))
goto skip_write_ref;

if (e->item->type == OBJ_TAG && if (e->item->type == OBJ_TAG &&
!is_tag_in_date_range(e->item, revs)) { !is_tag_in_date_range(e->item, revs)) {
e->item->flags |= UNINTERESTING; e->item->flags |= UNINTERESTING;
@ -423,6 +427,7 @@ static int write_bundle_refs(int bundle_fd, struct rev_info *revs)
} }


ref_count++; ref_count++;
strset_add(&objects, display_ref);
write_or_die(bundle_fd, oid_to_hex(&e->item->oid), the_hash_algo->hexsz); write_or_die(bundle_fd, oid_to_hex(&e->item->oid), the_hash_algo->hexsz);
write_or_die(bundle_fd, " ", 1); write_or_die(bundle_fd, " ", 1);
write_or_die(bundle_fd, display_ref, strlen(display_ref)); write_or_die(bundle_fd, display_ref, strlen(display_ref));
@ -431,6 +436,8 @@ static int write_bundle_refs(int bundle_fd, struct rev_info *revs)
free(ref); free(ref);
} }


strset_clear(&objects);

/* end header */ /* end header */
write_or_die(bundle_fd, "\n", 1); write_or_die(bundle_fd, "\n", 1);
return ref_count; return ref_count;
@ -566,7 +573,6 @@ int create_bundle(struct repository *r, const char *path,
*/ */
revs.blob_objects = revs.tree_objects = 0; revs.blob_objects = revs.tree_objects = 0;
traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi); traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi);
object_array_remove_duplicates(&revs_copy.pending);


/* write bundle refs */ /* write bundle refs */
ref_count = write_bundle_refs(bundle_fd, &revs_copy); ref_count = write_bundle_refs(bundle_fd, &revs_copy);

View File

@ -492,44 +492,11 @@ void object_array_clear(struct object_array *array)
array->nr = array->alloc = 0; array->nr = array->alloc = 0;
} }


/*
* Return true if array already contains an entry.
*/
static int contains_object(struct object_array *array,
const struct object *item, const char *name)
{
unsigned nr = array->nr, i;
struct object_array_entry *object = array->objects;

for (i = 0; i < nr; i++, object++)
if (item == object->item && !strcmp(object->name, name))
return 1;
return 0;
}

/*
 * Compact array in place so that only the first entry for each
 * (object, name) pair survives. Entries that are dropped are handed to
 * object_array_release_entry(); survivors keep their relative order.
 */
void object_array_remove_duplicates(struct object_array *array)
{
	struct object_array_entry *entries = array->objects;
	unsigned total = array->nr;
	unsigned idx;

	/*
	 * array->nr doubles as the write cursor: while scanning, the array
	 * is treated as containing only the entries kept so far, which is
	 * exactly the set contains_object() has to look through.
	 */
	array->nr = 0;
	for (idx = 0; idx < total; idx++) {
		struct object_array_entry *cur = &entries[idx];

		if (contains_object(array, cur->item, cur->name)) {
			object_array_release_entry(cur);
			continue;
		}

		/* Slide the survivor down unless it is already in place. */
		if (idx != array->nr)
			entries[array->nr] = *cur;
		array->nr++;
	}
}

void clear_object_flags(struct repository *repo, unsigned flags) void clear_object_flags(struct repository *repo, unsigned flags)
{ {
int i; int i;


for (i=0; i < repo->parsed_objects->obj_hash_size; i++) { for (i = 0; i < repo->parsed_objects->obj_hash_size; i++) {
struct object *obj = repo->parsed_objects->obj_hash[i]; struct object *obj = repo->parsed_objects->obj_hash[i];
if (obj) if (obj)
obj->flags &= ~flags; obj->flags &= ~flags;

View File

@ -326,12 +326,6 @@ typedef int (*object_array_each_func_t)(struct object_array_entry *, void *);
void object_array_filter(struct object_array *array, void object_array_filter(struct object_array *array,
object_array_each_func_t want, void *cb_data); object_array_each_func_t want, void *cb_data);


/*
 * Remove duplicate entries from array, keeping only the first entry of
 * each set of duplicates. Two entries are duplicates when they refer to
 * the same object and carry the same name; removed entries are released.
 * Warning: this function uses an O(N^2) algorithm.
 */
void object_array_remove_duplicates(struct object_array *array);

/* /*
* Remove any objects from the array, freeing all used memory; afterwards * Remove any objects from the array, freeing all used memory; afterwards
* the array is ready to store more objects with add_object_array(). * the array is ready to store more objects with add_object_array().

View File

@ -673,6 +673,59 @@ test_expect_success 'bundle progress with --no-quiet' '
grep "%" err grep "%" err
' '


# Passing the same refname twice on the command line must yield a bundle
# whose head list contains that ref exactly once.
test_expect_success 'create bundle with duplicate refnames' '
git bundle create out.bdl "main" "main" &&

git bundle list-heads out.bdl |
make_user_friendly_and_stable_output >actual &&
cat >expect <<-\EOF &&
<COMMIT-P> refs/heads/main
EOF
test_cmp expect actual
'

# Combining --all with an explicitly repeated refname must still list
# every ref once; refs/heads/main in particular must not be repeated.
test_expect_success 'create bundle with duplicate refnames and --all' '
git bundle create out.bdl --all "main" "main" &&

git bundle list-heads out.bdl |
make_user_friendly_and_stable_output >actual &&
cat >expect <<-\EOF &&
<COMMIT-P> refs/heads/main
<COMMIT-N> refs/heads/release
<COMMIT-D> refs/heads/topic/1
<COMMIT-H> refs/heads/topic/2
<COMMIT-D> refs/pull/1/head
<COMMIT-G> refs/pull/2/head
<TAG-1> refs/tags/v1
<TAG-2> refs/tags/v2
<TAG-3> refs/tags/v3
<COMMIT-P> HEAD
EOF
test_cmp expect actual
'

# Naming the same ref both plainly ("main") and in the "main^!" form must
# still produce a single refs/heads/main entry in the bundle head list.
test_expect_success 'create bundle with duplicate exclusion refnames' '
git bundle create out.bdl "main" "main^!" &&

git bundle list-heads out.bdl |
make_user_friendly_and_stable_output >actual &&
cat >expect <<-\EOF &&
<COMMIT-P> refs/heads/main
EOF
test_cmp expect actual
'

# The short name "main" and the full name "refs/heads/main", each given
# twice, must all collapse to one refs/heads/main entry in the head list.
test_expect_success 'create bundle with duplicate refname short-form' '
git bundle create out.bdl "main" "main" "refs/heads/main" "refs/heads/main" &&

git bundle list-heads out.bdl |
make_user_friendly_and_stable_output >actual &&
cat >expect <<-\EOF &&
<COMMIT-P> refs/heads/main
EOF
test_cmp expect actual
'

test_expect_success 'read bundle over stdin' ' test_expect_success 'read bundle over stdin' '
git bundle create some.bundle HEAD && git bundle create some.bundle HEAD &&