Merge branch 'ds/fix-thin-fix'

"git index-pack --fix-thin" used to abort to prevent a cycle in
delta chains from forming in a corner case even when there is no
such cycle.

* ds/fix-thin-fix:
  index-pack: allow revisiting REF_DELTA chains
  t5309: create failing test for 'git index-pack'
  test-tool: add pack-deltas helper
maint
Junio C Hamano 2025-05-12 14:22:49 -07:00
commit 6dbc41631d
7 changed files with 216 additions and 28 deletions

View File

@ -819,6 +819,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o
TEST_BUILTINS_OBJS += test-mktemp.o
TEST_BUILTINS_OBJS += test-name-hash.o
TEST_BUILTINS_OBJS += test-online-cpus.o
TEST_BUILTINS_OBJS += test-pack-deltas.o
TEST_BUILTINS_OBJS += test-pack-mtimes.o
TEST_BUILTINS_OBJS += test-parse-options.o
TEST_BUILTINS_OBJS += test-parse-pathspec-file.o

View File

@ -1108,8 +1108,8 @@ static void *threaded_second_pass(void *data)
set_thread_data(data);
for (;;) {
struct base_data *parent = NULL;
struct object_entry *child_obj;
struct base_data *child;
struct object_entry *child_obj = NULL;
struct base_data *child = NULL;

counter_lock();
display_progress(progress, nr_resolved_deltas);
@ -1136,15 +1136,18 @@ static void *threaded_second_pass(void *data)
parent = list_first_entry(&work_head, struct base_data,
list);

if (parent->ref_first <= parent->ref_last) {
while (parent->ref_first <= parent->ref_last) {
int offset = ref_deltas[parent->ref_first++].obj_no;
child_obj = objects + offset;
if (child_obj->real_type != OBJ_REF_DELTA)
die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)",
(uintmax_t) child_obj->idx.offset,
oid_to_hex(&parent->obj->idx.oid));
if (child_obj->real_type != OBJ_REF_DELTA) {
child_obj = NULL;
continue;
}
child_obj->real_type = parent->obj->real_type;
} else {
break;
}

if (!child_obj && parent->ofs_first <= parent->ofs_last) {
child_obj = objects +
ofs_deltas[parent->ofs_first++].obj_no;
assert(child_obj->real_type == OBJ_OFS_DELTA);
@ -1177,29 +1180,32 @@ static void *threaded_second_pass(void *data)
}
work_unlock();

if (parent) {
child = resolve_delta(child_obj, parent);
if (!child->children_remaining)
FREE_AND_NULL(child->data);
} else {
child = make_base(child_obj, NULL);
if (child->children_remaining) {
/*
* Since this child has its own delta children,
* we will need this data in the future.
* Inflate now so that future iterations will
* have access to this object's data while
* outside the work mutex.
*/
child->data = get_data_from_pack(child_obj);
child->size = child_obj->size;
if (child_obj) {
if (parent) {
child = resolve_delta(child_obj, parent);
if (!child->children_remaining)
FREE_AND_NULL(child->data);
} else{
child = make_base(child_obj, NULL);
if (child->children_remaining) {
/*
* Since this child has its own delta children,
* we will need this data in the future.
* Inflate now so that future iterations will
* have access to this object's data while
* outside the work mutex.
*/
child->data = get_data_from_pack(child_obj);
child->size = child_obj->size;
}
}
}

work_lock();
if (parent)
parent->retain_data--;
if (child->data) {

if (child && child->data) {
/*
* This child has its own children, so add it to
* work_head.
@ -1208,7 +1214,7 @@ static void *threaded_second_pass(void *data)
base_cache_used += child->size;
prune_base_data(NULL);
free_base_data(child);
} else {
} else if (child) {
/*
* This child does not have its own children. It may be
* the last descendant of its ancestors; free those

View File

@ -36,6 +36,7 @@ test_tool_sources = [
'test-mktemp.c',
'test-name-hash.c',
'test-online-cpus.c',
'test-pack-deltas.c',
'test-pack-mtimes.c',
'test-parse-options.c',
'test-parse-pathspec-file.c',

148
t/helper/test-pack-deltas.c Normal file
View File

@ -0,0 +1,148 @@
#define USE_THE_REPOSITORY_VARIABLE

#include "test-tool.h"
#include "git-compat-util.h"
#include "delta.h"
#include "git-zlib.h"
#include "hash.h"
#include "hex.h"
#include "pack.h"
#include "pack-objects.h"
#include "parse-options.h"
#include "setup.h"
#include "strbuf.h"
#include "string-list.h"

static const char *usage_str[] = {
"test-tool pack-deltas --num-objects <num-objects>",
NULL
};

static unsigned long do_compress(void **pptr, unsigned long size)
{
git_zstream stream;
void *in, *out;
unsigned long maxsize;

git_deflate_init(&stream, 1);
maxsize = git_deflate_bound(&stream, size);

in = *pptr;
out = xmalloc(maxsize);
*pptr = out;

stream.next_in = in;
stream.avail_in = size;
stream.next_out = out;
stream.avail_out = maxsize;
while (git_deflate(&stream, Z_FINISH) == Z_OK)
; /* nothing */
git_deflate_end(&stream);

free(in);
return stream.total_out;
}

static void write_ref_delta(struct hashfile *f,
struct object_id *oid,
struct object_id *base)
{
unsigned char header[MAX_PACK_OBJECT_HEADER];
unsigned long size, base_size, delta_size, compressed_size, hdrlen;
enum object_type type;
void *base_buf, *delta_buf;
void *buf = repo_read_object_file(the_repository,
oid, &type,
&size);

if (!buf)
die("unable to read %s", oid_to_hex(oid));

base_buf = repo_read_object_file(the_repository,
base, &type,
&base_size);

if (!base_buf)
die("unable to read %s", oid_to_hex(base));

delta_buf = diff_delta(base_buf, base_size,
buf, size, &delta_size, 0);

compressed_size = do_compress(&delta_buf, delta_size);

hdrlen = encode_in_pack_object_header(header, sizeof(header),
OBJ_REF_DELTA, delta_size);
hashwrite(f, header, hdrlen);
hashwrite(f, base->hash, the_repository->hash_algo->rawsz);
hashwrite(f, delta_buf, compressed_size);

free(buf);
free(base_buf);
free(delta_buf);
}

int cmd__pack_deltas(int argc, const char **argv)
{
int num_objects = -1;
struct hashfile *f;
struct strbuf line = STRBUF_INIT;
struct option options[] = {
OPT_INTEGER('n', "num-objects", &num_objects, N_("the number of objects to write")),
OPT_END()
};

argc = parse_options(argc, argv, NULL,
options, usage_str, 0);

if (argc || num_objects < 0)
usage_with_options(usage_str, options);

setup_git_directory();

f = hashfd(the_repository->hash_algo, 1, "<stdout>");
write_pack_header(f, num_objects);

/* Read each line from stdin into 'line' */
while (strbuf_getline_lf(&line, stdin) != EOF) {
const char *type_str, *content_oid_str, *base_oid_str = NULL;
struct object_id content_oid, base_oid;
struct string_list items = STRING_LIST_INIT_NODUP;
/*
* Tokenize into two or three parts:
* 1. REF_DELTA, OFS_DELTA, or FULL.
* 2. The object ID for the content object.
* 3. The object ID for the base object (optional).
*/
if (string_list_split_in_place(&items, line.buf, " ", 3) < 0)
die("invalid input format: %s", line.buf);

if (items.nr < 2)
die("invalid input format: %s", line.buf);

type_str = items.items[0].string;
content_oid_str = items.items[1].string;

if (get_oid_hex(content_oid_str, &content_oid))
die("invalid object: %s", content_oid_str);
if (items.nr >= 3) {
base_oid_str = items.items[2].string;
if (get_oid_hex(base_oid_str, &base_oid))
die("invalid object: %s", base_oid_str);
}
string_list_clear(&items, 0);

if (!strcmp(type_str, "REF_DELTA"))
write_ref_delta(f, &content_oid, &base_oid);
else if (!strcmp(type_str, "OFS_DELTA"))
die("OFS_DELTA not implemented");
else if (!strcmp(type_str, "FULL"))
die("FULL not implemented");
else
die("unknown pack type: %s", type_str);
}

finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK,
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
strbuf_release(&line);
return 0;
}

View File

@ -46,6 +46,7 @@ static struct test_cmd cmds[] = {
{ "mktemp", cmd__mktemp },
{ "name-hash", cmd__name_hash },
{ "online-cpus", cmd__online_cpus },
{ "pack-deltas", cmd__pack_deltas },
{ "pack-mtimes", cmd__pack_mtimes },
{ "parse-options", cmd__parse_options },
{ "parse-options-flags", cmd__parse_options_flags },

View File

@ -39,6 +39,7 @@ int cmd__mergesort(int argc, const char **argv);
int cmd__mktemp(int argc, const char **argv);
int cmd__name_hash(int argc, const char **argv);
int cmd__online_cpus(int argc, const char **argv);
int cmd__pack_deltas(int argc, const char **argv);
int cmd__pack_mtimes(int argc, const char **argv);
int cmd__parse_options(int argc, const char **argv);
int cmd__parse_options_flags(int argc, const char **argv);

View File

@ -60,7 +60,10 @@ test_expect_success 'index-pack detects REF_DELTA cycles' '
test_expect_success 'failover to an object in another pack' '
clear_packs &&
git index-pack --stdin <ab.pack &&
test_must_fail git index-pack --stdin --fix-thin <cycle.pack

# This cycle does not fail since the existence of A & B in
# the repo allows us to resolve the cycle.
git index-pack --stdin --fix-thin <cycle.pack
'

test_expect_success 'failover to a duplicate object in the same pack' '
@ -72,7 +75,34 @@ test_expect_success 'failover to a duplicate object in the same pack' '
pack_obj $A
} >recoverable.pack &&
pack_trailer recoverable.pack &&
test_must_fail git index-pack --fix-thin --stdin <recoverable.pack

# This cycle does not fail since the existence of a full copy
# of A in the pack allows us to resolve the cycle.
git index-pack --fix-thin --stdin <recoverable.pack
'

test_expect_success 'index-pack works with thin pack A->B->C with B on disk' '
git init server &&
(
cd server &&
test_commit_bulk 4
) &&

A=$(git -C server rev-parse HEAD^{tree}) &&
B=$(git -C server rev-parse HEAD~1^{tree}) &&
C=$(git -C server rev-parse HEAD~2^{tree}) &&
git -C server reset --hard HEAD~1 &&

test-tool -C server pack-deltas --num-objects=2 >thin.pack <<-EOF &&
REF_DELTA $A $B
REF_DELTA $B $C
EOF

git clone "file://$(pwd)/server" client &&
(
cd client &&
git index-pack --fix-thin --stdin <../thin.pack
)
'

test_done