diff --git a/object-file.c b/object-file.c index a59030911f..6d7afdb723 100644 --- a/object-file.c +++ b/object-file.c @@ -1448,29 +1448,17 @@ static int hash_blob_stream(struct odb_write_stream *stream, /* * Read the contents from fd for size bytes, streaming it to the - * packfile in state while updating the hash in ctx. Signal a failure - * by returning a negative value when the resulting pack would exceed - * the pack size limit and this is not the first object in the pack, - * so that the caller can discard what we wrote from the current pack - * by truncating it and opening a new one. The caller will then call - * us again after rewinding the input fd. - * - * The already_hashed_to pointer is kept untouched by the caller to - * make sure we do not hash the same byte when we are called - * again. This way, the caller does not have to checkpoint its hash - * status before calling us just in case we ask it to call us again - * with a new pack. + * packfile in state while updating the hash in ctx. */ -static int stream_blob_to_pack(struct transaction_packfile *state, - struct git_hash_ctx *ctx, off_t *already_hashed_to, - int fd, size_t size, const char *path) +static void stream_blob_to_pack(struct transaction_packfile *state, + struct git_hash_ctx *ctx, int fd, size_t size, + const char *path) { git_zstream s; unsigned char ibuf[16384]; unsigned char obuf[16384]; unsigned hdrlen; int status = Z_OK; - off_t offset = 0; git_deflate_init(&s, pack_compression_level); @@ -1487,15 +1475,9 @@ static int stream_blob_to_pack(struct transaction_packfile *state, if ((size_t)read_result != rsize) die("failed to read %u bytes from '%s'", (unsigned)rsize, path); - offset += rsize; - if (*already_hashed_to < offset) { - size_t hsize = offset - *already_hashed_to; - if (rsize < hsize) - hsize = rsize; - if (hsize) - git_hash_update(ctx, ibuf, hsize); - *already_hashed_to = offset; - } + + git_hash_update(ctx, ibuf, rsize); + s.next_in = ibuf; s.avail_in = rsize; size -= rsize; @@ -1506,14 +1488,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state, if (!s.avail_out || status == Z_STREAM_END) { size_t written = s.next_out - obuf; - /* would we bust the size limit? */ - if (state->nr_written && - pack_size_limit_cfg && - pack_size_limit_cfg < state->offset + written) { - git_deflate_abort(&s); - return -1; - } - hashwrite(state->f, obuf, written); state->offset += written; s.next_out = obuf; @@ -1530,7 +1504,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state, } } git_deflate_end(&s); - return 0; } static void flush_packfile_transaction(struct odb_transaction_files *transaction) @@ -1606,48 +1579,39 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac size_t size, const char *path) { struct transaction_packfile *state = &transaction->packfile; - off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; unsigned header_len; struct hashfile_checkpoint checkpoint; struct pack_idx_entry *idx; - seekback = lseek(fd, 0, SEEK_CUR); - if (seekback == (off_t)-1) - return error("cannot find the current offset"); - header_len = format_object_header((char *)obuf, sizeof(obuf), OBJ_BLOB, size); transaction->base.source->odb->repo->hash_algo->init_fn(&ctx); git_hash_update(&ctx, obuf, header_len); + /* + * If writing another object to the packfile could result in it + * exceeding the configured size limit, flush the current packfile + * transaction. + * + * Note that this uses the inflated object size as an approximation. + * Blob objects written in this manner are not delta-compressed, so + * the difference between the inflated and on-disk size is limited + * to zlib compression and is sufficient for this check. + */ + if (state->nr_written && pack_size_limit_cfg && + pack_size_limit_cfg < state->offset + size) + flush_packfile_transaction(transaction); + CALLOC_ARRAY(idx, 1); prepare_packfile_transaction(transaction); hashfile_checkpoint_init(state->f, &checkpoint); - already_hashed_to = 0; - - while (1) { - prepare_packfile_transaction(transaction); - hashfile_checkpoint(state->f, &checkpoint); - idx->offset = state->offset; - crc32_begin(state->f); - - if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, - fd, size, path)) - break; - /* - * Writing this object to the current pack will make - * it too big; we need to truncate it, start a new - * pack, and write into it. - */ - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - flush_packfile_transaction(transaction); - if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) - return error("cannot seek back"); - } + hashfile_checkpoint(state->f, &checkpoint); + idx->offset = state->offset; + crc32_begin(state->f); + stream_blob_to_pack(state, &ctx, fd, size, path); git_hash_final_oid(result_oid, &ctx); idx->crc32 = crc32_end(state->f);