From 6ed7f25e95069a900b10f838b15639af3fac05d3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 29 Aug 2008 16:07:58 -0400 Subject: [PATCH 1/5] pack-objects: improve returned information from write_one() This function returns 0 when the current object couldn't be written due to the pack size limit, otherwise the current offset in the pack. There is a problem with this approach however, since current object could be a delta and its delta base might just have been written in the same write_one() call, but those successfully written objects are not accounted in the offset variable tracked by the caller. Currently this is not an issue but a subsequent patch will need this. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index d394c494a5..640e05a12e 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -410,25 +410,22 @@ static unsigned long write_object(struct sha1file *f, return hdrlen + datalen; } -static off_t write_one(struct sha1file *f, +static int write_one(struct sha1file *f, struct object_entry *e, - off_t offset) + off_t *offset) { unsigned long size; /* offset is non zero if object is written already. */ if (e->idx.offset || e->preferred_base) - return offset; + return 1; /* if we are deltified, write out base object first. */ - if (e->delta) { - offset = write_one(f, e->delta, offset); - if (!offset) - return 0; - } + if (e->delta && !write_one(f, e->delta, offset)) + return 0; - e->idx.offset = offset; - size = write_object(f, e, offset); + e->idx.offset = *offset; + size = write_object(f, e, *offset); if (!size) { e->idx.offset = 0; return 0; @@ -436,9 +433,10 @@ static off_t write_one(struct sha1file *f, written_list[nr_written++] = &e->idx; /* make sure off_t is sufficiently large not to wrap */ - if (offset > offset + size) + if (*offset > *offset + size) die("pack too large for current definition of off_t"); - return offset + size; + *offset += size; + return 1; } /* forward declaration for write_pack_file */ @@ -448,7 +446,7 @@ static void write_pack_file(void) { uint32_t i = 0, j; struct sha1file *f; - off_t offset, offset_one, last_obj_offset = 0; + off_t offset; struct pack_header hdr; uint32_t nr_remaining = nr_result; time_t last_mtime = 0; @@ -480,11 +478,8 @@ static void write_pack_file(void) offset = sizeof(hdr); nr_written = 0; for (; i < nr_objects; i++) { - last_obj_offset = offset; - offset_one = write_one(f, objects + i, offset); - if (!offset_one) + if (!write_one(f, objects + i, &offset)) break; - offset = offset_one; display_progress(progress_state, written); } From abeb40e5aa5b4a39799ae1caad241c8c7708053a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 29 Aug 2008 16:07:59 -0400 Subject: [PATCH 2/5] improve reliability of fixup_pack_header_footer() Currently, this function has the potential to read corrupted pack data from disk and give it a valid SHA1 checksum. Let's add the ability to validate SHA1 checksum of existing data along the way, including before and after any arbitrary point in the pack. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 3 +- fast-import.c | 3 +- index-pack.c | 3 +- pack-write.c | 71 +++++++++++++++++++++++++++++++++++------- pack.h | 2 +- 5 files changed, 66 insertions(+), 16 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 640e05a12e..501f9367cb 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -493,7 +493,8 @@ static void write_pack_file(void) sha1close(f, sha1, CSUM_FSYNC); } else { int fd = sha1close(f, NULL, 0); - fixup_pack_header_footer(fd, sha1, pack_tmp_name, nr_written); + fixup_pack_header_footer(fd, sha1, pack_tmp_name, + nr_written, NULL, 0); close(fd); } diff --git a/fast-import.c b/fast-import.c index 7089e6f9e6..d85b3a561f 100644 --- a/fast-import.c +++ b/fast-import.c @@ -951,7 +951,8 @@ static void end_packfile(void) close_pack_windows(pack_data); fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1, - pack_data->pack_name, object_count); + pack_data->pack_name, object_count, + NULL, 0); close(pack_data->pack_fd); idx_name = keep_pack(create_index()); diff --git a/index-pack.c b/index-pack.c index 728af7da9c..411b80d815 100644 --- a/index-pack.c +++ b/index-pack.c @@ -982,7 +982,8 @@ int main(int argc, char **argv) nr_objects - nr_objects_initial); stop_progress_msg(&progress, msg); fixup_pack_header_footer(output_fd, sha1, - curr_pack, nr_objects); + curr_pack, nr_objects, + NULL, 0); } if (nr_deltas != nr_resolved_deltas) die("pack has %d unresolved deltas", diff --git a/pack-write.c b/pack-write.c index ddcfd37af2..f9776c51fa 100644 --- a/pack-write.c +++ b/pack-write.c @@ -144,41 +144,88 @@ char *write_idx_file(char *index_name, struct pack_idx_entry **objects, return index_name; } +/* + * Update pack header with object_count and compute new SHA1 for pack data + * associated to pack_fd, and write that SHA1 at the end. That new SHA1 + * is also returned in new_pack_sha1. + * + * If partial_pack_sha1 is non null, then the SHA1 of the existing pack + * (without the header update) is computed and validated against the + * one provided in partial_pack_sha1. The validation is performed at + * partial_pack_offset bytes in the pack file. The SHA1 of the remaining + * data (i.e. from partial_pack_offset to the end) is then computed and + * returned in partial_pack_sha1. + * + * Note that new_pack_sha1 is updated last, so both new_pack_sha1 and + * partial_pack_sha1 can refer to the same buffer if the caller is not + * interested in the resulting SHA1 of pack data above partial_pack_offset. + */ void fixup_pack_header_footer(int pack_fd, - unsigned char *pack_file_sha1, + unsigned char *new_pack_sha1, const char *pack_name, - uint32_t object_count) + uint32_t object_count, + unsigned char *partial_pack_sha1, + off_t partial_pack_offset) { static const int buf_sz = 128 * 1024; - SHA_CTX c; + SHA_CTX old_sha1_ctx, new_sha1_ctx; struct pack_header hdr; char *buf; + SHA1_Init(&old_sha1_ctx); + SHA1_Init(&new_sha1_ctx); + if (lseek(pack_fd, 0, SEEK_SET) != 0) - die("Failed seeking to start: %s", strerror(errno)); + die("Failed seeking to start of %s: %s", pack_name, strerror(errno)); if (read_in_full(pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr)) die("Unable to reread header of %s: %s", pack_name, strerror(errno)); if (lseek(pack_fd, 0, SEEK_SET) != 0) - die("Failed seeking to start: %s", strerror(errno)); + die("Failed seeking to start of %s: %s", pack_name, strerror(errno)); + SHA1_Update(&old_sha1_ctx, &hdr, sizeof(hdr)); hdr.hdr_entries = htonl(object_count); + SHA1_Update(&new_sha1_ctx, &hdr, sizeof(hdr)); write_or_die(pack_fd, &hdr, sizeof(hdr)); - - SHA1_Init(&c); - SHA1_Update(&c, &hdr, sizeof(hdr)); + partial_pack_offset -= sizeof(hdr); buf = xmalloc(buf_sz); for (;;) { - ssize_t n = xread(pack_fd, buf, buf_sz); + ssize_t m, n; + m = (partial_pack_sha1 && partial_pack_offset < buf_sz) ? + partial_pack_offset : buf_sz; + n = xread(pack_fd, buf, m); if (!n) break; if (n < 0) die("Failed to checksum %s: %s", pack_name, strerror(errno)); - SHA1_Update(&c, buf, n); + SHA1_Update(&new_sha1_ctx, buf, n); + + if (!partial_pack_sha1) + continue; + + SHA1_Update(&old_sha1_ctx, buf, n); + partial_pack_offset -= n; + if (partial_pack_offset == 0) { + unsigned char sha1[20]; + SHA1_Final(sha1, &old_sha1_ctx); + if (hashcmp(sha1, partial_pack_sha1) != 0) + die("Unexpected checksum for %s " + "(disk corruption?)", pack_name); + /* + * Now let's compute the SHA1 of the remainder of the + * pack, which also means making partial_pack_offset + * big enough not to matter anymore. + */ + SHA1_Init(&old_sha1_ctx); + partial_pack_offset = ~partial_pack_offset; + partial_pack_offset -= MSB(partial_pack_offset, 1); + } } free(buf); - SHA1_Final(pack_file_sha1, &c); - write_or_die(pack_fd, pack_file_sha1, 20); + if (partial_pack_sha1) + SHA1_Final(partial_pack_sha1, &old_sha1_ctx); + SHA1_Final(new_pack_sha1, &new_sha1_ctx); + write_or_die(pack_fd, new_pack_sha1, 20); fsync_or_die(pack_fd, pack_name); } diff --git a/pack.h b/pack.h index 76e6aa2aad..a883334b26 100644 --- a/pack.h +++ b/pack.h @@ -58,7 +58,7 @@ struct pack_idx_entry { extern char *write_idx_file(char *index_name, struct pack_idx_entry **objects, int nr_objects, unsigned char *sha1); extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr); extern int verify_pack(struct packed_git *); -extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t); +extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t); extern char *index_pack_lockfile(int fd); #define PH_ERROR_EOF (-1) From ac0463ed44e859c84e354cd0ae47d9b5b124e112 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 29 Aug 2008 16:08:00 -0400 Subject: [PATCH 3/5] pack-objects: use fixup_pack_header_footer()'s validation mode When limiting the pack size, a new header has to be written to the pack and a new SHA1 computed. Make sure that the SHA1 of what is being read back matches the SHA1 of what was written. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 4 ++-- csum-file.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 501f9367cb..a02c673fb7 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -492,9 +492,9 @@ static void write_pack_file(void) } else if (nr_written == nr_remaining) { sha1close(f, sha1, CSUM_FSYNC); } else { - int fd = sha1close(f, NULL, 0); + int fd = sha1close(f, sha1, 0); fixup_pack_header_footer(fd, sha1, pack_tmp_name, - nr_written, NULL, 0); + nr_written, sha1, offset); close(fd); } diff --git a/csum-file.c b/csum-file.c index ace64f165e..28389541a3 100644 --- a/csum-file.c +++ b/csum-file.c @@ -42,11 +42,11 @@ int sha1close(struct sha1file *f, unsigned char *result, unsigned int flags) sha1flush(f, offset); f->offset = 0; } + SHA1_Final(f->buffer, &f->ctx); + if (result) + hashcpy(result, f->buffer); if (flags & (CSUM_CLOSE | CSUM_FSYNC)) { /* write checksum and close fd */ - SHA1_Final(f->buffer, &f->ctx); - if (result) - hashcpy(result, f->buffer); sha1flush(f, 20); if (flags & CSUM_FSYNC) fsync_or_die(f->fd, f->name); From 8522148f79c29ef8f63688e7186e458a22333224 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 29 Aug 2008 16:08:01 -0400 Subject: [PATCH 4/5] index-pack: use fixup_pack_header_footer()'s validation mode When completing a thin pack, a new header has to be written to the pack and a new SHA1 computed. Make sure that the SHA1 of what is being read back matches the SHA1 of what was written for both: the original pack and the appended objects. To do so, a couple write_or_die() calls were converted to sha1write() which has the advantage of doing some buffering as well as handling SHA1 and CRC32 checksum already. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- index-pack.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/index-pack.c b/index-pack.c index 411b80d815..a6e91fe3ba 100644 --- a/index-pack.c +++ b/index-pack.c @@ -654,7 +654,7 @@ static void parse_pack_objects(unsigned char *sha1) } } -static int write_compressed(int fd, void *in, unsigned int size, uint32_t *obj_crc) +static int write_compressed(struct sha1file *f, void *in, unsigned int size) { z_stream stream; unsigned long maxsize; @@ -674,13 +674,12 @@ static int write_compressed(int fd, void *in, unsigned int size, uint32_t *obj_c deflateEnd(&stream); size = stream.total_out; - write_or_die(fd, out, size); - *obj_crc = crc32(*obj_crc, out, size); + sha1write(f, out, size); free(out); return size; } -static struct object_entry *append_obj_to_pack( +static struct object_entry *append_obj_to_pack(struct sha1file *f, const unsigned char *sha1, void *buf, unsigned long size, enum object_type type) { @@ -696,15 +695,15 @@ static struct object_entry *append_obj_to_pack( s >>= 7; } header[n++] = c; - write_or_die(output_fd, header, n); - obj[0].idx.crc32 = crc32(0, Z_NULL, 0); - obj[0].idx.crc32 = crc32(obj[0].idx.crc32, header, n); + crc32_begin(f); + sha1write(f, header, n); obj[0].size = size; obj[0].hdr_size = n; obj[0].type = type; obj[0].real_type = type; obj[1].idx.offset = obj[0].idx.offset + n; - obj[1].idx.offset += write_compressed(output_fd, buf, size, &obj[0].idx.crc32); + obj[1].idx.offset += write_compressed(f, buf, size); + obj[0].idx.crc32 = crc32_end(f); hashcpy(obj->idx.sha1, sha1); return obj; } @@ -716,7 +715,7 @@ static int delta_pos_compare(const void *_a, const void *_b) return a->obj_no - b->obj_no; } -static void fix_unresolved_deltas(int nr_unresolved) +static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved) { struct delta_entry **sorted_by_pos; int i, n = 0; @@ -754,8 +753,8 @@ static void fix_unresolved_deltas(int nr_unresolved) if (check_sha1_signature(d->base.sha1, base_obj.data, base_obj.size, typename(type))) die("local object %s is corrupt", sha1_to_hex(d->base.sha1)); - base_obj.obj = append_obj_to_pack(d->base.sha1, base_obj.data, - base_obj.size, type); + base_obj.obj = append_obj_to_pack(f, d->base.sha1, + base_obj.data, base_obj.size, type); link_base_data(NULL, &base_obj); find_delta_children(&d->base, &first, &last); @@ -875,7 +874,7 @@ int main(int argc, char **argv) const char *keep_name = NULL, *keep_msg = NULL; char *index_name_buf = NULL, *keep_name_buf = NULL; struct pack_idx_entry **idx_objects; - unsigned char sha1[20]; + unsigned char pack_sha1[20]; int nongit = 0; setup_git_directory_gently(&nongit); @@ -962,13 +961,15 @@ int main(int argc, char **argv) parse_pack_header(); objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry)); deltas = xmalloc(nr_objects * sizeof(struct delta_entry)); - parse_pack_objects(sha1); + parse_pack_objects(pack_sha1); if (nr_deltas == nr_resolved_deltas) { stop_progress(&progress); /* Flush remaining pack final 20-byte SHA1. */ flush(); } else { if (fix_thin_pack) { + struct sha1file *f; + unsigned char read_sha1[20], tail_sha1[20]; char msg[48]; int nr_unresolved = nr_deltas - nr_resolved_deltas; int nr_objects_initial = nr_objects; @@ -977,13 +978,19 @@ int main(int argc, char **argv) objects = xrealloc(objects, (nr_objects + nr_unresolved + 1) * sizeof(*objects)); - fix_unresolved_deltas(nr_unresolved); + f = sha1fd(output_fd, curr_pack); + fix_unresolved_deltas(f, nr_unresolved); sprintf(msg, "completed with %d local objects", nr_objects - nr_objects_initial); stop_progress_msg(&progress, msg); - fixup_pack_header_footer(output_fd, sha1, + sha1close(f, tail_sha1, 0); + hashcpy(read_sha1, pack_sha1); + fixup_pack_header_footer(output_fd, pack_sha1, curr_pack, nr_objects, - NULL, 0); + read_sha1, consumed_bytes-20); + if (hashcmp(read_sha1, tail_sha1) != 0) + die("Unexpected tail checksum for %s " + "(disk corruption?)", curr_pack); } if (nr_deltas != nr_resolved_deltas) die("pack has %d unresolved deltas", @@ -996,13 +1003,13 @@ int main(int argc, char **argv) idx_objects = xmalloc((nr_objects) * sizeof(struct pack_idx_entry *)); for (i = 0; i < nr_objects; i++) idx_objects[i] = &objects[i].idx; - curr_index = write_idx_file(index_name, idx_objects, nr_objects, sha1); + curr_index = write_idx_file(index_name, idx_objects, nr_objects, pack_sha1); free(idx_objects); final(pack_name, curr_pack, index_name, curr_index, keep_name, keep_msg, - sha1); + pack_sha1); free(objects); free(index_name_buf); free(keep_name_buf); From d35825da6d5570524234e1bfe4ff0f57957b6db3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 29 Aug 2008 16:08:02 -0400 Subject: [PATCH 5/5] fixup_pack_header_footer(): use nicely aligned buffer sizes It should be more efficient to use nicely aligned buffer sizes, either for filesystem operations or SHA1 checksums. Also, using a relatively small nominal size might allow for the data to remain in L1 cache between both SHA1_Update() calls. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- pack-write.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pack-write.c b/pack-write.c index f9776c51fa..939ed56362 100644 --- a/pack-write.c +++ b/pack-write.c @@ -167,7 +167,7 @@ void fixup_pack_header_footer(int pack_fd, unsigned char *partial_pack_sha1, off_t partial_pack_offset) { - static const int buf_sz = 128 * 1024; + int aligned_sz, buf_sz = 8 * 1024; SHA_CTX old_sha1_ctx, new_sha1_ctx; struct pack_header hdr; char *buf; @@ -188,10 +188,11 @@ void fixup_pack_header_footer(int pack_fd, partial_pack_offset -= sizeof(hdr); buf = xmalloc(buf_sz); + aligned_sz = buf_sz - sizeof(hdr); for (;;) { ssize_t m, n; - m = (partial_pack_sha1 && partial_pack_offset < buf_sz) ? - partial_pack_offset : buf_sz; + m = (partial_pack_sha1 && partial_pack_offset < aligned_sz) ? + partial_pack_offset : aligned_sz; n = xread(pack_fd, buf, m); if (!n) break; @@ -199,6 +200,10 @@ void fixup_pack_header_footer(int pack_fd, die("Failed to checksum %s: %s", pack_name, strerror(errno)); SHA1_Update(&new_sha1_ctx, buf, n); + aligned_sz -= n; + if (!aligned_sz) + aligned_sz = buf_sz; + if (!partial_pack_sha1) continue;