diff --git a/Makefile b/Makefile index e9a1e1efd5..f674b9cc57 100644 --- a/Makefile +++ b/Makefile @@ -1201,6 +1201,7 @@ LIB_OBJS += object-file.o LIB_OBJS += object-name.o LIB_OBJS += object.o LIB_OBJS += odb.o +LIB_OBJS += odb/streaming.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o LIB_OBJS += oidset.o @@ -1294,7 +1295,6 @@ LIB_OBJS += split-index.o LIB_OBJS += stable-qsort.o LIB_OBJS += statinfo.o LIB_OBJS += strbuf.o -LIB_OBJS += streaming.o LIB_OBJS += string-list.o LIB_OBJS += strmap.o LIB_OBJS += strvec.o diff --git a/archive-tar.c b/archive-tar.c index 73b63ddc41..0fc70d13a8 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -12,8 +12,8 @@ #include "tar.h" #include "archive.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" -#include "streaming.h" #include "run-command.h" #include "write-or-die.h" @@ -129,22 +129,20 @@ static void write_trailer(void) */ static int stream_blocked(struct repository *r, const struct object_id *oid) { - struct git_istream *st; - enum object_type type; - unsigned long sz; + struct odb_read_stream *st; char buf[BLOCKSIZE]; ssize_t readlen; - st = open_istream(r, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { - readlen = read_istream(st, buf, sizeof(buf)); + readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen <= 0) break; do_write_blocked(buf, readlen); } - close_istream(st); + odb_read_stream_close(st); if (!readlen) finish_record(); return readlen; diff --git a/archive-zip.c b/archive-zip.c index bea5bdd43d..97ea8d60d6 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -10,9 +10,9 @@ #include "gettext.h" #include "git-zlib.h" #include "hex.h" -#include "streaming.h" #include "utf8.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" #include "userdiff.h" #include "write-or-die.h" @@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args, enum zip_method method; unsigned char *out; void *deflated = NULL; - struct git_istream *stream = NULL; + struct odb_read_stream *stream = NULL; unsigned long flags = 0; int is_binary = -1; const char *path_without_prefix = path + args->baselen; @@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args, method = ZIP_METHOD_DEFLATE; if (!buffer) { - enum object_type type; - stream = open_istream(args->repo, oid, &type, &size, - NULL); + stream = odb_read_stream_open(args->repo->objects, oid, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); + size = stream->size; flags |= ZIP_STREAM; out = NULL; } else { @@ -429,7 +428,7 @@ static int write_zip_entry(struct archiver_args *args, ssize_t readlen; for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -439,7 +438,7 @@ static int write_zip_entry(struct archiver_args *args, buf, readlen); write_or_die(1, buf, readlen); } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; @@ -462,7 +461,7 @@ static int write_zip_entry(struct archiver_args *args, zstream.avail_out = sizeof(compressed); for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -486,7 +485,7 @@ static int write_zip_entry(struct archiver_args *args, } } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 983ecec837..505ddaa12f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -18,13 +18,13 @@ #include "list-objects-filter-options.h" #include "parse-options.h" #include "userdiff.h" -#include "streaming.h" #include "oid-array.h" #include "packfile.h" #include "pack-bitmap.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "promisor-remote.h" #include "mailmap.h" @@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode, static int stream_blob(const struct object_id *oid) { - if (stream_blob_to_fd(1, oid, NULL, 0)) + if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0)) die("unable to stream %s to stdout", oid_to_hex(oid)); return 0; } diff --git a/builtin/fsck.c b/builtin/fsck.c index c489582faa..4979bc795e 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -13,11 +13,11 @@ #include "fsck.h" #include "parse-options.h" #include "progress.h" -#include "streaming.h" #include "packfile.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "path.h" #include "read-cache-ll.h" #include "replace-object.h" @@ -340,7 +340,8 @@ static void check_unreachable_object(struct object *obj) } f = xfopen(filename, "w"); if (obj->type == OBJ_BLOB) { - if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 1)) + if (odb_stream_blob_to_fd(the_repository->objects, fileno(f), + &obj->oid, NULL, 1)) die_errno(_("could not write '%s'"), filename); } else fprintf(f, "%s\n", describe_object(&obj->oid)); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 699fe678cd..a7e901e49c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -16,12 +16,12 @@ #include "progress.h" #include "fsck.h" #include "strbuf.h" -#include "streaming.h" #include "thread-utils.h" #include "packfile.h" #include "pack-revindex.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oid-array.h" #include "oidset.h" #include "path.h" @@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct object_id *oid, struct compare_data { struct object_entry *entry; - struct git_istream *st; + struct odb_read_stream *st; unsigned char *buf; unsigned long buf_size; }; @@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size, } while (size) { - ssize_t len = read_istream(data->st, data->buf, size); + ssize_t len = odb_read_stream_read(data->st, data->buf, size); if (len == 0) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&data->entry->idx.oid)); @@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size, static int check_collison(struct object_entry *entry) { struct compare_data data; - enum object_type type; - unsigned long size; if (entry->size <= repo_settings_get_big_file_threshold(the_repository) || entry->type != OBJ_BLOB) @@ -807,15 +805,14 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = open_istream(the_repository, &entry->idx.oid, &type, &size, - NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL); if (!data.st) return -1; - if (size != entry->size || type != entry->type) + if (data.st->size != entry->size || data.st->type != entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); - close_istream(data.st); + odb_read_stream_close(data.st); free(data.buf); return 0; } diff --git a/builtin/log.c b/builtin/log.c index c8319b8af3..d4cf9c59c8 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -16,6 +16,7 @@ #include "refs.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "pager.h" #include "color.h" #include "commit.h" @@ -35,7 +36,6 @@ #include "parse-options.h" #include "line-log.h" #include "branch.h" -#include "streaming.h" #include "version.h" #include "mailmap.h" #include "progress.h" @@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c fflush(rev->diffopt.file); if (!rev->diffopt.flags.textconv_set_via_cmdline || !rev->diffopt.flags.allow_textconv) - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); if (get_oid_with_context(the_repository, obj_name, GET_OID_RECORD_PATH, @@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c !textconv_object(the_repository, obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) { object_context_release(&obj_context); - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); } if (!buf) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 7937106ec5..1ce8d6ee21 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -22,7 +22,6 @@ #include "pack-objects.h" #include "progress.h" #include "refs.h" -#include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" #include "delta-islands.h" @@ -33,6 +32,7 @@ #include "packfile.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "dir.h" #include "midx.h" @@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size) return stream.total_out; } -static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f, +static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile *f, const struct object_id *oid) { git_zstream stream; @@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct git_istream *st, struct hashfi for (;;) { ssize_t readlen; int zret = Z_OK; - readlen = read_istream(st, ibuf, sizeof(ibuf)); + readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf)); if (readlen == -1) die(_("unable to read %s"), oid_to_hex(oid)); @@ -513,17 +513,19 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent unsigned hdrlen; enum object_type type; void *buf; - struct git_istream *st = NULL; + struct odb_read_stream *st = NULL; const unsigned hashsz = the_hash_algo->rawsz; if (!usable_delta) { if (oe_type(entry) == OBJ_BLOB && oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && - (st = open_istream(the_repository, &entry->idx.oid, &type, - &size, NULL)) != NULL) + (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + NULL)) != NULL) { buf = NULL; - else { + type = st->type; + size = st->size; + } else { buf = odb_read_object(the_repository->objects, &entry->idx.oid, &type, &size); @@ -577,7 +579,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent dheader[--pos] = 128 | (--ofs & 127); if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -591,7 +593,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent */ if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -601,7 +603,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } else { if (limit && hdrlen + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -609,7 +611,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } if (st) { datalen = write_large_blob_data(st, f, &entry->idx.oid); - close_istream(st); + odb_read_stream_close(st); } else { hashwrite(f, buf, datalen); free(buf); diff --git a/entry.c b/entry.c index cae02eb503..7817aee362 100644 --- a/entry.c +++ b/entry.c @@ -2,13 +2,13 @@ #include "git-compat-util.h" #include "odb.h" +#include "odb/streaming.h" #include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" #include "name-hash.h" #include "sparse-index.h" -#include "streaming.h" #include "submodule.h" #include "symlinks.h" #include "progress.h" @@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path, if (fd < 0) return -1; - result |= stream_blob_to_fd(fd, &ce->oid, filter, 1); + result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1); *fstat_done = fstat_checkout_output(fd, state, statbuf); result |= close(fd); diff --git a/meson.build b/meson.build index 63ca32b863..d765a09525 100644 --- a/meson.build +++ b/meson.build @@ -397,6 +397,7 @@ libgit_sources = [ 'object-name.c', 'object.c', 'odb.c', + 'odb/streaming.c', 'oid-array.c', 'oidmap.c', 'oidset.c', @@ -490,7 +491,6 @@ libgit_sources = [ 'stable-qsort.c', 'statinfo.c', 'strbuf.c', - 'streaming.c', 'string-list.c', 'strmap.c', 'strvec.c', diff --git a/object-file.c b/object-file.c index 84c9249dab..af1c3f972d 100644 --- a/object-file.c +++ b/object-file.c @@ -20,13 +20,13 @@ #include "object-file-convert.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oidtree.h" #include "pack.h" #include "packfile.h" #include "path.h" #include "read-cache-ll.h" #include "setup.h" -#include "streaming.h" #include "tempfile.h" #include "tmp-objdir.h" @@ -132,29 +132,27 @@ int check_object_signature(struct repository *r, const struct object_id *oid, int stream_object_signature(struct repository *r, const struct object_id *oid) { struct object_id real_oid; - unsigned long size; - enum object_type obj_type; - struct git_istream *st; + struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; - st = open_istream(r, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return -1; /* Generate the header */ - hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size); + hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size); /* Sha1.. */ r->hash_algo->init_fn(&c); git_hash_update(&c, hdr, hdrlen); for (;;) { char buf[1024 * 16]; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) { - close_istream(st); + odb_read_stream_close(st); return -1; } if (!readlen) @@ -162,7 +160,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, buf, readlen); } git_hash_final_oid(&real_oid, &c); - close_istream(st); + odb_read_stream_close(st); return !oideq(oid, &real_oid) ? -1 : 0; } @@ -234,9 +232,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size) +static void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { const char *p; int fd = open_loose_object(source->loose, oid, &p); @@ -246,11 +244,29 @@ void *odb_source_loose_map_object(struct odb_source *source, return map_fd(fd, p, size); } -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz) +enum unpack_loose_header_result { + ULHR_OK, + ULHR_BAD, + ULHR_TOO_LONG, +}; + +/** + * unpack_loose_header() initializes the data stream needed to unpack + * a loose object header. + * + * Returns: + * + * - ULHR_OK on success + * - ULHR_BAD on error + * - ULHR_TOO_LONG if the header was too long + * + * It will only parse up to MAX_HEADER_LEN bytes. + */ +static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz) { int status; @@ -329,11 +345,18 @@ static void *unpack_loose_rest(git_zstream *stream, } /* + * parse_loose_header() parses the starting " \0" of an + * object. If it doesn't follow that format -1 is returned. To check + * the validity of the populate the "typep" in the "struct + * object_info". It will be OBJ_BAD if the object type is unknown. The + * parsed can be retrieved via "oi->sizep", and from there + * passed to unpack_loose_rest(). + * * We used to just use "sscanf()", but that's actually way * too permissive for what we want to check. So do an anal * object header parse by hand. */ -int parse_loose_header(const char *hdr, struct object_info *oi) +static int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; size_t size; @@ -1980,3 +2003,127 @@ void odb_source_loose_free(struct odb_source_loose *loose) loose_object_map_clear(&loose->map); free(loose); } + +struct odb_loose_read_stream { + struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: + return 0; + case ODB_LOOSE_READ_STREAM_ERROR: + return -1; + default: + break; + } + + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; + if (sz < to_copy) + to_copy = sz; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; + total_read += to_copy; + } + + while (total_read < sz) { + int status; + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); + + total_read = st->z.next_out - (unsigned char *)buf; + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; + break; + } + if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_loose(struct odb_read_stream *_st) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + return 0; +} + +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + struct odb_loose_read_stream *st; + unsigned long mapsize; + void *mapped; + + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (!mapped) + return -1; + + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. + */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { + case ULHR_OK: + break; + case ULHR_BAD: + case ULHR_TOO_LONG: + goto error; + } + + oi.sizep = &st->base.size; + oi.typep = &st->base.type; + + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) + goto error; + + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; + + *out = &st->base; + + return 0; +error: + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + free(st); + return -1; +} diff --git a/object-file.h b/object-file.h index eeffa67bbd..1229d5f675 100644 --- a/object-file.h +++ b/object-file.h @@ -16,6 +16,8 @@ enum { int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags); +struct object_info; +struct odb_read_stream; struct odb_source; struct odb_source_loose { @@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, int flags); -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size); +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid); /* * Return true iff an object database source has a loose object @@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb, int format_object_header(char *str, size_t size, enum object_type type, size_t objsize); -/** - * unpack_loose_header() initializes the data stream needed to unpack - * a loose object header. - * - * Returns: - * - * - ULHR_OK on success - * - ULHR_BAD on error - * - ULHR_TOO_LONG if the header was too long - * - * It will only parse up to MAX_HEADER_LEN bytes. - */ -enum unpack_loose_header_result { - ULHR_OK, - ULHR_BAD, - ULHR_TOO_LONG, -}; -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz); - -/** - * parse_loose_header() parses the starting " \0" of an - * object. If it doesn't follow that format -1 is returned. To check - * the validity of the populate the "typep" in the "struct - * object_info". It will be OBJ_BAD if the object type is unknown. The - * parsed can be retrieved via "oi->sizep", and from there - * passed to unpack_loose_rest(). - */ -struct object_info; -int parse_loose_header(const char *hdr, struct object_info *oi); - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime); diff --git a/odb.c b/odb.c index dc8f292f3d..af13174425 100644 --- a/odb.c +++ b/odb.c @@ -670,8 +670,6 @@ static int do_oid_object_info_extended(struct object_database *odb, { static struct object_info blank_oi = OBJECT_INFO_INIT; const struct cached_object *co; - struct pack_entry e; - int rtype; const struct object_id *real = oid; int already_retried = 0; @@ -706,8 +704,8 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; /* Most likely it's a loose object. */ for (source = odb->sources; source; source = source->next) @@ -717,8 +715,8 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; } /* @@ -751,25 +749,6 @@ static int do_oid_object_info_extended(struct object_database *odb, } return -1; } - - if (oi == &blank_oi) - /* - * We know that the caller doesn't actually need the - * information below, so return early. - */ - return 0; - rtype = packed_object_info(odb->repo, e.p, e.offset, oi); - if (rtype < 0) { - mark_bad_packed_object(e.p, real); - return do_oid_object_info_extended(odb, real, oi, 0); - } else if (oi->whence == OI_PACKED) { - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; - oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || - rtype == OBJ_OFS_DELTA); - } - - return 0; } static int oid_object_info_convert(struct repository *r, diff --git a/odb/streaming.c b/odb/streaming.c new file mode 100644 index 0000000000..745cd486fb --- /dev/null +++ b/odb/streaming.c @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2011, Google Inc. + */ + +#include "git-compat-util.h" +#include "convert.h" +#include "environment.h" +#include "repository.h" +#include "object-file.h" +#include "odb.h" +#include "odb/streaming.h" +#include "replace-object.h" +#include "packfile.h" + +#define FILTER_BUFFER (1024*16) + +/***************************************************************** + * + * Filtered stream + * + *****************************************************************/ + +struct odb_filtered_read_stream { + struct odb_read_stream base; + struct odb_read_stream *upstream; + struct stream_filter *filter; + char ibuf[FILTER_BUFFER]; + char obuf[FILTER_BUFFER]; + int i_end, i_ptr; + int o_end, o_ptr; + int input_finished; +}; + +static int close_istream_filtered(struct odb_read_stream *_fs) +{ + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; + free_stream_filter(fs->filter); + return odb_read_stream_close(fs->upstream); +} + +static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, + size_t sz) +{ + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; + size_t filled = 0; + + while (sz) { + /* do we already have filtered output? */ + if (fs->o_ptr < fs->o_end) { + size_t to_move = fs->o_end - fs->o_ptr; + if (sz < to_move) + to_move = sz; + memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move); + fs->o_ptr += to_move; + sz -= to_move; + filled += to_move; + continue; + } + fs->o_end = fs->o_ptr = 0; + + /* do we have anything to feed the filter with? */ + if (fs->i_ptr < fs->i_end) { + size_t to_feed = fs->i_end - fs->i_ptr; + size_t to_receive = FILTER_BUFFER; + if (stream_filter(fs->filter, + fs->ibuf + fs->i_ptr, &to_feed, + fs->obuf, &to_receive)) + return -1; + fs->i_ptr = fs->i_end - to_feed; + fs->o_end = FILTER_BUFFER - to_receive; + continue; + } + + /* tell the filter to drain upon no more input */ + if (fs->input_finished) { + size_t to_receive = FILTER_BUFFER; + if (stream_filter(fs->filter, + NULL, NULL, + fs->obuf, &to_receive)) + return -1; + fs->o_end = FILTER_BUFFER - to_receive; + if (!fs->o_end) + break; + continue; + } + fs->i_end = fs->i_ptr = 0; + + /* refill the input from the upstream */ + if (!fs->input_finished) { + fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER); + if (fs->i_end < 0) + return -1; + if (fs->i_end) + continue; + } + fs->input_finished = 1; + } + return filled; +} + +static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, + struct stream_filter *filter) +{ + struct odb_filtered_read_stream *fs; + + CALLOC_ARRAY(fs, 1); + fs->base.close = close_istream_filtered; + fs->base.read = read_istream_filtered; + fs->upstream = st; + fs->filter = filter; + fs->base.size = -1; /* unknown */ + fs->base.type = st->type; + + return &fs->base; +} + +/***************************************************************** + * + * In-core stream + * + *****************************************************************/ + +struct odb_incore_read_stream { + struct odb_read_stream base; + char *buf; /* from odb_read_object_info_extended() */ + unsigned long read_ptr; +}; + +static int close_istream_incore(struct odb_read_stream *_st) +{ + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; + free(st->buf); + return 0; +} + +static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; + size_t read_size = sz; + size_t remainder = st->base.size - st->read_ptr; + + if (remainder <= read_size) + read_size = remainder; + if (read_size) { + memcpy(buf, st->buf + st->read_ptr, read_size); + st->read_ptr += read_size; + } + return read_size; +} + +static int open_istream_incore(struct odb_read_stream **out, + struct object_database *odb, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + struct odb_incore_read_stream stream = { + .base.close = close_istream_incore, + .base.read = read_istream_incore, + }; + struct odb_incore_read_stream *st; + int ret; + + oi.typep = &stream.base.type; + oi.sizep = &stream.base.size; + oi.contentp = (void **)&stream.buf; + ret = odb_read_object_info_extended(odb, oid, &oi, + OBJECT_INFO_DIE_IF_CORRUPT); + if (ret) + return ret; + + CALLOC_ARRAY(st, 1); + *st = stream; + *out = &st->base; + + return 0; +} + +/***************************************************************************** + * static helpers variables and functions for users of streaming interface + *****************************************************************************/ + +static int istream_source(struct odb_read_stream **out, + struct object_database *odb, + const struct object_id *oid) +{ + struct odb_source *source; + + if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) + return 0; + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) + if (!odb_source_loose_read_object_stream(out, source, oid)) + return 0; + + return open_istream_incore(out, odb, oid); +} + +/**************************************************************** + * Users of streaming interface + ****************************************************************/ + +int odb_read_stream_close(struct odb_read_stream *st) +{ + int r = st->close(st); + free(st); + return r; +} + +ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) +{ + return st->read(st, buf, sz); +} + +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + struct stream_filter *filter) +{ + struct odb_read_stream *st; + const struct object_id *real = lookup_replace_object(odb->repo, oid); + int ret = istream_source(&st, odb, real); + + if (ret) + return NULL; + + if (filter) { + /* Add "&& !is_null_stream_filter(filter)" for performance */ + struct odb_read_stream *nst = attach_stream_filter(st, filter); + if (!nst) { + odb_read_stream_close(st); + return NULL; + } + st = nst; + } + + return st; +} + +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek) +{ + struct odb_read_stream *st; + ssize_t kept = 0; + int result = -1; + + st = odb_read_stream_open(odb, oid, filter); + if (!st) { + if (filter) + free_stream_filter(filter); + return result; + } + if (st->type != OBJ_BLOB) + goto close_and_exit; + for (;;) { + char buf[1024 * 16]; + ssize_t wrote, holeto; + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); + + if (readlen < 0) + goto close_and_exit; + if (!readlen) + break; + if (can_seek && sizeof(buf) == readlen) { + for (holeto = 0; holeto < readlen; holeto++) + if (buf[holeto]) + break; + if (readlen == holeto) { + kept += holeto; + continue; + } + } + + if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) + goto close_and_exit; + else + kept = 0; + wrote = write_in_full(fd, buf, readlen); + + if (wrote < 0) + goto close_and_exit; + } + if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || + xwrite(fd, "", 1) != 1)) + goto close_and_exit; + result = 0; + + close_and_exit: + odb_read_stream_close(st); + return result; +} diff --git a/odb/streaming.h b/odb/streaming.h new file mode 100644 index 0000000000..c7861f7e13 --- /dev/null +++ b/odb/streaming.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2011, Google Inc. + */ +#ifndef STREAMING_H +#define STREAMING_H 1 + +#include "object.h" + +struct object_database; +struct odb_read_stream; +struct stream_filter; + +typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *); +typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t); + +/* + * A stream that can be used to read an object from the object database without + * loading all of it into memory. + */ +struct odb_read_stream { + odb_read_stream_close_fn close; + odb_read_stream_read_fn read; + enum object_type type; + unsigned long size; /* inflated size of full object */ +}; + +/* + * Create a new object stream for the given object database. An optional filter + * can be used to transform the object's content. + * + * Returns the stream on success, a `NULL` pointer otherwise. + */ +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + struct stream_filter *filter); + +/* + * Close the given read stream and release all resources associated with it. + * Returns 0 on success, a negative error code otherwise. + */ +int odb_read_stream_close(struct odb_read_stream *stream); + +/* + * Read data from the stream into the buffer. Returns 0 on EOF and the number + * of bytes read on success. Returns a negative error code in case reading from + * the stream fails. + */ +ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len); + +/* + * Look up the object by its ID and write the full contents to the file + * descriptor. The object must be a blob, or the function will fail. When + * provided, the filter is used to transform the blob contents. + * + * `can_seek` should be set to 1 in case the given file descriptor can be + * seek(3p)'d on. This is used to support files with holes in case a + * significant portion of the blob contains NUL bytes. + * + * Returns a negative error code on failure, 0 on success. + */ +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek); + +#endif /* STREAMING_H */ diff --git a/packfile.c b/packfile.c index 3d8b994a61..c88bd92619 100644 --- a/packfile.c +++ b/packfile.c @@ -20,6 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" @@ -885,22 +886,6 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, return p; } -int packfile_store_freshen_object(struct packfile_store *store, - const struct object_id *oid) -{ - struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) - return 0; - if (e.p->is_cruft) - return 0; - if (e.p->freshened) - return 1; - if (utime(e.p->pack_name, NULL)) - return 0; - e.p->freshened = 1; - return 1; -} - void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, @@ -2105,7 +2090,9 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) +static int find_pack_entry(struct repository *r, + const struct object_id *oid, + struct pack_entry *e) { struct packfile_list_entry *l; @@ -2130,6 +2117,57 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa return 0; } +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 0; + if (e.p->is_cruft) + return 0; + if (e.p->freshened) + return 1; + if (utime(e.p->pack_name, NULL)) + return 0; + e.p->freshened = 1; + return 1; +} + +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags UNUSED) +{ + static struct object_info blank_oi = OBJECT_INFO_INIT; + struct pack_entry e; + int rtype; + + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 1; + + /* + * We know that the caller doesn't actually need the + * information below, so return early. + */ + if (oi == &blank_oi) + return 0; + + rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); + if (rtype < 0) { + mark_bad_packed_object(e.p, oid); + return -1; + } + + if (oi->whence == OI_PACKED) { + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || + rtype == OBJ_OFS_DELTA); + } + + return 0; +} + static void maybe_invalidate_kept_pack_cache(struct repository *r, unsigned flags) { @@ -2400,3 +2438,130 @@ void packfile_store_close(struct packfile_store *store) close_pack(e->pack); } } + +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, + size_t sz) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; + break; + case ODB_PACKED_READ_STREAM_DONE: + return 0; + case ODB_PACKED_READ_STREAM_ERROR: + return -1; + case ODB_PACKED_READ_STREAM_INUSE: + break; + } + + while (total_read < sz) { + int status; + struct pack_window *window = NULL; + unsigned char *mapped; + + mapped = use_pack(st->pack, &window, + st->pos, &st->z.avail_in); + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); + + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; + unuse_pack(&window); + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; + break; + } + + /* + * Unlike the loose object case, we do not have to worry here + * about running out of input bytes and spinning infinitely. If + * we get Z_BUF_ERROR due to too few input bytes, then we'll + * replenish them in the next use_pack() call when we loop. If + * we truly hit the end of the pack (i.e., because it's corrupt + * or truncated), then use_pack() catches that and will die(). + */ + if (status != Z_OK && status != Z_BUF_ERROR) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_pack_non_delta(struct odb_read_stream *_st) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); + return 0; +} + +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid) +{ + struct odb_packed_read_stream *stream; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type in_pack_type; + unsigned long size; + + oi.sizep = &size; + + if (packfile_store_read_object_info(store, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(store->odb->repo) >= size) + return -1; + + in_pack_type = unpack_object_header(oi.u.packed.pack, + &window, + &oi.u.packed.offset, + &size); + unuse_pack(&window); + switch (in_pack_type) { + default: + return -1; /* we do not do deltas for now */ + case OBJ_COMMIT: + case OBJ_TREE: + case OBJ_BLOB: + case OBJ_TAG: + break; + } + + CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = read_istream_pack_non_delta; + stream->base.type = in_pack_type; + stream->base.size = size; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; + + *out = &stream->base; + + return 0; +} diff --git a/packfile.h b/packfile.h index 20e1cf17b2..59d162a3f4 100644 --- a/packfile.h +++ b/packfile.h @@ -9,6 +9,7 @@ /* in odb.h */ struct object_info; +struct odb_read_stream; struct packed_git { struct pack_window *windows; @@ -177,6 +178,21 @@ void packfile_store_add_pack(struct packfile_store *store, for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \ ((p) = (e ? e->pack : NULL)); e = e->next) +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid); + +/* + * Try to read the object identified by its ID from the object store and + * populate the object info with its data. Returns 1 in case the object was + * not found, 0 if it was and read successfully, and a negative error code in + * case the object was corrupted. + */ +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); + /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. @@ -376,7 +392,6 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. */ -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e); int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); int has_object_pack(struct repository *r, const struct object_id *oid); diff --git a/parallel-checkout.c b/parallel-checkout.c index fba6aa65a6..0bf4bd6d4a 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -13,7 +13,7 @@ #include "read-cache-ll.h" #include "run-command.h" #include "sigchain.h" -#include "streaming.h" +#include "odb/streaming.h" #include "symlinks.h" #include "thread-utils.h" #include "trace2.h" @@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd, filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid); if (filter) { - if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) { + if (odb_stream_blob_to_fd(the_repository->objects, fd, + &pc_item->ce->oid, filter, 1)) { /* On error, reset fd to try writing without streaming */ if (reset_fd(fd, path)) return -1; diff --git a/streaming.c b/streaming.c deleted file mode 100644 index 00ad649ae3..0000000000 --- a/streaming.c +++ /dev/null @@ -1,561 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ - -#define USE_THE_REPOSITORY_VARIABLE - -#include "git-compat-util.h" -#include "convert.h" -#include "environment.h" -#include "streaming.h" -#include "repository.h" -#include "object-file.h" -#include "odb.h" -#include "replace-object.h" -#include "packfile.h" - -typedef int (*open_istream_fn)(struct git_istream *, - struct repository *, - const struct object_id *, - enum object_type *); -typedef int (*close_istream_fn)(struct git_istream *); -typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t); - -#define FILTER_BUFFER (1024*16) - -struct filtered_istream { - struct git_istream *upstream; - struct stream_filter *filter; - char ibuf[FILTER_BUFFER]; - char obuf[FILTER_BUFFER]; - int i_end, i_ptr; - int o_end, o_ptr; - int input_finished; -}; - -struct git_istream { - open_istream_fn open; - close_istream_fn close; - read_istream_fn read; - - unsigned long size; /* inflated size of full object */ - git_zstream z; - enum { z_unused, z_used, z_done, z_error } z_state; - - union { - struct { - char *buf; /* from odb_read_object_info_extended() */ - unsigned long read_ptr; - } incore; - - struct { - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; - } loose; - - struct { - struct packed_git *pack; - off_t pos; - } in_pack; - - struct filtered_istream filtered; - } u; -}; - -/***************************************************************** - * - * Common helpers - * - *****************************************************************/ - -static void close_deflated_stream(struct git_istream *st) -{ - if (st->z_state == z_used) - git_inflate_end(&st->z); -} - - -/***************************************************************** - * - * Filtered stream - * - *****************************************************************/ - -static int close_istream_filtered(struct git_istream *st) -{ - free_stream_filter(st->u.filtered.filter); - return close_istream(st->u.filtered.upstream); -} - -static ssize_t read_istream_filtered(struct git_istream *st, char *buf, - size_t sz) -{ - struct filtered_istream *fs = &(st->u.filtered); - size_t filled = 0; - - while (sz) { - /* do we already have filtered output? */ - if (fs->o_ptr < fs->o_end) { - size_t to_move = fs->o_end - fs->o_ptr; - if (sz < to_move) - to_move = sz; - memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move); - fs->o_ptr += to_move; - sz -= to_move; - filled += to_move; - continue; - } - fs->o_end = fs->o_ptr = 0; - - /* do we have anything to feed the filter with? */ - if (fs->i_ptr < fs->i_end) { - size_t to_feed = fs->i_end - fs->i_ptr; - size_t to_receive = FILTER_BUFFER; - if (stream_filter(fs->filter, - fs->ibuf + fs->i_ptr, &to_feed, - fs->obuf, &to_receive)) - return -1; - fs->i_ptr = fs->i_end - to_feed; - fs->o_end = FILTER_BUFFER - to_receive; - continue; - } - - /* tell the filter to drain upon no more input */ - if (fs->input_finished) { - size_t to_receive = FILTER_BUFFER; - if (stream_filter(fs->filter, - NULL, NULL, - fs->obuf, &to_receive)) - return -1; - fs->o_end = FILTER_BUFFER - to_receive; - if (!fs->o_end) - break; - continue; - } - fs->i_end = fs->i_ptr = 0; - - /* refill the input from the upstream */ - if (!fs->input_finished) { - fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); - if (fs->i_end < 0) - return -1; - if (fs->i_end) - continue; - } - fs->input_finished = 1; - } - return filled; -} - -static struct git_istream *attach_stream_filter(struct git_istream *st, - struct stream_filter *filter) -{ - struct git_istream *ifs = xmalloc(sizeof(*ifs)); - struct filtered_istream *fs = &(ifs->u.filtered); - - ifs->close = close_istream_filtered; - ifs->read = read_istream_filtered; - fs->upstream = st; - fs->filter = filter; - fs->i_end = fs->i_ptr = 0; - fs->o_end = fs->o_ptr = 0; - fs->input_finished = 0; - ifs->size = -1; /* unknown */ - return ifs; -} - -/***************************************************************** - * - * Loose object stream - * - *****************************************************************/ - -static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) -{ - size_t total_read = 0; - - switch (st->z_state) { - case z_done: - return 0; - case z_error: - return -1; - default: - break; - } - - if (st->u.loose.hdr_used < st->u.loose.hdr_avail) { - size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used; - if (sz < to_copy) - to_copy = sz; - memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy); - st->u.loose.hdr_used += to_copy; - total_read += to_copy; - } - - while (total_read < sz) { - int status; - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); - - total_read = st->z.next_out - (unsigned char *)buf; - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; - break; - } - if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = z_error; - return -1; - } - } - return total_read; -} - -static int close_istream_loose(struct git_istream *st) -{ - close_deflated_stream(st); - munmap(st->u.loose.mapped, st->u.loose.mapsize); - return 0; -} - -static int open_istream_loose(struct git_istream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) -{ - struct object_info oi = OBJECT_INFO_INIT; - struct odb_source *source; - - oi.sizep = &st->size; - oi.typep = type; - - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - st->u.loose.mapped = odb_source_loose_map_object(source, oid, - &st->u.loose.mapsize); - if (st->u.loose.mapped) - break; - } - if (!st->u.loose.mapped) - return -1; - - switch (unpack_loose_header(&st->z, st->u.loose.mapped, - st->u.loose.mapsize, st->u.loose.hdr, - sizeof(st->u.loose.hdr))) { - case ULHR_OK: - break; - case ULHR_BAD: - case ULHR_TOO_LONG: - goto error; - } - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0) - goto error; - - st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; - st->u.loose.hdr_avail = st->z.total_out; - st->z_state = z_used; - st->close = close_istream_loose; - st->read = read_istream_loose; - - return 0; -error: - git_inflate_end(&st->z); - munmap(st->u.loose.mapped, st->u.loose.mapsize); - return -1; -} - - -/***************************************************************** - * - * Non-delta packed object stream - * - *****************************************************************/ - -static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, - size_t sz) -{ - size_t total_read = 0; - - switch (st->z_state) { - case z_unused: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = z_used; - break; - case z_done: - return 0; - case z_error: - return -1; - case z_used: - break; - } - - while (total_read < sz) { - int status; - struct pack_window *window = NULL; - unsigned char *mapped; - - mapped = use_pack(st->u.in_pack.pack, &window, - st->u.in_pack.pos, &st->z.avail_in); - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); - - st->u.in_pack.pos += st->z.next_in - mapped; - total_read = st->z.next_out - (unsigned char *)buf; - unuse_pack(&window); - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; - break; - } - - /* - * Unlike the loose object case, we do not have to worry here - * about running out of input bytes and spinning infinitely. If - * we get Z_BUF_ERROR due to too few input bytes, then we'll - * replenish them in the next use_pack() call when we loop. If - * we truly hit the end of the pack (i.e., because it's corrupt - * or truncated), then use_pack() catches that and will die(). - */ - if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = z_error; - return -1; - } - } - return total_read; -} - -static int close_istream_pack_non_delta(struct git_istream *st) -{ - close_deflated_stream(st); - return 0; -} - -static int open_istream_pack_non_delta(struct git_istream *st, - struct repository *r UNUSED, - const struct object_id *oid UNUSED, - enum object_type *type UNUSED) -{ - struct pack_window *window; - enum object_type in_pack_type; - - window = NULL; - - in_pack_type = unpack_object_header(st->u.in_pack.pack, - &window, - &st->u.in_pack.pos, - &st->size); - unuse_pack(&window); - switch (in_pack_type) { - default: - return -1; /* we do not do deltas for now */ - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - break; - } - st->z_state = z_unused; - st->close = close_istream_pack_non_delta; - st->read = read_istream_pack_non_delta; - - return 0; -} - - -/***************************************************************** - * - * In-core stream - * - *****************************************************************/ - -static int close_istream_incore(struct git_istream *st) -{ - free(st->u.incore.buf); - return 0; -} - -static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) -{ - size_t read_size = sz; - size_t remainder = st->size - st->u.incore.read_ptr; - - if (remainder <= read_size) - read_size = remainder; - if (read_size) { - memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size); - st->u.incore.read_ptr += read_size; - } - return read_size; -} - -static int open_istream_incore(struct git_istream *st, struct repository *r, - const struct object_id *oid, enum object_type *type) -{ - struct object_info oi = OBJECT_INFO_INIT; - - st->u.incore.read_ptr = 0; - st->close = close_istream_incore; - st->read = read_istream_incore; - - oi.typep = type; - oi.sizep = &st->size; - oi.contentp = (void **)&st->u.incore.buf; - return odb_read_object_info_extended(r->objects, oid, &oi, - OBJECT_INFO_DIE_IF_CORRUPT); -} - -/***************************************************************************** - * static helpers variables and functions for users of streaming interface - *****************************************************************************/ - -static int istream_source(struct git_istream *st, - struct repository *r, - const struct object_id *oid, - enum object_type *type) -{ - unsigned long size; - int status; - struct object_info oi = OBJECT_INFO_INIT; - - oi.typep = type; - oi.sizep = &size; - status = odb_read_object_info_extended(r->objects, oid, &oi, 0); - if (status < 0) - return status; - - switch (oi.whence) { - case OI_LOOSE: - st->open = open_istream_loose; - return 0; - case OI_PACKED: - if (!oi.u.packed.is_delta && - repo_settings_get_big_file_threshold(the_repository) < size) { - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - st->open = open_istream_pack_non_delta; - return 0; - } - /* fallthru */ - default: - st->open = open_istream_incore; - return 0; - } -} - -/**************************************************************** - * Users of streaming interface - ****************************************************************/ - -int close_istream(struct git_istream *st) -{ - int r = st->close(st); - free(st); - return r; -} - -ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) -{ - return st->read(st, buf, sz); -} - -struct git_istream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) -{ - struct git_istream *st = xmalloc(sizeof(*st)); - const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real, type); - - if (ret) { - free(st); - return NULL; - } - - if (st->open(st, r, real, type)) { - if (open_istream_incore(st, r, real, type)) { - free(st); - return NULL; - } - } - if (filter) { - /* Add "&& !is_null_stream_filter(filter)" for performance */ - struct git_istream *nst = attach_stream_filter(st, filter); - if (!nst) { - close_istream(st); - return NULL; - } - st = nst; - } - - *size = st->size; - return st; -} - -int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, - int can_seek) -{ - struct git_istream *st; - enum object_type type; - unsigned long sz; - ssize_t kept = 0; - int result = -1; - - st = open_istream(the_repository, oid, &type, &sz, filter); - if (!st) { - if (filter) - free_stream_filter(filter); - return result; - } - if (type != OBJ_BLOB) - goto close_and_exit; - for (;;) { - char buf[1024 * 16]; - ssize_t wrote, holeto; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); - - if (readlen < 0) - goto close_and_exit; - if (!readlen) - break; - if (can_seek && sizeof(buf) == readlen) { - for (holeto = 0; holeto < readlen; holeto++) - if (buf[holeto]) - break; - if (readlen == holeto) { - kept += holeto; - continue; - } - } - - if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) - goto close_and_exit; - else - kept = 0; - wrote = write_in_full(fd, buf, readlen); - - if (wrote < 0) - goto close_and_exit; - } - if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || - xwrite(fd, "", 1) != 1)) - goto close_and_exit; - result = 0; - - close_and_exit: - close_istream(st); - return result; -} diff --git a/streaming.h b/streaming.h deleted file mode 100644 index bd27f59e57..0000000000 --- a/streaming.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ -#ifndef STREAMING_H -#define STREAMING_H 1 - -#include "object.h" - -/* opaque */ -struct git_istream; -struct stream_filter; - -struct git_istream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct git_istream *); -ssize_t read_istream(struct git_istream *, void *, size_t); - -int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek); - -#endif /* STREAMING_H */