Merge branch 'ps/object-read-stream' into jch

The "git_istream" abstraction has been revamped to make it easier
to interface with pluggable object database design.

* ps/object-read-stream:
  streaming: drop redundant type and size pointers
  streaming: move into object database subsystem
  streaming: refactor interface to be object-database-centric
  streaming: move logic to read packed objects streams into backend
  streaming: move logic to read loose objects streams into backend
  streaming: make the `odb_read_stream` definition public
  streaming: get rid of `the_repository`
  streaming: rely on object sources to create object stream
  packfile: introduce function to read object info from a store
  streaming: move zlib stream into backends
  streaming: create structure for filtered object streams
  streaming: create structure for packed object streams
  streaming: create structure for loose object streams
  streaming: create structure for in-core object streams
  streaming: allocate stream inside the backend-specific logic
  streaming: explicitly pass packfile info when streaming a packed object
  streaming: propagate final object type via the stream
  streaming: drop the `open()` callback function
  streaming: rename `git_istream` into `odb_read_stream`
seen
Junio C Hamano 2025-11-30 18:32:10 -08:00
commit b8ae9bb7f5
20 changed files with 779 additions and 729 deletions

View File

@ -1201,6 +1201,7 @@ LIB_OBJS += object-file.o
LIB_OBJS += object-name.o
LIB_OBJS += object.o
LIB_OBJS += odb.o
LIB_OBJS += odb/streaming.o
LIB_OBJS += oid-array.o
LIB_OBJS += oidmap.o
LIB_OBJS += oidset.o
@ -1294,7 +1295,6 @@ LIB_OBJS += split-index.o
LIB_OBJS += stable-qsort.o
LIB_OBJS += statinfo.o
LIB_OBJS += strbuf.o
LIB_OBJS += streaming.o
LIB_OBJS += string-list.o
LIB_OBJS += strmap.o
LIB_OBJS += strvec.o

View File

@ -12,8 +12,8 @@
#include "tar.h"
#include "archive.h"
#include "odb.h"
#include "odb/streaming.h"
#include "strbuf.h"
#include "streaming.h"
#include "run-command.h"
#include "write-or-die.h"

@ -129,22 +129,20 @@ static void write_trailer(void)
*/
static int stream_blocked(struct repository *r, const struct object_id *oid)
{
struct git_istream *st;
enum object_type type;
unsigned long sz;
struct odb_read_stream *st;
char buf[BLOCKSIZE];
ssize_t readlen;

st = open_istream(r, oid, &type, &sz, NULL);
st = odb_read_stream_open(r->objects, oid, NULL);
if (!st)
return error(_("cannot stream blob %s"), oid_to_hex(oid));
for (;;) {
readlen = read_istream(st, buf, sizeof(buf));
readlen = odb_read_stream_read(st, buf, sizeof(buf));
if (readlen <= 0)
break;
do_write_blocked(buf, readlen);
}
close_istream(st);
odb_read_stream_close(st);
if (!readlen)
finish_record();
return readlen;

View File

@ -10,9 +10,9 @@
#include "gettext.h"
#include "git-zlib.h"
#include "hex.h"
#include "streaming.h"
#include "utf8.h"
#include "odb.h"
#include "odb/streaming.h"
#include "strbuf.h"
#include "userdiff.h"
#include "write-or-die.h"
@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args,
enum zip_method method;
unsigned char *out;
void *deflated = NULL;
struct git_istream *stream = NULL;
struct odb_read_stream *stream = NULL;
unsigned long flags = 0;
int is_binary = -1;
const char *path_without_prefix = path + args->baselen;
@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args,
method = ZIP_METHOD_DEFLATE;

if (!buffer) {
enum object_type type;
stream = open_istream(args->repo, oid, &type, &size,
NULL);
stream = odb_read_stream_open(args->repo->objects, oid, NULL);
if (!stream)
return error(_("cannot stream blob %s"),
oid_to_hex(oid));
size = stream->size;
flags |= ZIP_STREAM;
out = NULL;
} else {
@ -429,7 +428,7 @@ static int write_zip_entry(struct archiver_args *args,
ssize_t readlen;

for (;;) {
readlen = read_istream(stream, buf, sizeof(buf));
readlen = odb_read_stream_read(stream, buf, sizeof(buf));
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);
@ -439,7 +438,7 @@ static int write_zip_entry(struct archiver_args *args,
buf, readlen);
write_or_die(1, buf, readlen);
}
close_istream(stream);
odb_read_stream_close(stream);
if (readlen)
return readlen;

@ -462,7 +461,7 @@ static int write_zip_entry(struct archiver_args *args,
zstream.avail_out = sizeof(compressed);

for (;;) {
readlen = read_istream(stream, buf, sizeof(buf));
readlen = odb_read_stream_read(stream, buf, sizeof(buf));
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);
@ -486,7 +485,7 @@ static int write_zip_entry(struct archiver_args *args,
}

}
close_istream(stream);
odb_read_stream_close(stream);
if (readlen)
return readlen;


View File

@ -18,13 +18,13 @@
#include "list-objects-filter-options.h"
#include "parse-options.h"
#include "userdiff.h"
#include "streaming.h"
#include "oid-array.h"
#include "packfile.h"
#include "pack-bitmap.h"
#include "object-file.h"
#include "object-name.h"
#include "odb.h"
#include "odb/streaming.h"
#include "replace-object.h"
#include "promisor-remote.h"
#include "mailmap.h"
@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode,

static int stream_blob(const struct object_id *oid)
{
if (stream_blob_to_fd(1, oid, NULL, 0))
if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0))
die("unable to stream %s to stdout", oid_to_hex(oid));
return 0;
}

View File

@ -13,11 +13,11 @@
#include "fsck.h"
#include "parse-options.h"
#include "progress.h"
#include "streaming.h"
#include "packfile.h"
#include "object-file.h"
#include "object-name.h"
#include "odb.h"
#include "odb/streaming.h"
#include "path.h"
#include "read-cache-ll.h"
#include "replace-object.h"
@ -340,7 +340,8 @@ static void check_unreachable_object(struct object *obj)
}
f = xfopen(filename, "w");
if (obj->type == OBJ_BLOB) {
if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 1))
if (odb_stream_blob_to_fd(the_repository->objects, fileno(f),
&obj->oid, NULL, 1))
die_errno(_("could not write '%s'"), filename);
} else
fprintf(f, "%s\n", describe_object(&obj->oid));

View File

@ -16,12 +16,12 @@
#include "progress.h"
#include "fsck.h"
#include "strbuf.h"
#include "streaming.h"
#include "thread-utils.h"
#include "packfile.h"
#include "pack-revindex.h"
#include "object-file.h"
#include "odb.h"
#include "odb/streaming.h"
#include "oid-array.h"
#include "oidset.h"
#include "path.h"
@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct object_id *oid,

struct compare_data {
struct object_entry *entry;
struct git_istream *st;
struct odb_read_stream *st;
unsigned char *buf;
unsigned long buf_size;
};
@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size,
}

while (size) {
ssize_t len = read_istream(data->st, data->buf, size);
ssize_t len = odb_read_stream_read(data->st, data->buf, size);
if (len == 0)
die(_("SHA1 COLLISION FOUND WITH %s !"),
oid_to_hex(&data->entry->idx.oid));
@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size,
static int check_collison(struct object_entry *entry)
{
struct compare_data data;
enum object_type type;
unsigned long size;

if (entry->size <= repo_settings_get_big_file_threshold(the_repository) ||
entry->type != OBJ_BLOB)
@ -807,15 +805,14 @@ static int check_collison(struct object_entry *entry)

memset(&data, 0, sizeof(data));
data.entry = entry;
data.st = open_istream(the_repository, &entry->idx.oid, &type, &size,
NULL);
data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL);
if (!data.st)
return -1;
if (size != entry->size || type != entry->type)
if (data.st->size != entry->size || data.st->type != entry->type)
die(_("SHA1 COLLISION FOUND WITH %s !"),
oid_to_hex(&entry->idx.oid));
unpack_data(entry, compare_objects, &data);
close_istream(data.st);
odb_read_stream_close(data.st);
free(data.buf);
return 0;
}

View File

@ -16,6 +16,7 @@
#include "refs.h"
#include "object-name.h"
#include "odb.h"
#include "odb/streaming.h"
#include "pager.h"
#include "color.h"
#include "commit.h"
@ -35,7 +36,6 @@
#include "parse-options.h"
#include "line-log.h"
#include "branch.h"
#include "streaming.h"
#include "version.h"
#include "mailmap.h"
#include "progress.h"
@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
fflush(rev->diffopt.file);
if (!rev->diffopt.flags.textconv_set_via_cmdline ||
!rev->diffopt.flags.allow_textconv)
return stream_blob_to_fd(1, oid, NULL, 0);
return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0);

if (get_oid_with_context(the_repository, obj_name,
GET_OID_RECORD_PATH,
@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
!textconv_object(the_repository, obj_context.path,
obj_context.mode, &oidc, 1, &buf, &size)) {
object_context_release(&obj_context);
return stream_blob_to_fd(1, oid, NULL, 0);
return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0);
}

if (!buf)

View File

@ -22,7 +22,6 @@
#include "pack-objects.h"
#include "progress.h"
#include "refs.h"
#include "streaming.h"
#include "thread-utils.h"
#include "pack-bitmap.h"
#include "delta-islands.h"
@ -33,6 +32,7 @@
#include "packfile.h"
#include "object-file.h"
#include "odb.h"
#include "odb/streaming.h"
#include "replace-object.h"
#include "dir.h"
#include "midx.h"
@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size)
return stream.total_out;
}

static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile *f,
const struct object_id *oid)
{
git_zstream stream;
@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct git_istream *st, struct hashfi
for (;;) {
ssize_t readlen;
int zret = Z_OK;
readlen = read_istream(st, ibuf, sizeof(ibuf));
readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf));
if (readlen == -1)
die(_("unable to read %s"), oid_to_hex(oid));

@ -513,17 +513,19 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
unsigned hdrlen;
enum object_type type;
void *buf;
struct git_istream *st = NULL;
struct odb_read_stream *st = NULL;
const unsigned hashsz = the_hash_algo->rawsz;

if (!usable_delta) {
if (oe_type(entry) == OBJ_BLOB &&
oe_size_greater_than(&to_pack, entry,
repo_settings_get_big_file_threshold(the_repository)) &&
(st = open_istream(the_repository, &entry->idx.oid, &type,
&size, NULL)) != NULL)
(st = odb_read_stream_open(the_repository->objects, &entry->idx.oid,
NULL)) != NULL) {
buf = NULL;
else {
type = st->type;
size = st->size;
} else {
buf = odb_read_object(the_repository->objects,
&entry->idx.oid, &type,
&size);
@ -577,7 +579,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
dheader[--pos] = 128 | (--ofs & 127);
if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
if (st)
close_istream(st);
odb_read_stream_close(st);
free(buf);
return 0;
}
@ -591,7 +593,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
*/
if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
if (st)
close_istream(st);
odb_read_stream_close(st);
free(buf);
return 0;
}
@ -601,7 +603,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
} else {
if (limit && hdrlen + datalen + hashsz >= limit) {
if (st)
close_istream(st);
odb_read_stream_close(st);
free(buf);
return 0;
}
@ -609,7 +611,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
}
if (st) {
datalen = write_large_blob_data(st, f, &entry->idx.oid);
close_istream(st);
odb_read_stream_close(st);
} else {
hashwrite(f, buf, datalen);
free(buf);

View File

@ -2,13 +2,13 @@

#include "git-compat-util.h"
#include "odb.h"
#include "odb/streaming.h"
#include "dir.h"
#include "environment.h"
#include "gettext.h"
#include "hex.h"
#include "name-hash.h"
#include "sparse-index.h"
#include "streaming.h"
#include "submodule.h"
#include "symlinks.h"
#include "progress.h"
@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path,
if (fd < 0)
return -1;

result |= stream_blob_to_fd(fd, &ce->oid, filter, 1);
result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1);
*fstat_done = fstat_checkout_output(fd, state, statbuf);
result |= close(fd);


View File

@ -397,6 +397,7 @@ libgit_sources = [
'object-name.c',
'object.c',
'odb.c',
'odb/streaming.c',
'oid-array.c',
'oidmap.c',
'oidset.c',
@ -490,7 +491,6 @@ libgit_sources = [
'stable-qsort.c',
'statinfo.c',
'strbuf.c',
'streaming.c',
'string-list.c',
'strmap.c',
'strvec.c',

View File

@ -20,13 +20,13 @@
#include "object-file-convert.h"
#include "object-file.h"
#include "odb.h"
#include "odb/streaming.h"
#include "oidtree.h"
#include "pack.h"
#include "packfile.h"
#include "path.h"
#include "read-cache-ll.h"
#include "setup.h"
#include "streaming.h"
#include "tempfile.h"
#include "tmp-objdir.h"

@ -132,29 +132,27 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
int stream_object_signature(struct repository *r, const struct object_id *oid)
{
struct object_id real_oid;
unsigned long size;
enum object_type obj_type;
struct git_istream *st;
struct odb_read_stream *st;
struct git_hash_ctx c;
char hdr[MAX_HEADER_LEN];
int hdrlen;

st = open_istream(r, oid, &obj_type, &size, NULL);
st = odb_read_stream_open(r->objects, oid, NULL);
if (!st)
return -1;

/* Generate the header */
hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size);
hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size);

/* Sha1.. */
r->hash_algo->init_fn(&c);
git_hash_update(&c, hdr, hdrlen);
for (;;) {
char buf[1024 * 16];
ssize_t readlen = read_istream(st, buf, sizeof(buf));
ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf));

if (readlen < 0) {
close_istream(st);
odb_read_stream_close(st);
return -1;
}
if (!readlen)
@ -162,7 +160,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
git_hash_update(&c, buf, readlen);
}
git_hash_final_oid(&real_oid, &c);
close_istream(st);
odb_read_stream_close(st);
return !oideq(oid, &real_oid) ? -1 : 0;
}

@ -234,9 +232,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size)
return map;
}

void *odb_source_loose_map_object(struct odb_source *source,
const struct object_id *oid,
unsigned long *size)
static void *odb_source_loose_map_object(struct odb_source *source,
const struct object_id *oid,
unsigned long *size)
{
const char *p;
int fd = open_loose_object(source->loose, oid, &p);
@ -246,11 +244,29 @@ void *odb_source_loose_map_object(struct odb_source *source,
return map_fd(fd, p, size);
}

enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
unsigned char *map,
unsigned long mapsize,
void *buffer,
unsigned long bufsiz)
enum unpack_loose_header_result {
ULHR_OK,
ULHR_BAD,
ULHR_TOO_LONG,
};

/**
* unpack_loose_header() initializes the data stream needed to unpack
* a loose object header.
*
* Returns:
*
* - ULHR_OK on success
* - ULHR_BAD on error
* - ULHR_TOO_LONG if the header was too long
*
* It will only parse up to MAX_HEADER_LEN bytes.
*/
static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
unsigned char *map,
unsigned long mapsize,
void *buffer,
unsigned long bufsiz)
{
int status;

@ -329,11 +345,18 @@ static void *unpack_loose_rest(git_zstream *stream,
}

/*
* parse_loose_header() parses the starting "<type> <len>\0" of an
* object. If it doesn't follow that format -1 is returned. To check
* the validity of the <type> populate the "typep" in the "struct
* object_info". It will be OBJ_BAD if the object type is unknown. The
* parsed <len> can be retrieved via "oi->sizep", and from there
* passed to unpack_loose_rest().
*
* We used to just use "sscanf()", but that's actually way
* too permissive for what we want to check. So do an anal
* object header parse by hand.
*/
int parse_loose_header(const char *hdr, struct object_info *oi)
static int parse_loose_header(const char *hdr, struct object_info *oi)
{
const char *type_buf = hdr;
size_t size;
@ -1980,3 +2003,127 @@ void odb_source_loose_free(struct odb_source_loose *loose)
loose_object_map_clear(&loose->map);
free(loose);
}

struct odb_loose_read_stream {
struct odb_read_stream base;
git_zstream z;
enum {
ODB_LOOSE_READ_STREAM_INUSE,
ODB_LOOSE_READ_STREAM_DONE,
ODB_LOOSE_READ_STREAM_ERROR,
} z_state;
void *mapped;
unsigned long mapsize;
char hdr[32];
int hdr_avail;
int hdr_used;
};

static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
{
struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
size_t total_read = 0;

switch (st->z_state) {
case ODB_LOOSE_READ_STREAM_DONE:
return 0;
case ODB_LOOSE_READ_STREAM_ERROR:
return -1;
default:
break;
}

if (st->hdr_used < st->hdr_avail) {
size_t to_copy = st->hdr_avail - st->hdr_used;
if (sz < to_copy)
to_copy = sz;
memcpy(buf, st->hdr + st->hdr_used, to_copy);
st->hdr_used += to_copy;
total_read += to_copy;
}

while (total_read < sz) {
int status;

st->z.next_out = (unsigned char *)buf + total_read;
st->z.avail_out = sz - total_read;
status = git_inflate(&st->z, Z_FINISH);

total_read = st->z.next_out - (unsigned char *)buf;

if (status == Z_STREAM_END) {
git_inflate_end(&st->z);
st->z_state = ODB_LOOSE_READ_STREAM_DONE;
break;
}
if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
git_inflate_end(&st->z);
st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
return -1;
}
}
return total_read;
}

static int close_istream_loose(struct odb_read_stream *_st)
{
struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
git_inflate_end(&st->z);
munmap(st->mapped, st->mapsize);
return 0;
}

int odb_source_loose_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
const struct object_id *oid)
{
struct object_info oi = OBJECT_INFO_INIT;
struct odb_loose_read_stream *st;
unsigned long mapsize;
void *mapped;

mapped = odb_source_loose_map_object(source, oid, &mapsize);
if (!mapped)
return -1;

/*
* Note: we must allocate this structure early even though we may still
* fail. This is because we need to initialize the zlib stream, and it
* is not possible to copy the stream around after the fact because it
* has self-referencing pointers.
*/
CALLOC_ARRAY(st, 1);

switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
sizeof(st->hdr))) {
case ULHR_OK:
break;
case ULHR_BAD:
case ULHR_TOO_LONG:
goto error;
}

oi.sizep = &st->base.size;
oi.typep = &st->base.type;

if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
goto error;

st->mapped = mapped;
st->mapsize = mapsize;
st->hdr_used = strlen(st->hdr) + 1;
st->hdr_avail = st->z.total_out;
st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
st->base.close = close_istream_loose;
st->base.read = read_istream_loose;

*out = &st->base;

return 0;
error:
git_inflate_end(&st->z);
munmap(st->mapped, st->mapsize);
free(st);
return -1;
}

View File

@ -16,6 +16,8 @@ enum {
int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);

struct object_info;
struct odb_read_stream;
struct odb_source;

struct odb_source_loose {
@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
struct object_info *oi, int flags);

void *odb_source_loose_map_object(struct odb_source *source,
const struct object_id *oid,
unsigned long *size);
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
const struct object_id *oid);

/*
* Return true iff an object database source has a loose object
@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb,
int format_object_header(char *str, size_t size, enum object_type type,
size_t objsize);

/**
* unpack_loose_header() initializes the data stream needed to unpack
* a loose object header.
*
* Returns:
*
* - ULHR_OK on success
* - ULHR_BAD on error
* - ULHR_TOO_LONG if the header was too long
*
* It will only parse up to MAX_HEADER_LEN bytes.
*/
enum unpack_loose_header_result {
ULHR_OK,
ULHR_BAD,
ULHR_TOO_LONG,
};
enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
unsigned char *map,
unsigned long mapsize,
void *buffer,
unsigned long bufsiz);

/**
* parse_loose_header() parses the starting "<type> <len>\0" of an
* object. If it doesn't follow that format -1 is returned. To check
* the validity of the <type> populate the "typep" in the "struct
* object_info". It will be OBJ_BAD if the object type is unknown. The
* parsed <len> can be retrieved via "oi->sizep", and from there
* passed to unpack_loose_rest().
*/
struct object_info;
int parse_loose_header(const char *hdr, struct object_info *oi);

int force_object_loose(struct odb_source *source,
const struct object_id *oid, time_t mtime);


29
odb.c
View File

@ -670,8 +670,6 @@ static int do_oid_object_info_extended(struct object_database *odb,
{
static struct object_info blank_oi = OBJECT_INFO_INIT;
const struct cached_object *co;
struct pack_entry e;
int rtype;
const struct object_id *real = oid;
int already_retried = 0;

@ -706,8 +704,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
while (1) {
struct odb_source *source;

if (find_pack_entry(odb->repo, real, &e))
break;
if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags))
return 0;

/* Most likely it's a loose object. */
for (source = odb->sources; source; source = source->next)
@ -717,8 +715,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
/* Not a loose object; someone else may have just packed it. */
if (!(flags & OBJECT_INFO_QUICK)) {
odb_reprepare(odb->repo->objects);
if (find_pack_entry(odb->repo, real, &e))
break;
if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags))
return 0;
}

/*
@ -751,25 +749,6 @@ static int do_oid_object_info_extended(struct object_database *odb,
}
return -1;
}

if (oi == &blank_oi)
/*
* We know that the caller doesn't actually need the
* information below, so return early.
*/
return 0;
rtype = packed_object_info(odb->repo, e.p, e.offset, oi);
if (rtype < 0) {
mark_bad_packed_object(e.p, real);
return do_oid_object_info_extended(odb, real, oi, 0);
} else if (oi->whence == OI_PACKED) {
oi->u.packed.offset = e.offset;
oi->u.packed.pack = e.p;
oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
rtype == OBJ_OFS_DELTA);
}

return 0;
}

static int oid_object_info_convert(struct repository *r,

293
odb/streaming.c Normal file
View File

@ -0,0 +1,293 @@
/*
* Copyright (c) 2011, Google Inc.
*/

#include "git-compat-util.h"
#include "convert.h"
#include "environment.h"
#include "repository.h"
#include "object-file.h"
#include "odb.h"
#include "odb/streaming.h"
#include "replace-object.h"
#include "packfile.h"

#define FILTER_BUFFER (1024*16)

/*****************************************************************
*
* Filtered stream
*
*****************************************************************/

struct odb_filtered_read_stream {
struct odb_read_stream base;
struct odb_read_stream *upstream;
struct stream_filter *filter;
char ibuf[FILTER_BUFFER];
char obuf[FILTER_BUFFER];
int i_end, i_ptr;
int o_end, o_ptr;
int input_finished;
};

static int close_istream_filtered(struct odb_read_stream *_fs)
{
struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs;
free_stream_filter(fs->filter);
return odb_read_stream_close(fs->upstream);
}

static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf,
size_t sz)
{
struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs;
size_t filled = 0;

while (sz) {
/* do we already have filtered output? */
if (fs->o_ptr < fs->o_end) {
size_t to_move = fs->o_end - fs->o_ptr;
if (sz < to_move)
to_move = sz;
memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
fs->o_ptr += to_move;
sz -= to_move;
filled += to_move;
continue;
}
fs->o_end = fs->o_ptr = 0;

/* do we have anything to feed the filter with? */
if (fs->i_ptr < fs->i_end) {
size_t to_feed = fs->i_end - fs->i_ptr;
size_t to_receive = FILTER_BUFFER;
if (stream_filter(fs->filter,
fs->ibuf + fs->i_ptr, &to_feed,
fs->obuf, &to_receive))
return -1;
fs->i_ptr = fs->i_end - to_feed;
fs->o_end = FILTER_BUFFER - to_receive;
continue;
}

/* tell the filter to drain upon no more input */
if (fs->input_finished) {
size_t to_receive = FILTER_BUFFER;
if (stream_filter(fs->filter,
NULL, NULL,
fs->obuf, &to_receive))
return -1;
fs->o_end = FILTER_BUFFER - to_receive;
if (!fs->o_end)
break;
continue;
}
fs->i_end = fs->i_ptr = 0;

/* refill the input from the upstream */
if (!fs->input_finished) {
fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER);
if (fs->i_end < 0)
return -1;
if (fs->i_end)
continue;
}
fs->input_finished = 1;
}
return filled;
}

static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
struct stream_filter *filter)
{
struct odb_filtered_read_stream *fs;

CALLOC_ARRAY(fs, 1);
fs->base.close = close_istream_filtered;
fs->base.read = read_istream_filtered;
fs->upstream = st;
fs->filter = filter;
fs->base.size = -1; /* unknown */
fs->base.type = st->type;

return &fs->base;
}

/*****************************************************************
*
* In-core stream
*
*****************************************************************/

struct odb_incore_read_stream {
struct odb_read_stream base;
char *buf; /* from odb_read_object_info_extended() */
unsigned long read_ptr;
};

static int close_istream_incore(struct odb_read_stream *_st)
{
struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st;
free(st->buf);
return 0;
}

static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz)
{
struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st;
size_t read_size = sz;
size_t remainder = st->base.size - st->read_ptr;

if (remainder <= read_size)
read_size = remainder;
if (read_size) {
memcpy(buf, st->buf + st->read_ptr, read_size);
st->read_ptr += read_size;
}
return read_size;
}

static int open_istream_incore(struct odb_read_stream **out,
struct object_database *odb,
const struct object_id *oid)
{
struct object_info oi = OBJECT_INFO_INIT;
struct odb_incore_read_stream stream = {
.base.close = close_istream_incore,
.base.read = read_istream_incore,
};
struct odb_incore_read_stream *st;
int ret;

oi.typep = &stream.base.type;
oi.sizep = &stream.base.size;
oi.contentp = (void **)&stream.buf;
ret = odb_read_object_info_extended(odb, oid, &oi,
OBJECT_INFO_DIE_IF_CORRUPT);
if (ret)
return ret;

CALLOC_ARRAY(st, 1);
*st = stream;
*out = &st->base;

return 0;
}

/*****************************************************************************
* static helpers variables and functions for users of streaming interface
*****************************************************************************/

static int istream_source(struct odb_read_stream **out,
struct object_database *odb,
const struct object_id *oid)
{
struct odb_source *source;

if (!packfile_store_read_object_stream(out, odb->packfiles, oid))
return 0;

odb_prepare_alternates(odb);
for (source = odb->sources; source; source = source->next)
if (!odb_source_loose_read_object_stream(out, source, oid))
return 0;

return open_istream_incore(out, odb, oid);
}

/****************************************************************
* Users of streaming interface
****************************************************************/

int odb_read_stream_close(struct odb_read_stream *st)
{
int r = st->close(st);
free(st);
return r;
}

ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz)
{
return st->read(st, buf, sz);
}

struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
const struct object_id *oid,
struct stream_filter *filter)
{
struct odb_read_stream *st;
const struct object_id *real = lookup_replace_object(odb->repo, oid);
int ret = istream_source(&st, odb, real);

if (ret)
return NULL;

if (filter) {
/* Add "&& !is_null_stream_filter(filter)" for performance */
struct odb_read_stream *nst = attach_stream_filter(st, filter);
if (!nst) {
odb_read_stream_close(st);
return NULL;
}
st = nst;
}

return st;
}

int odb_stream_blob_to_fd(struct object_database *odb,
int fd,
const struct object_id *oid,
struct stream_filter *filter,
int can_seek)
{
struct odb_read_stream *st;
ssize_t kept = 0;
int result = -1;

st = odb_read_stream_open(odb, oid, filter);
if (!st) {
if (filter)
free_stream_filter(filter);
return result;
}
if (st->type != OBJ_BLOB)
goto close_and_exit;
for (;;) {
char buf[1024 * 16];
ssize_t wrote, holeto;
ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf));

if (readlen < 0)
goto close_and_exit;
if (!readlen)
break;
if (can_seek && sizeof(buf) == readlen) {
for (holeto = 0; holeto < readlen; holeto++)
if (buf[holeto])
break;
if (readlen == holeto) {
kept += holeto;
continue;
}
}

if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
goto close_and_exit;
else
kept = 0;
wrote = write_in_full(fd, buf, readlen);

if (wrote < 0)
goto close_and_exit;
}
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
xwrite(fd, "", 1) != 1))
goto close_and_exit;
result = 0;

close_and_exit:
odb_read_stream_close(st);
return result;
}

67
odb/streaming.h Normal file
View File

@ -0,0 +1,67 @@
/*
* Copyright (c) 2011, Google Inc.
*/
#ifndef STREAMING_H
#define STREAMING_H 1

#include "object.h"

struct object_database;
struct odb_read_stream;
struct stream_filter;

typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *);
typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t);

/*
* A stream that can be used to read an object from the object database without
* loading all of it into memory.
*/
struct odb_read_stream {
odb_read_stream_close_fn close;
odb_read_stream_read_fn read;
enum object_type type;
unsigned long size; /* inflated size of full object */
};

/*
* Create a new object stream for the given object database. An optional filter
* can be used to transform the object's content.
*
* Returns the stream on success, a `NULL` pointer otherwise.
*/
struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
const struct object_id *oid,
struct stream_filter *filter);

/*
* Close the given read stream and release all resources associated with it.
* Returns 0 on success, a negative error code otherwise.
*/
int odb_read_stream_close(struct odb_read_stream *stream);

/*
* Read data from the stream into the buffer. Returns 0 on EOF and the number
* of bytes read on success. Returns a negative error code in case reading from
* the stream fails.
*/
ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len);

/*
* Look up the object by its ID and write the full contents to the file
* descriptor. The object must be a blob, or the function will fail. When
* provided, the filter is used to transform the blob contents.
*
* `can_seek` should be set to 1 in case the given file descriptor can be
* seek(3p)'d on. This is used to support files with holes in case a
* significant portion of the blob contains NUL bytes.
*
* Returns a negative error code on failure, 0 on success.
*/
int odb_stream_blob_to_fd(struct object_database *odb,
int fd,
const struct object_id *oid,
struct stream_filter *filter,
int can_seek);

#endif /* STREAMING_H */

View File

@ -20,6 +20,7 @@
#include "tree.h"
#include "object-file.h"
#include "odb.h"
#include "odb/streaming.h"
#include "midx.h"
#include "commit-graph.h"
#include "pack-revindex.h"
@ -885,22 +886,6 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store,
return p;
}

int packfile_store_freshen_object(struct packfile_store *store,
const struct object_id *oid)
{
struct pack_entry e;
if (!find_pack_entry(store->odb->repo, oid, &e))
return 0;
if (e.p->is_cruft)
return 0;
if (e.p->freshened)
return 1;
if (utime(e.p->pack_name, NULL))
return 0;
e.p->freshened = 1;
return 1;
}

void (*report_garbage)(unsigned seen_bits, const char *path);

static void report_helper(const struct string_list *list,
@ -2105,7 +2090,9 @@ static int fill_pack_entry(const struct object_id *oid,
return 1;
}

int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e)
static int find_pack_entry(struct repository *r,
const struct object_id *oid,
struct pack_entry *e)
{
struct packfile_list_entry *l;

@ -2130,6 +2117,57 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
return 0;
}

int packfile_store_freshen_object(struct packfile_store *store,
const struct object_id *oid)
{
struct pack_entry e;
if (!find_pack_entry(store->odb->repo, oid, &e))
return 0;
if (e.p->is_cruft)
return 0;
if (e.p->freshened)
return 1;
if (utime(e.p->pack_name, NULL))
return 0;
e.p->freshened = 1;
return 1;
}

int packfile_store_read_object_info(struct packfile_store *store,
const struct object_id *oid,
struct object_info *oi,
unsigned flags UNUSED)
{
static struct object_info blank_oi = OBJECT_INFO_INIT;
struct pack_entry e;
int rtype;

if (!find_pack_entry(store->odb->repo, oid, &e))
return 1;

/*
* We know that the caller doesn't actually need the
* information below, so return early.
*/
if (oi == &blank_oi)
return 0;

rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi);
if (rtype < 0) {
mark_bad_packed_object(e.p, oid);
return -1;
}

if (oi->whence == OI_PACKED) {
oi->u.packed.offset = e.offset;
oi->u.packed.pack = e.p;
oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
rtype == OBJ_OFS_DELTA);
}

return 0;
}

static void maybe_invalidate_kept_pack_cache(struct repository *r,
unsigned flags)
{
@ -2400,3 +2438,130 @@ void packfile_store_close(struct packfile_store *store)
close_pack(e->pack);
}
}

struct odb_packed_read_stream {
struct odb_read_stream base;
struct packed_git *pack;
git_zstream z;
enum {
ODB_PACKED_READ_STREAM_UNINITIALIZED,
ODB_PACKED_READ_STREAM_INUSE,
ODB_PACKED_READ_STREAM_DONE,
ODB_PACKED_READ_STREAM_ERROR,
} z_state;
off_t pos;
};

static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf,
size_t sz)
{
struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
size_t total_read = 0;

switch (st->z_state) {
case ODB_PACKED_READ_STREAM_UNINITIALIZED:
memset(&st->z, 0, sizeof(st->z));
git_inflate_init(&st->z);
st->z_state = ODB_PACKED_READ_STREAM_INUSE;
break;
case ODB_PACKED_READ_STREAM_DONE:
return 0;
case ODB_PACKED_READ_STREAM_ERROR:
return -1;
case ODB_PACKED_READ_STREAM_INUSE:
break;
}

while (total_read < sz) {
int status;
struct pack_window *window = NULL;
unsigned char *mapped;

mapped = use_pack(st->pack, &window,
st->pos, &st->z.avail_in);

st->z.next_out = (unsigned char *)buf + total_read;
st->z.avail_out = sz - total_read;
st->z.next_in = mapped;
status = git_inflate(&st->z, Z_FINISH);

st->pos += st->z.next_in - mapped;
total_read = st->z.next_out - (unsigned char *)buf;
unuse_pack(&window);

if (status == Z_STREAM_END) {
git_inflate_end(&st->z);
st->z_state = ODB_PACKED_READ_STREAM_DONE;
break;
}

/*
* Unlike the loose object case, we do not have to worry here
* about running out of input bytes and spinning infinitely. If
* we get Z_BUF_ERROR due to too few input bytes, then we'll
* replenish them in the next use_pack() call when we loop. If
* we truly hit the end of the pack (i.e., because it's corrupt
* or truncated), then use_pack() catches that and will die().
*/
if (status != Z_OK && status != Z_BUF_ERROR) {
git_inflate_end(&st->z);
st->z_state = ODB_PACKED_READ_STREAM_ERROR;
return -1;
}
}
return total_read;
}

static int close_istream_pack_non_delta(struct odb_read_stream *_st)
{
struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
if (st->z_state == ODB_PACKED_READ_STREAM_INUSE)
git_inflate_end(&st->z);
return 0;
}

int packfile_store_read_object_stream(struct odb_read_stream **out,
struct packfile_store *store,
const struct object_id *oid)
{
struct odb_packed_read_stream *stream;
struct pack_window *window = NULL;
struct object_info oi = OBJECT_INFO_INIT;
enum object_type in_pack_type;
unsigned long size;

oi.sizep = &size;

if (packfile_store_read_object_info(store, oid, &oi, 0) ||
oi.u.packed.is_delta ||
repo_settings_get_big_file_threshold(store->odb->repo) >= size)
return -1;

in_pack_type = unpack_object_header(oi.u.packed.pack,
&window,
&oi.u.packed.offset,
&size);
unuse_pack(&window);
switch (in_pack_type) {
default:
return -1; /* we do not do deltas for now */
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
break;
}

CALLOC_ARRAY(stream, 1);
stream->base.close = close_istream_pack_non_delta;
stream->base.read = read_istream_pack_non_delta;
stream->base.type = in_pack_type;
stream->base.size = size;
stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
stream->pack = oi.u.packed.pack;
stream->pos = oi.u.packed.offset;

*out = &stream->base;

return 0;
}

View File

@ -9,6 +9,7 @@

/* in odb.h */
struct object_info;
struct odb_read_stream;

struct packed_git {
struct pack_window *windows;
@ -177,6 +178,21 @@ void packfile_store_add_pack(struct packfile_store *store,
for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \
((p) = (e ? e->pack : NULL)); e = e->next)

int packfile_store_read_object_stream(struct odb_read_stream **out,
struct packfile_store *store,
const struct object_id *oid);

/*
* Try to read the object identified by its ID from the object store and
* populate the object info with its data. Returns 1 in case the object was
* not found, 0 if it was and read successfully, and a negative error code in
* case the object was corrupted.
*/
int packfile_store_read_object_info(struct packfile_store *store,
const struct object_id *oid,
struct object_info *oi,
unsigned flags);

/*
* Get all packs managed by the given store, including packfiles that are
* referenced by multi-pack indices.
@ -376,7 +392,6 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
* Iff a pack file in the given repository contains the object named by sha1,
* return true and store its location to e.
*/
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);

int has_object_pack(struct repository *r, const struct object_id *oid);

View File

@ -13,7 +13,7 @@
#include "read-cache-ll.h"
#include "run-command.h"
#include "sigchain.h"
#include "streaming.h"
#include "odb/streaming.h"
#include "symlinks.h"
#include "thread-utils.h"
#include "trace2.h"
@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd,

filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid);
if (filter) {
if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) {
if (odb_stream_blob_to_fd(the_repository->objects, fd,
&pc_item->ce->oid, filter, 1)) {
/* On error, reset fd to try writing without streaming */
if (reset_fd(fd, path))
return -1;

View File

@ -1,561 +0,0 @@
/*
* Copyright (c) 2011, Google Inc.
*/

#define USE_THE_REPOSITORY_VARIABLE

#include "git-compat-util.h"
#include "convert.h"
#include "environment.h"
#include "streaming.h"
#include "repository.h"
#include "object-file.h"
#include "odb.h"
#include "replace-object.h"
#include "packfile.h"

typedef int (*open_istream_fn)(struct git_istream *,
struct repository *,
const struct object_id *,
enum object_type *);
typedef int (*close_istream_fn)(struct git_istream *);
typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t);

#define FILTER_BUFFER (1024*16)

struct filtered_istream {
struct git_istream *upstream;
struct stream_filter *filter;
char ibuf[FILTER_BUFFER];
char obuf[FILTER_BUFFER];
int i_end, i_ptr;
int o_end, o_ptr;
int input_finished;
};

struct git_istream {
open_istream_fn open;
close_istream_fn close;
read_istream_fn read;

unsigned long size; /* inflated size of full object */
git_zstream z;
enum { z_unused, z_used, z_done, z_error } z_state;

union {
struct {
char *buf; /* from odb_read_object_info_extended() */
unsigned long read_ptr;
} incore;

struct {
void *mapped;
unsigned long mapsize;
char hdr[32];
int hdr_avail;
int hdr_used;
} loose;

struct {
struct packed_git *pack;
off_t pos;
} in_pack;

struct filtered_istream filtered;
} u;
};

/*****************************************************************
*
* Common helpers
*
*****************************************************************/

static void close_deflated_stream(struct git_istream *st)
{
if (st->z_state == z_used)
git_inflate_end(&st->z);
}


/*****************************************************************
*
* Filtered stream
*
*****************************************************************/

static int close_istream_filtered(struct git_istream *st)
{
free_stream_filter(st->u.filtered.filter);
return close_istream(st->u.filtered.upstream);
}

static ssize_t read_istream_filtered(struct git_istream *st, char *buf,
size_t sz)
{
struct filtered_istream *fs = &(st->u.filtered);
size_t filled = 0;

while (sz) {
/* do we already have filtered output? */
if (fs->o_ptr < fs->o_end) {
size_t to_move = fs->o_end - fs->o_ptr;
if (sz < to_move)
to_move = sz;
memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
fs->o_ptr += to_move;
sz -= to_move;
filled += to_move;
continue;
}
fs->o_end = fs->o_ptr = 0;

/* do we have anything to feed the filter with? */
if (fs->i_ptr < fs->i_end) {
size_t to_feed = fs->i_end - fs->i_ptr;
size_t to_receive = FILTER_BUFFER;
if (stream_filter(fs->filter,
fs->ibuf + fs->i_ptr, &to_feed,
fs->obuf, &to_receive))
return -1;
fs->i_ptr = fs->i_end - to_feed;
fs->o_end = FILTER_BUFFER - to_receive;
continue;
}

/* tell the filter to drain upon no more input */
if (fs->input_finished) {
size_t to_receive = FILTER_BUFFER;
if (stream_filter(fs->filter,
NULL, NULL,
fs->obuf, &to_receive))
return -1;
fs->o_end = FILTER_BUFFER - to_receive;
if (!fs->o_end)
break;
continue;
}
fs->i_end = fs->i_ptr = 0;

/* refill the input from the upstream */
if (!fs->input_finished) {
fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER);
if (fs->i_end < 0)
return -1;
if (fs->i_end)
continue;
}
fs->input_finished = 1;
}
return filled;
}

static struct git_istream *attach_stream_filter(struct git_istream *st,
struct stream_filter *filter)
{
struct git_istream *ifs = xmalloc(sizeof(*ifs));
struct filtered_istream *fs = &(ifs->u.filtered);

ifs->close = close_istream_filtered;
ifs->read = read_istream_filtered;
fs->upstream = st;
fs->filter = filter;
fs->i_end = fs->i_ptr = 0;
fs->o_end = fs->o_ptr = 0;
fs->input_finished = 0;
ifs->size = -1; /* unknown */
return ifs;
}

/*****************************************************************
*
* Loose object stream
*
*****************************************************************/

static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz)
{
size_t total_read = 0;

switch (st->z_state) {
case z_done:
return 0;
case z_error:
return -1;
default:
break;
}

if (st->u.loose.hdr_used < st->u.loose.hdr_avail) {
size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used;
if (sz < to_copy)
to_copy = sz;
memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy);
st->u.loose.hdr_used += to_copy;
total_read += to_copy;
}

while (total_read < sz) {
int status;

st->z.next_out = (unsigned char *)buf + total_read;
st->z.avail_out = sz - total_read;
status = git_inflate(&st->z, Z_FINISH);

total_read = st->z.next_out - (unsigned char *)buf;

if (status == Z_STREAM_END) {
git_inflate_end(&st->z);
st->z_state = z_done;
break;
}
if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
git_inflate_end(&st->z);
st->z_state = z_error;
return -1;
}
}
return total_read;
}

static int close_istream_loose(struct git_istream *st)
{
close_deflated_stream(st);
munmap(st->u.loose.mapped, st->u.loose.mapsize);
return 0;
}

static int open_istream_loose(struct git_istream *st, struct repository *r,
const struct object_id *oid,
enum object_type *type)
{
struct object_info oi = OBJECT_INFO_INIT;
struct odb_source *source;

oi.sizep = &st->size;
oi.typep = type;

odb_prepare_alternates(r->objects);
for (source = r->objects->sources; source; source = source->next) {
st->u.loose.mapped = odb_source_loose_map_object(source, oid,
&st->u.loose.mapsize);
if (st->u.loose.mapped)
break;
}
if (!st->u.loose.mapped)
return -1;

switch (unpack_loose_header(&st->z, st->u.loose.mapped,
st->u.loose.mapsize, st->u.loose.hdr,
sizeof(st->u.loose.hdr))) {
case ULHR_OK:
break;
case ULHR_BAD:
case ULHR_TOO_LONG:
goto error;
}
if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0)
goto error;

st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1;
st->u.loose.hdr_avail = st->z.total_out;
st->z_state = z_used;
st->close = close_istream_loose;
st->read = read_istream_loose;

return 0;
error:
git_inflate_end(&st->z);
munmap(st->u.loose.mapped, st->u.loose.mapsize);
return -1;
}


/*****************************************************************
*
* Non-delta packed object stream
*
*****************************************************************/

static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf,
size_t sz)
{
size_t total_read = 0;

switch (st->z_state) {
case z_unused:
memset(&st->z, 0, sizeof(st->z));
git_inflate_init(&st->z);
st->z_state = z_used;
break;
case z_done:
return 0;
case z_error:
return -1;
case z_used:
break;
}

while (total_read < sz) {
int status;
struct pack_window *window = NULL;
unsigned char *mapped;

mapped = use_pack(st->u.in_pack.pack, &window,
st->u.in_pack.pos, &st->z.avail_in);

st->z.next_out = (unsigned char *)buf + total_read;
st->z.avail_out = sz - total_read;
st->z.next_in = mapped;
status = git_inflate(&st->z, Z_FINISH);

st->u.in_pack.pos += st->z.next_in - mapped;
total_read = st->z.next_out - (unsigned char *)buf;
unuse_pack(&window);

if (status == Z_STREAM_END) {
git_inflate_end(&st->z);
st->z_state = z_done;
break;
}

/*
* Unlike the loose object case, we do not have to worry here
* about running out of input bytes and spinning infinitely. If
* we get Z_BUF_ERROR due to too few input bytes, then we'll
* replenish them in the next use_pack() call when we loop. If
* we truly hit the end of the pack (i.e., because it's corrupt
* or truncated), then use_pack() catches that and will die().
*/
if (status != Z_OK && status != Z_BUF_ERROR) {
git_inflate_end(&st->z);
st->z_state = z_error;
return -1;
}
}
return total_read;
}

static int close_istream_pack_non_delta(struct git_istream *st)
{
close_deflated_stream(st);
return 0;
}

static int open_istream_pack_non_delta(struct git_istream *st,
struct repository *r UNUSED,
const struct object_id *oid UNUSED,
enum object_type *type UNUSED)
{
struct pack_window *window;
enum object_type in_pack_type;

window = NULL;

in_pack_type = unpack_object_header(st->u.in_pack.pack,
&window,
&st->u.in_pack.pos,
&st->size);
unuse_pack(&window);
switch (in_pack_type) {
default:
return -1; /* we do not do deltas for now */
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
break;
}
st->z_state = z_unused;
st->close = close_istream_pack_non_delta;
st->read = read_istream_pack_non_delta;

return 0;
}


/*****************************************************************
*
* In-core stream
*
*****************************************************************/

static int close_istream_incore(struct git_istream *st)
{
free(st->u.incore.buf);
return 0;
}

static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz)
{
size_t read_size = sz;
size_t remainder = st->size - st->u.incore.read_ptr;

if (remainder <= read_size)
read_size = remainder;
if (read_size) {
memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size);
st->u.incore.read_ptr += read_size;
}
return read_size;
}

static int open_istream_incore(struct git_istream *st, struct repository *r,
const struct object_id *oid, enum object_type *type)
{
struct object_info oi = OBJECT_INFO_INIT;

st->u.incore.read_ptr = 0;
st->close = close_istream_incore;
st->read = read_istream_incore;

oi.typep = type;
oi.sizep = &st->size;
oi.contentp = (void **)&st->u.incore.buf;
return odb_read_object_info_extended(r->objects, oid, &oi,
OBJECT_INFO_DIE_IF_CORRUPT);
}

/*****************************************************************************
* static helpers variables and functions for users of streaming interface
*****************************************************************************/

static int istream_source(struct git_istream *st,
struct repository *r,
const struct object_id *oid,
enum object_type *type)
{
unsigned long size;
int status;
struct object_info oi = OBJECT_INFO_INIT;

oi.typep = type;
oi.sizep = &size;
status = odb_read_object_info_extended(r->objects, oid, &oi, 0);
if (status < 0)
return status;

switch (oi.whence) {
case OI_LOOSE:
st->open = open_istream_loose;
return 0;
case OI_PACKED:
if (!oi.u.packed.is_delta &&
repo_settings_get_big_file_threshold(the_repository) < size) {
st->u.in_pack.pack = oi.u.packed.pack;
st->u.in_pack.pos = oi.u.packed.offset;
st->open = open_istream_pack_non_delta;
return 0;
}
/* fallthru */
default:
st->open = open_istream_incore;
return 0;
}
}

/****************************************************************
* Users of streaming interface
****************************************************************/

int close_istream(struct git_istream *st)
{
int r = st->close(st);
free(st);
return r;
}

ssize_t read_istream(struct git_istream *st, void *buf, size_t sz)
{
return st->read(st, buf, sz);
}

struct git_istream *open_istream(struct repository *r,
const struct object_id *oid,
enum object_type *type,
unsigned long *size,
struct stream_filter *filter)
{
struct git_istream *st = xmalloc(sizeof(*st));
const struct object_id *real = lookup_replace_object(r, oid);
int ret = istream_source(st, r, real, type);

if (ret) {
free(st);
return NULL;
}

if (st->open(st, r, real, type)) {
if (open_istream_incore(st, r, real, type)) {
free(st);
return NULL;
}
}
if (filter) {
/* Add "&& !is_null_stream_filter(filter)" for performance */
struct git_istream *nst = attach_stream_filter(st, filter);
if (!nst) {
close_istream(st);
return NULL;
}
st = nst;
}

*size = st->size;
return st;
}

int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter,
int can_seek)
{
struct git_istream *st;
enum object_type type;
unsigned long sz;
ssize_t kept = 0;
int result = -1;

st = open_istream(the_repository, oid, &type, &sz, filter);
if (!st) {
if (filter)
free_stream_filter(filter);
return result;
}
if (type != OBJ_BLOB)
goto close_and_exit;
for (;;) {
char buf[1024 * 16];
ssize_t wrote, holeto;
ssize_t readlen = read_istream(st, buf, sizeof(buf));

if (readlen < 0)
goto close_and_exit;
if (!readlen)
break;
if (can_seek && sizeof(buf) == readlen) {
for (holeto = 0; holeto < readlen; holeto++)
if (buf[holeto])
break;
if (readlen == holeto) {
kept += holeto;
continue;
}
}

if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
goto close_and_exit;
else
kept = 0;
wrote = write_in_full(fd, buf, readlen);

if (wrote < 0)
goto close_and_exit;
}
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
xwrite(fd, "", 1) != 1))
goto close_and_exit;
result = 0;

close_and_exit:
close_istream(st);
return result;
}

View File

@ -1,21 +0,0 @@
/*
* Copyright (c) 2011, Google Inc.
*/
#ifndef STREAMING_H
#define STREAMING_H 1

#include "object.h"

/* opaque */
struct git_istream;
struct stream_filter;

struct git_istream *open_istream(struct repository *, const struct object_id *,
enum object_type *, unsigned long *,
struct stream_filter *);
int close_istream(struct git_istream *);
ssize_t read_istream(struct git_istream *, void *, size_t);

int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek);

#endif /* STREAMING_H */