diff --git a/builtin/hash-object.c b/builtin/hash-object.c index b96f46acf5..33911fd5e9 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -14,8 +14,11 @@ static void hash_fd(int fd, const char *type, int write_object, const char *path { struct stat st; unsigned char sha1[20]; + unsigned flags = (HASH_FORMAT_CHECK | + (write_object ? HASH_WRITE_OBJECT : 0)); + if (fstat(fd, &st) < 0 || - index_fd(sha1, fd, &st, write_object, type_from_string(type), path, 1)) + index_fd(sha1, fd, &st, type_from_string(type), path, flags)) die(write_object ? "Unable to add %s to database" : "Unable to hash %s", path); diff --git a/builtin/mktag.c b/builtin/mktag.c index 324a267163..640ab64f41 100644 --- a/builtin/mktag.c +++ b/builtin/mktag.c @@ -23,8 +23,8 @@ static int verify_object(const unsigned char *sha1, const char *expected_type) int ret = -1; enum object_type type; unsigned long size; - const unsigned char *repl; - void *buffer = read_sha1_file_repl(sha1, &type, &size, &repl); + void *buffer = read_sha1_file(sha1, &type, &size); + const unsigned char *repl = lookup_replace_object(sha1); if (buffer) { if (type == type_from_string(expected_type)) diff --git a/builtin/update-index.c b/builtin/update-index.c index d7850c6309..f14bc90830 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -99,7 +99,8 @@ static int add_one_path(struct cache_entry *old, const char *path, int len, stru fill_stat_cache_info(ce, st); ce->ce_mode = ce_mode_from_stat(old, st->st_mode); - if (index_path(ce->sha1, path, st, !info_only)) + if (index_path(ce->sha1, path, st, + info_only ? 0 : HASH_WRITE_OBJECT)) return -1; option = allow_add ? ADD_CACHE_OK_TO_ADD : 0; option |= allow_replace ? 
ADD_CACHE_OK_TO_REPLACE : 0; diff --git a/cache.h b/cache.h index 2b34116624..b1b5bb5896 100644 --- a/cache.h +++ b/cache.h @@ -518,8 +518,11 @@ struct pathspec { extern int init_pathspec(struct pathspec *, const char **); extern void free_pathspec(struct pathspec *); extern int ce_path_match(const struct cache_entry *ce, const struct pathspec *pathspec); -extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path, int format_check); -extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object); + +#define HASH_WRITE_OBJECT 1 +#define HASH_FORMAT_CHECK 2 +extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); +extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags); extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st); #define REFRESH_REALLY 0x0001 /* ignore_valid */ @@ -606,7 +609,7 @@ enum eol { #endif }; -extern enum eol eol; +extern enum eol core_eol; enum branch_track { BRANCH_TRACK_UNSPECIFIED = -1, @@ -756,13 +759,23 @@ char *strip_path_suffix(const char *path, const char *suffix); int daemon_avoid_alias(const char *path); int offset_1st_component(const char *path); -/* Read and unpack a sha1 file into memory, write memory to a sha1 file */ -extern int sha1_object_info(const unsigned char *, unsigned long *); -extern void *read_sha1_file_repl(const unsigned char *sha1, enum object_type *type, unsigned long *size, const unsigned char **replacement); +/* object replacement */ +#define READ_SHA1_FILE_REPLACE 1 +extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag); static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) { - return read_sha1_file_repl(sha1, type, size, NULL); + return read_sha1_file_extended(sha1, type, 
size, READ_SHA1_FILE_REPLACE); } +extern const unsigned char *do_lookup_replace_object(const unsigned char *sha1); +static inline const unsigned char *lookup_replace_object(const unsigned char *sha1) +{ + if (!read_replace_refs) + return sha1; + return do_lookup_replace_object(sha1); +} + +/* Read and unpack a sha1 file into memory, write memory to a sha1 file */ +extern int sha1_object_info(const unsigned char *, unsigned long *); extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1); extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); diff --git a/commit.h b/commit.h index b3c3bb70c5..f251e75a5b 100644 --- a/commit.h +++ b/commit.h @@ -145,8 +145,6 @@ struct commit_graft *read_graft_line(char *buf, int len); int register_commit_graft(struct commit_graft *, int); struct commit_graft *lookup_commit_graft(const unsigned char *sha1); -const unsigned char *lookup_replace_object(const unsigned char *sha1); - extern struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2, int cleanup); extern struct commit_list *get_merge_bases_many(struct commit *one, int n, struct commit **twos, int cleanup); extern struct commit_list *get_octopus_merge_bases(struct commit_list *in); diff --git a/config.c b/config.c index 5f9ec28945..671c8df2cc 100644 --- a/config.c +++ b/config.c @@ -583,7 +583,7 @@ static int git_default_core_config(const char *var, const char *value) if (!strcmp(var, "core.autocrlf")) { if (value && !strcasecmp(value, "input")) { - if (eol == EOL_CRLF) + if (core_eol == EOL_CRLF) return error("core.autocrlf=input conflicts with core.eol=crlf"); auto_crlf = AUTO_CRLF_INPUT; return 0; @@ -603,14 +603,14 @@ static int git_default_core_config(const char *var, const char *value) if (!strcmp(var, "core.eol")) { if (value && !strcasecmp(value, "lf")) - eol = 
EOL_LF; + core_eol = EOL_LF; else if (value && !strcasecmp(value, "crlf")) - eol = EOL_CRLF; + core_eol = EOL_CRLF; else if (value && !strcasecmp(value, "native")) - eol = EOL_NATIVE; + core_eol = EOL_NATIVE; else - eol = EOL_UNSET; - if (eol == EOL_CRLF && auto_crlf == AUTO_CRLF_INPUT) + core_eol = EOL_UNSET; + if (core_eol == EOL_CRLF && auto_crlf == AUTO_CRLF_INPUT) return error("core.autocrlf=input conflicts with core.eol=crlf"); return 0; } diff --git a/convert.c b/convert.c index 7eb51b16ed..efc7e07d47 100644 --- a/convert.c +++ b/convert.c @@ -12,7 +12,7 @@ * translation when the "text" attribute or "auto_crlf" option is set. */ -enum action { +enum crlf_action { CRLF_GUESS = -1, CRLF_BINARY = 0, CRLF_TEXT, @@ -94,9 +94,9 @@ static int is_binary(unsigned long size, struct text_stat *stats) return 0; } -static enum eol determine_output_conversion(enum action action) +static enum eol output_eol(enum crlf_action crlf_action) { - switch (action) { + switch (crlf_action) { case CRLF_BINARY: return EOL_UNSET; case CRLF_CRLF: @@ -113,19 +113,19 @@ static enum eol determine_output_conversion(enum action action) return EOL_CRLF; else if (auto_crlf == AUTO_CRLF_INPUT) return EOL_LF; - else if (eol == EOL_UNSET) + else if (core_eol == EOL_UNSET) return EOL_NATIVE; } - return eol; + return core_eol; } -static void check_safe_crlf(const char *path, enum action action, +static void check_safe_crlf(const char *path, enum crlf_action crlf_action, struct text_stat *stats, enum safe_crlf checksafe) { if (!checksafe) return; - if (determine_output_conversion(action) == EOL_LF) { + if (output_eol(crlf_action) == EOL_LF) { /* * CRLFs would not be restored by checkout: * check if we'd remove CRLFs @@ -136,7 +136,7 @@ static void check_safe_crlf(const char *path, enum action action, else /* i.e. 
SAFE_CRLF_FAIL */ die("CRLF would be replaced by LF in %s.", path); } - } else if (determine_output_conversion(action) == EOL_CRLF) { + } else if (output_eol(crlf_action) == EOL_CRLF) { /* * CRLFs would be added by checkout: * check if we have "naked" LFs @@ -188,18 +188,19 @@ static int has_cr_in_index(const char *path) } static int crlf_to_git(const char *path, const char *src, size_t len, - struct strbuf *buf, enum action action, enum safe_crlf checksafe) + struct strbuf *buf, + enum crlf_action crlf_action, enum safe_crlf checksafe) { struct text_stat stats; char *dst; - if (action == CRLF_BINARY || - (action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len) + if (crlf_action == CRLF_BINARY || + (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len) return 0; gather_stats(src, len, &stats); - if (action == CRLF_AUTO || action == CRLF_GUESS) { + if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) { /* * We're currently not going to even try to convert stuff * that has bare CR characters. Does anybody do that crazy @@ -214,7 +215,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len, if (is_binary(len, &stats)) return 0; - if (action == CRLF_GUESS) { + if (crlf_action == CRLF_GUESS) { /* * If the file in the index has any CR in it, do not convert. * This is the new safer autocrlf handling. @@ -224,7 +225,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len, } } - check_safe_crlf(path, action, &stats, checksafe); + check_safe_crlf(path, crlf_action, &stats, checksafe); /* Optimization: No CR? Nothing to convert, regardless. 
*/ if (!stats.cr) @@ -234,7 +235,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len, if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); dst = buf->buf; - if (action == CRLF_AUTO || action == CRLF_GUESS) { + if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) { /* * If we guessed, we already know we rejected a file with * lone CR, and we can strip a CR without looking at what @@ -257,12 +258,12 @@ static int crlf_to_git(const char *path, const char *src, size_t len, } static int crlf_to_worktree(const char *path, const char *src, size_t len, - struct strbuf *buf, enum action action) + struct strbuf *buf, enum crlf_action crlf_action) { char *to_free = NULL; struct text_stat stats; - if (!len || determine_output_conversion(action) != EOL_CRLF) + if (!len || output_eol(crlf_action) != EOL_CRLF) return 0; gather_stats(src, len, &stats); @@ -275,8 +276,8 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len, if (stats.lf == stats.crlf) return 0; - if (action == CRLF_AUTO || action == CRLF_GUESS) { - if (action == CRLF_GUESS) { + if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) { + if (crlf_action == CRLF_GUESS) { /* If we have any CR or CRLF line endings, we do not touch it */ /* This is the new safer autocrlf-handling */ if (stats.cr > 0 || stats.crlf > 0) @@ -474,30 +475,6 @@ static int read_convert_config(const char *var, const char *value, void *cb) return 0; } -static void setup_convert_check(struct git_attr_check *check) -{ - static struct git_attr *attr_text; - static struct git_attr *attr_crlf; - static struct git_attr *attr_eol; - static struct git_attr *attr_ident; - static struct git_attr *attr_filter; - - if (!attr_text) { - attr_text = git_attr("text"); - attr_crlf = git_attr("crlf"); - attr_eol = git_attr("eol"); - attr_ident = git_attr("ident"); - attr_filter = git_attr("filter"); - user_convert_tail = &user_convert; - git_config(read_convert_config, NULL); - } - 
check[0].attr = attr_crlf; - check[1].attr = attr_ident; - check[2].attr = attr_filter; - check[3].attr = attr_eol; - check[4].attr = attr_text; -} - static int count_ident(const char *cp, unsigned long size) { /* @@ -715,7 +692,7 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check) return !!ATTR_TRUE(value); } -static enum action determine_action(enum action text_attr, enum eol eol_attr) +static enum crlf_action input_crlf_action(enum crlf_action text_attr, enum eol eol_attr) { if (text_attr == CRLF_BINARY) return CRLF_BINARY; @@ -726,66 +703,83 @@ static enum action determine_action(enum action text_attr, enum eol eol_attr) return text_attr; } +struct conv_attrs { + struct convert_driver *drv; + enum crlf_action crlf_action; + enum eol eol_attr; + int ident; +}; + +static const char *conv_attr_name[] = { + "crlf", "ident", "filter", "eol", "text", +}; +#define NUM_CONV_ATTRS ARRAY_SIZE(conv_attr_name) + +static void convert_attrs(struct conv_attrs *ca, const char *path) +{ + int i; + static struct git_attr_check ccheck[NUM_CONV_ATTRS]; + + if (!ccheck[0].attr) { + for (i = 0; i < NUM_CONV_ATTRS; i++) + ccheck[i].attr = git_attr(conv_attr_name[i]); + user_convert_tail = &user_convert; + git_config(read_convert_config, NULL); + } + + if (!git_checkattr(path, NUM_CONV_ATTRS, ccheck)) { + ca->crlf_action = git_path_check_crlf(path, ccheck + 4); + if (ca->crlf_action == CRLF_GUESS) + ca->crlf_action = git_path_check_crlf(path, ccheck + 0); + ca->ident = git_path_check_ident(path, ccheck + 1); + ca->drv = git_path_check_convert(path, ccheck + 2); + ca->eol_attr = git_path_check_eol(path, ccheck + 3); + } else { + ca->drv = NULL; + ca->crlf_action = CRLF_GUESS; + ca->eol_attr = EOL_UNSET; + ca->ident = 0; + } +} + int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) { - struct git_attr_check check[5]; - enum action action = CRLF_GUESS; - enum eol eol_attr = EOL_UNSET; - int ident = 
0, ret = 0; + int ret = 0; const char *filter = NULL; + struct conv_attrs ca; - setup_convert_check(check); - if (!git_checkattr(path, ARRAY_SIZE(check), check)) { - struct convert_driver *drv; - action = git_path_check_crlf(path, check + 4); - if (action == CRLF_GUESS) - action = git_path_check_crlf(path, check + 0); - ident = git_path_check_ident(path, check + 1); - drv = git_path_check_convert(path, check + 2); - eol_attr = git_path_check_eol(path, check + 3); - if (drv && drv->clean) - filter = drv->clean; - } + convert_attrs(&ca, path); + if (ca.drv) + filter = ca.drv->clean; ret |= apply_filter(path, src, len, dst, filter); if (ret) { src = dst->buf; len = dst->len; } - action = determine_action(action, eol_attr); - ret |= crlf_to_git(path, src, len, dst, action, checksafe); + ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe); if (ret) { src = dst->buf; len = dst->len; } - return ret | ident_to_git(path, src, len, dst, ident); + return ret | ident_to_git(path, src, len, dst, ca.ident); } static int convert_to_working_tree_internal(const char *path, const char *src, size_t len, struct strbuf *dst, int normalizing) { - struct git_attr_check check[5]; - enum action action = CRLF_GUESS; - enum eol eol_attr = EOL_UNSET; - int ident = 0, ret = 0; + int ret = 0; const char *filter = NULL; + struct conv_attrs ca; - setup_convert_check(check); - if (!git_checkattr(path, ARRAY_SIZE(check), check)) { - struct convert_driver *drv; - action = git_path_check_crlf(path, check + 4); - if (action == CRLF_GUESS) - action = git_path_check_crlf(path, check + 0); - ident = git_path_check_ident(path, check + 1); - drv = git_path_check_convert(path, check + 2); - eol_attr = git_path_check_eol(path, check + 3); - if (drv && drv->smudge) - filter = drv->smudge; - } + convert_attrs(&ca, path); + if (ca.drv) + filter = ca.drv->smudge; - ret |= ident_to_worktree(path, src, len, dst, ident); + ret |= 
ident_to_worktree(path, src, len, dst, ca.ident); if (ret) { src = dst->buf; len = dst->len; @@ -795,8 +789,8 @@ static int convert_to_working_tree_internal(const char *path, const char *src, * is a smudge filter. The filter might expect CRLFs. */ if (filter || !normalizing) { - action = determine_action(action, eol_attr); - ret |= crlf_to_worktree(path, src, len, dst, action); + ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + ret |= crlf_to_worktree(path, src, len, dst, ca.crlf_action); if (ret) { src = dst->buf; len = dst->len; diff --git a/environment.c b/environment.c index 40185bc854..94d58fd244 100644 --- a/environment.c +++ b/environment.c @@ -42,8 +42,8 @@ const char *editor_program; const char *askpass_program; const char *excludes_file; enum auto_crlf auto_crlf = AUTO_CRLF_FALSE; -int read_replace_refs = 1; -enum eol eol = EOL_UNSET; +int read_replace_refs = 1; /* NEEDSWORK: rename to use_replace_refs */ +enum eol core_eol = EOL_UNSET; enum safe_crlf safe_crlf = SAFE_CRLF_WARN; unsigned whitespace_rule_cfg = WS_DEFAULT_RULE; enum branch_track git_branch_track = BRANCH_TRACK_REMOTE; diff --git a/notes-merge.c b/notes-merge.c index 28046a9984..e1aaf43b43 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -707,7 +707,7 @@ int notes_merge_commit(struct notes_merge_options *o, /* write file as blob, and add to partial_tree */ if (stat(ent->name, &st)) die_errno("Failed to stat '%s'", ent->name); - if (index_path(blob_sha1, ent->name, &st, 1)) + if (index_path(blob_sha1, ent->name, &st, HASH_WRITE_OBJECT)) die("Failed to write blob object from '%s'", ent->name); if (add_note(partial_tree, obj_sha1, blob_sha1, NULL)) die("Failed to add resolved note '%s' to notes tree", diff --git a/object.c b/object.c index 7e1f2bbed2..31976b5d70 100644 --- a/object.c +++ b/object.c @@ -188,8 +188,8 @@ struct object *parse_object(const unsigned char *sha1) unsigned long size; enum object_type type; int eaten; - const unsigned char *repl; - void *buffer = 
read_sha1_file_repl(sha1, &type, &size, &repl); + const unsigned char *repl = lookup_replace_object(sha1); + void *buffer = read_sha1_file(sha1, &type, &size); if (buffer) { struct object *obj; diff --git a/read-cache.c b/read-cache.c index f38471cac3..4ac9a037f4 100644 --- a/read-cache.c +++ b/read-cache.c @@ -92,7 +92,7 @@ static int ce_compare_data(struct cache_entry *ce, struct stat *st) if (fd >= 0) { unsigned char sha1[20]; - if (!index_fd(sha1, fd, st, 0, OBJ_BLOB, ce->name, 0)) + if (!index_fd(sha1, fd, st, OBJ_BLOB, ce->name, 0)) match = hashcmp(sha1, ce->sha1); /* index_fd() closed the file descriptor already */ } @@ -641,7 +641,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, return 0; } if (!intent_only) { - if (index_path(ce->sha1, path, st, 1)) + if (index_path(ce->sha1, path, st, HASH_WRITE_OBJECT)) return error("unable to index file %s", path); } else record_intent_to_add(ce); diff --git a/replace_object.c b/replace_object.c index 7c6c7544ad..d0b1548726 100644 --- a/replace_object.c +++ b/replace_object.c @@ -85,12 +85,14 @@ static void prepare_replace_object(void) for_each_replace_ref(register_replace_ref, NULL); replace_object_prepared = 1; + if (!replace_object_nr) + read_replace_refs = 0; } /* We allow "recursive" replacement. 
Only within reason, though */ #define MAXREPLACEDEPTH 5 -const unsigned char *lookup_replace_object(const unsigned char *sha1) +const unsigned char *do_lookup_replace_object(const unsigned char *sha1) { int pos, depth = MAXREPLACEDEPTH; const unsigned char *cur = sha1; diff --git a/sha1_file.c b/sha1_file.c index 1a7e41070e..064a330408 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -11,6 +11,7 @@ #include "pack.h" #include "blob.h" #include "commit.h" +#include "run-command.h" #include "tag.h" #include "tree.h" #include "tree-walk.h" @@ -2205,23 +2206,21 @@ static void *read_object(const unsigned char *sha1, enum object_type *type, * deal with them should arrange to call read_object() and give error * messages themselves. */ -void *read_sha1_file_repl(const unsigned char *sha1, - enum object_type *type, - unsigned long *size, - const unsigned char **replacement) +void *read_sha1_file_extended(const unsigned char *sha1, + enum object_type *type, + unsigned long *size, + unsigned flag) { - const unsigned char *repl = lookup_replace_object(sha1); void *data; char *path; const struct packed_git *p; + const unsigned char *repl = (flag & READ_SHA1_FILE_REPLACE) + ? 
lookup_replace_object(sha1) : sha1; errno = 0; data = read_object(repl, type, size); - if (data) { - if (replacement) - *replacement = repl; + if (data) return data; - } if (errno && errno != ENOENT) die_errno("failed to read object %s", sha1_to_hex(sha1)); @@ -2580,10 +2579,11 @@ static void check_tag(const void *buf, size_t size) } static int index_mem(unsigned char *sha1, void *buf, size_t size, - int write_object, enum object_type type, - const char *path, int format_check) + enum object_type type, + const char *path, unsigned flags) { int ret, re_allocated = 0; + int write_object = flags & HASH_WRITE_OBJECT; if (!type) type = OBJ_BLOB; @@ -2599,7 +2599,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, re_allocated = 1; } } - if (format_check) { + if (flags & HASH_FORMAT_CHECK) { if (type == OBJ_TREE) check_tree(buf, size); if (type == OBJ_COMMIT) @@ -2617,44 +2617,141 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, return ret; } +static int index_pipe(unsigned char *sha1, int fd, enum object_type type, + const char *path, unsigned flags) +{ + struct strbuf sbuf = STRBUF_INIT; + int ret; + + if (strbuf_read(&sbuf, fd, 4096) >= 0) + ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags); + else + ret = -1; + strbuf_release(&sbuf); + return ret; +} + #define SMALL_FILE_SIZE (32*1024) -int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, - enum object_type type, const char *path, int format_check) +static int index_core(unsigned char *sha1, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) { int ret; - size_t size = xsize_t(st->st_size); - if (!S_ISREG(st->st_mode)) { - struct strbuf sbuf = STRBUF_INIT; - if (strbuf_read(&sbuf, fd, 4096) >= 0) - ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object, - type, path, format_check); - else - ret = -1; - strbuf_release(&sbuf); - } else if (!size) { - ret = index_mem(sha1, NULL, size, write_object, type, path, - 
format_check); + if (!size) { + ret = index_mem(sha1, NULL, size, type, path, flags); } else if (size <= SMALL_FILE_SIZE) { char *buf = xmalloc(size); if (size == read_in_full(fd, buf, size)) - ret = index_mem(sha1, buf, size, write_object, type, - path, format_check); + ret = index_mem(sha1, buf, size, type, path, flags); else ret = error("short read %s", strerror(errno)); free(buf); } else { void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - ret = index_mem(sha1, buf, size, write_object, type, path, - format_check); + ret = index_mem(sha1, buf, size, type, path, flags); munmap(buf, size); } + return ret; +} + +/* + * This creates one packfile per large blob, because the caller + * immediately wants the result sha1, and fast-import can report the + * object name via marks mechanism only by closing the created + * packfile. + * + * This also bypasses the usual "convert-to-git" dance, and that is on + * purpose. We could write a streaming version of the converting + * functions and insert that before feeding the data to fast-import + * (or equivalent in-core API described above), but the primary + * motivation for trying to stream from the working tree file and to + * avoid mmaping it in core is to deal with large binary blobs, and + * by definition they do _not_ want to get any conversion. 
+ */
+static int index_stream(unsigned char *sha1, int fd, size_t size,
+			enum object_type type, const char *path,
+			unsigned flags)
+{
+	struct child_process fast_import;
+	char export_marks[sizeof("--export-marks=") + 512]; /* option + tmpfile */
+	const char *argv[] = { "fast-import", "--quiet", export_marks, NULL };
+	char tmpfile[512];
+	char fast_import_cmd[512];
+	char buf[512]; /* receives the exported mark line, not blob data */
+	int len, tmpfd;
+
+	snprintf(tmpfile, sizeof(tmpfile), "%s", git_path("hashstream_XXXXXX"));
+	tmpfd = git_mkstemp_mode(tmpfile, 0600);
+	if (tmpfd < 0)
+		die_errno("cannot create tempfile: %s", tmpfile);
+	if (close(tmpfd))
+		die_errno("cannot close tempfile: %s", tmpfile);
+	sprintf(export_marks, "--export-marks=%s", tmpfile);
+
+	memset(&fast_import, 0, sizeof(fast_import));
+	fast_import.in = -1;
+	fast_import.argv = argv;
+	fast_import.git_cmd = 1;
+	if (start_command(&fast_import))
+		die_errno("index-stream: git fast-import failed");
+
+	len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n",
+		      (unsigned long) size);
+	write_or_whine(fast_import.in, fast_import_cmd, len,
+		       "index-stream: feeding fast-import");
+	while (size) {
+		char chunk[10240]; /* distinct name: must not shadow buf above */
+		size_t sz = size < sizeof(chunk) ? size : sizeof(chunk);
+		ssize_t actual = read_in_full(fd, chunk, sz); /* signed so < 0 can fire */
+
+		if (actual < 0)
+			die_errno("index-stream: reading input");
+		if (!actual) /* input ended before size bytes: do not spin forever */
+			die("index-stream: unexpected end of input");
+		if (write_in_full(fast_import.in, chunk, actual) != actual)
+			die_errno("index-stream: feeding fast-import");
+		size -= actual;
+	}
+	if (close(fast_import.in))
+		die_errno("index-stream: closing fast-import");
+	if (finish_command(&fast_import))
+		die_errno("index-stream: finishing fast-import");
+
+	tmpfd = open(tmpfile, O_RDONLY);
+	if (tmpfd < 0)
+		die_errno("index-stream: cannot open fast-import mark");
+	len = read(tmpfd, buf, sizeof(buf)); /* NOTE: buf is not NUL-terminated */
+	if (len < 0)
+		die_errno("index-stream: reading fast-import mark");
+	if (close(tmpfd) < 0)
+		die_errno("index-stream: closing fast-import mark");
+	if (unlink(tmpfile))
+		die_errno("index-stream: unlinking fast-import mark");
+	if (len != 44 || memcmp(":1 ", buf, 3) || /* 3 + 40 hex + 1 (presumably LF) */
+	    get_sha1_hex(buf + 3, sha1))
+		die("index-stream: unexpected fast-import mark: <%.*s>", len, buf);
+	return 0;
+}
+
+int index_fd(unsigned char *sha1, int fd, struct stat *st,
+	     enum object_type type, const char *path, unsigned flags)
+{
+	int ret;
+	size_t size = xsize_t(st->st_size);
+
+	if (!S_ISREG(st->st_mode))
+		ret = index_pipe(sha1, fd, type, path, flags);
+	else if (size <= big_file_threshold || type != OBJ_BLOB)
+		ret = index_core(sha1, fd, size, type, path, flags);
+	else
+		ret = index_stream(sha1, fd, size, type, path, flags);
 	close(fd);
 	return ret;
 }
 
-int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object)
+int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags)
 {
 	int fd;
 	struct strbuf sb = STRBUF_INIT;
@@ -2665,7 +2762,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write
 		if (fd < 0)
 			return error("open(\"%s\"): %s", path,
 				     strerror(errno));
-		if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path, 0) < 0)
+		if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0)
 			return error("%s: failed to insert into
database", path); break; @@ -2675,7 +2772,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write return error("readlink(\"%s\"): %s", path, errstr); } - if (!write_object) + if (!(flags & HASH_WRITE_OBJECT)) hash_sha1_file(sb.buf, sb.len, blob_type, sha1); else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1)) return error("%s: failed to insert into database", diff --git a/t/t1050-large.sh b/t/t1050-large.sh new file mode 100755 index 0000000000..deba111bd7 --- /dev/null +++ b/t/t1050-large.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# Copyright (c) 2011, Google Inc. + +test_description='adding and checking out large blobs' + +. ./test-lib.sh + +test_expect_success setup ' + git config core.bigfilethreshold 200k && + echo X | dd of=large bs=1k seek=2000 +' + +test_expect_success 'add a large file' ' + git add large && + # make sure we got a packfile and no loose objects + test -f .git/objects/pack/pack-*.pack && + test ! -f .git/objects/??/?????????????????????????????????????? +' + +test_expect_success 'checkout a large file' ' + large=$(git rev-parse :large) && + git update-index --add --cacheinfo 100644 $large another && + git checkout another && + cmp large another ;# this must not be test_cmp +' + +test_done diff --git a/t/t6050-replace.sh b/t/t6050-replace.sh index ae2194e07d..5c87f28e4e 100755 --- a/t/t6050-replace.sh +++ b/t/t6050-replace.sh @@ -236,6 +236,20 @@ test_expect_success 'index-pack and replacements' ' git index-pack test-*.pack ' -# -# +test_expect_success 'not just commits' ' + echo replaced >file && + git add file && + REPLACED=$(git rev-parse :file) && + mv file file.replaced && + + echo original >file && + git add file && + ORIGINAL=$(git rev-parse :file) && + git update-ref refs/replace/$ORIGINAL $REPLACED && + mv file file.original && + + git checkout file && + test_cmp file.replaced file +' + test_done