From d1bf0e08313927c269846476f99f59d878807371 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 20 May 2011 12:59:01 -0700 Subject: [PATCH 1/8] convert.h: move declarations for conversion from cache.h Before adding the streaming filter API to the conversion layer, move the existing declarations related to the conversion to its own header file. Signed-off-by: Junio C Hamano --- cache.h | 38 +------------------------------------- convert.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 37 deletions(-) create mode 100644 convert.h diff --git a/cache.h b/cache.h index a5067ba12d..c781c118d2 100644 --- a/cache.h +++ b/cache.h @@ -6,6 +6,7 @@ #include "hash.h" #include "advice.h" #include "gettext.h" +#include "convert.h" #include SHA1_HEADER #ifndef git_SHA_CTX @@ -582,35 +583,6 @@ extern int fsync_object_files; extern int core_preload_index; extern int core_apply_sparse_checkout; -enum safe_crlf { - SAFE_CRLF_FALSE = 0, - SAFE_CRLF_FAIL = 1, - SAFE_CRLF_WARN = 2 -}; - -extern enum safe_crlf safe_crlf; - -enum auto_crlf { - AUTO_CRLF_FALSE = 0, - AUTO_CRLF_TRUE = 1, - AUTO_CRLF_INPUT = -1 -}; - -extern enum auto_crlf auto_crlf; - -enum eol { - EOL_UNSET, - EOL_CRLF, - EOL_LF, -#ifdef NATIVE_CRLF - EOL_NATIVE = EOL_CRLF -#else - EOL_NATIVE = EOL_LF -#endif -}; - -extern enum eol core_eol; - enum branch_track { BRANCH_TRACK_UNSPECIFIED = -1, BRANCH_TRACK_NEVER = 0, @@ -1153,14 +1125,6 @@ extern void trace_strbuf(const char *key, const struct strbuf *buf); void packet_trace_identity(const char *prog); -/* convert.c */ -/* returns 1 if *dst was used */ -extern int convert_to_git(const char *path, const char *src, size_t len, - struct strbuf *dst, enum safe_crlf checksafe); -extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst); -extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst); -extern int can_bypass_conversion(const char *path); - /* 
add */ /* * return 0 if success, 1 - if addition of a file failed and diff --git a/convert.h b/convert.h new file mode 100644 index 0000000000..b1b4a382df --- /dev/null +++ b/convert.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2011, Google Inc. + */ +#ifndef CONVERT_H +#define CONVERT_H + +enum safe_crlf { + SAFE_CRLF_FALSE = 0, + SAFE_CRLF_FAIL = 1, + SAFE_CRLF_WARN = 2 +}; + +extern enum safe_crlf safe_crlf; + +enum auto_crlf { + AUTO_CRLF_FALSE = 0, + AUTO_CRLF_TRUE = 1, + AUTO_CRLF_INPUT = -1 +}; + +extern enum auto_crlf auto_crlf; + +enum eol { + EOL_UNSET, + EOL_CRLF, + EOL_LF, +#ifdef NATIVE_CRLF + EOL_NATIVE = EOL_CRLF +#else + EOL_NATIVE = EOL_LF +#endif +}; + +extern enum eol core_eol; + +/* returns 1 if *dst was used */ +extern int convert_to_git(const char *path, const char *src, size_t len, + struct strbuf *dst, enum safe_crlf checksafe); +extern int convert_to_working_tree(const char *path, const char *src, + size_t len, struct strbuf *dst); +extern int renormalize_buffer(const char *path, const char *src, size_t len, + struct strbuf *dst); +extern int can_bypass_conversion(const char *path); +#endif /* CONVERT_H */ From b6691092d707860019bbab80eaaf9173ada10586 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 20 May 2011 14:33:31 -0700 Subject: [PATCH 2/8] Add streaming filter API This introduces an API to plug custom filters to an input stream. The caller gets get_stream_filter("path") to obtain an appropriate filter for the path, and then uses it when opening an input stream via open_istream(). After that, the caller can read from the stream with read_istream(), and close it with close_istream(), just like an unfiltered stream. This only adds a "null" filter that is a pass-thru filter, but later changes can add LF-to-CRLF and other filters, and the callers of the streaming API do not have to change. 
Signed-off-by: Junio C Hamano --- convert.c | 84 +++++++++++++++++++++++++++++++++++++++---- convert.h | 23 +++++++++++- entry.c | 16 +++++---- streaming.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++- streaming.h | 2 +- 5 files changed, 209 insertions(+), 16 deletions(-) diff --git a/convert.c b/convert.c index 264af1d5ba..1ec91a370e 100644 --- a/convert.c +++ b/convert.c @@ -814,12 +814,69 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str return ret | convert_to_git(path, src, len, dst, 0); } +/***************************************************************** + * + * Streaming converison support + * + *****************************************************************/ + +typedef int (*filter_fn)(struct stream_filter *, + const char *input, size_t *isize_p, + char *output, size_t *osize_p); +typedef void (*free_fn)(struct stream_filter *); + +struct stream_filter_vtbl { + filter_fn filter; + free_fn free; +}; + +struct stream_filter { + struct stream_filter_vtbl *vtbl; +}; + +static int null_filter_fn(struct stream_filter *filter, + const char *input, size_t *isize_p, + char *output, size_t *osize_p) +{ + size_t count = *isize_p; + if (*osize_p < count) + count = *osize_p; + if (count) { + memmove(output, input, count); + *isize_p -= count; + *osize_p -= count; + } + return 0; +} + +static void null_free_fn(struct stream_filter *filter) +{ + ; /* nothing -- null instances are shared */ +} + +static struct stream_filter_vtbl null_vtbl = { + null_filter_fn, + null_free_fn, +}; + +static struct stream_filter null_filter_singleton = { + &null_vtbl, +}; + +int is_null_stream_filter(struct stream_filter *filter) +{ + return filter == &null_filter_singleton; +} + /* - * You would be crazy to set CRLF, smuge/clean or ident to - * a large binary blob you would want us not to slurp into - * the memory! 
+ * Return an appropriately constructed filter for the path, or NULL if + * the contents cannot be filtered without reading the whole thing + * in-core. + * + * Note that you would be crazy to set CRLF, smuge/clean or ident to a + * large binary blob you would want us not to slurp into the memory! */ -int can_bypass_conversion(const char *path) +struct stream_filter *get_stream_filter(const char *path, const unsigned char *sha1) { struct conv_attrs ca; enum crlf_action crlf_action; @@ -828,11 +885,24 @@ int can_bypass_conversion(const char *path) if (ca.ident || (ca.drv && (ca.drv->smudge || ca.drv->clean))) - return 0; + return NULL; crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) || (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE)) - return 1; - return 0; + return &null_filter_singleton; + + return NULL; +} + +void free_stream_filter(struct stream_filter *filter) +{ + filter->vtbl->free(filter); +} + +int stream_filter(struct stream_filter *filter, + const char *input, size_t *isize_p, + char *output, size_t *osize_p) +{ + return filter->vtbl->filter(filter, input, isize_p, output, osize_p); } diff --git a/convert.h b/convert.h index b1b4a382df..17d7509832 100644 --- a/convert.h +++ b/convert.h @@ -40,5 +40,26 @@ extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst); extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst); -extern int can_bypass_conversion(const char *path); + +/***************************************************************** + * + * Streaming converison support + * + *****************************************************************/ + +struct stream_filter; /* opaque */ + +extern struct stream_filter *get_stream_filter(const char *path, const unsigned char *); +extern void free_stream_filter(struct stream_filter *); +extern int is_null_stream_filter(struct stream_filter 
*); + +/* + * Use as much input up to *isize_p and fill output up to *osize_p; + * update isize_p and osize_p to indicate how much buffer space was + * consumed and filled. Return 0 on success, non-zero on error. + */ +extern int stream_filter(struct stream_filter *, + const char *input, size_t *isize_p, + char *output, size_t *osize_p); + #endif /* CONVERT_H */ diff --git a/entry.c b/entry.c index e2dc16c131..852fea1395 100644 --- a/entry.c +++ b/entry.c @@ -116,6 +116,7 @@ static int fstat_output(int fd, const struct checkout *state, struct stat *st) } static int streaming_write_entry(struct cache_entry *ce, char *path, + struct stream_filter *filter, const struct checkout *state, int to_tempfile, int *fstat_done, struct stat *statbuf) { @@ -126,7 +127,7 @@ static int streaming_write_entry(struct cache_entry *ce, char *path, ssize_t kept = 0; int fd = -1; - st = open_istream(ce->sha1, &type, &sz); + st = open_istream(ce->sha1, &type, &sz, filter); if (!st) return -1; if (type != OBJ_BLOB) @@ -186,11 +187,14 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout size_t wrote, newsize = 0; struct stat st; - if ((ce_mode_s_ifmt == S_IFREG) && - can_bypass_conversion(path) && - !streaming_write_entry(ce, path, state, to_tempfile, - &fstat_done, &st)) - goto finish; + if (ce_mode_s_ifmt == S_IFREG) { + struct stream_filter *filter = get_stream_filter(path, ce->sha1); + if (filter && + !streaming_write_entry(ce, path, filter, + state, to_tempfile, + &fstat_done, &st)) + goto finish; + } switch (ce_mode_s_ifmt) { case S_IFREG: diff --git a/streaming.c b/streaming.c index 0602926644..565f000790 100644 --- a/streaming.c +++ b/streaming.c @@ -41,6 +41,9 @@ struct stream_vtbl { static open_method_decl(incore); static open_method_decl(loose); static open_method_decl(pack_non_delta); +static struct git_istream *attach_stream_filter(struct git_istream *st, + struct stream_filter *filter); + static open_istream_fn open_istream_tbl[] = { 
open_istream_incore, @@ -48,6 +51,17 @@ static open_istream_fn open_istream_tbl[] = { open_istream_pack_non_delta, }; +#define FILTER_BUFFER (1024*16) + +struct filtered_istream { + struct git_istream *upstream; + struct stream_filter *filter; + char ibuf[FILTER_BUFFER]; + char obuf[FILTER_BUFFER]; + int i_end, i_ptr; + int o_end, o_ptr; +}; + struct git_istream { const struct stream_vtbl *vtbl; unsigned long size; /* inflated size of full object */ @@ -72,6 +86,8 @@ struct git_istream { struct packed_git *pack; off_t pos; } in_pack; + + struct filtered_istream filtered; } u; }; @@ -112,7 +128,8 @@ static enum input_source istream_source(const unsigned char *sha1, struct git_istream *open_istream(const unsigned char *sha1, enum object_type *type, - unsigned long *size) + unsigned long *size, + struct stream_filter *filter) { struct git_istream *st; struct object_info oi; @@ -129,6 +146,14 @@ struct git_istream *open_istream(const unsigned char *sha1, return NULL; } } + if (st && filter) { + /* Add "&& !is_null_stream_filter(filter)" for performance */ + struct git_istream *nst = attach_stream_filter(st, filter); + if (!nst) + close_istream(st); + st = nst; + } + *size = st->size; return st; } @@ -147,6 +172,79 @@ static void close_deflated_stream(struct git_istream *st) } +/***************************************************************** + * + * Filtered stream + * + *****************************************************************/ + +static close_method_decl(filtered) +{ + free_stream_filter(st->u.filtered.filter); + return close_istream(st->u.filtered.upstream); +} + +static read_method_decl(filtered) +{ + struct filtered_istream *fs = &(st->u.filtered); + size_t filled = 0; + + while (sz) { + /* do we already have filtered output? 
*/ + if (fs->o_ptr < fs->o_end) { + size_t to_move = fs->o_end - fs->o_ptr; + if (sz < to_move) + to_move = sz; + memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move); + fs->o_ptr += to_move; + sz -= to_move; + filled += to_move; + continue; + } + fs->o_end = fs->o_ptr = 0; + + /* do we have anything to feed the filter with? */ + if (fs->i_ptr < fs->i_end) { + size_t to_feed = fs->i_end - fs->i_ptr; + size_t to_receive = FILTER_BUFFER; + if (stream_filter(fs->filter, + fs->ibuf + fs->i_ptr, &to_feed, + fs->obuf, &to_receive)) + return -1; + fs->i_ptr = fs->i_end - to_feed; + fs->o_end = FILTER_BUFFER - to_receive; + continue; + } + fs->i_end = fs->i_ptr = 0; + + /* refill the input from the upstream */ + fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); + if (fs->i_end <= 0) + break; + } + return filled; +} + +static struct stream_vtbl filtered_vtbl = { + close_istream_filtered, + read_istream_filtered, +}; + +static struct git_istream *attach_stream_filter(struct git_istream *st, + struct stream_filter *filter) +{ + struct git_istream *ifs = xmalloc(sizeof(*ifs)); + struct filtered_istream *fs = &(ifs->u.filtered); + + ifs->vtbl = &filtered_vtbl; + fs->upstream = st; + fs->filter = filter; + fs->i_end = fs->i_ptr = 0; + fs->o_end = fs->o_ptr = 0; + ifs->size = -1; /* unknown */ + return ifs; +} + /***************************************************************** * * Loose object stream diff --git a/streaming.h b/streaming.h index 18cbe68ac1..589e857b8c 100644 --- a/streaming.h +++ b/streaming.h @@ -8,7 +8,7 @@ /* opaque */ struct git_istream; -extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *); +extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *, struct stream_filter *); extern int close_istream(struct git_istream *); extern ssize_t read_istream(struct git_istream *, char *, size_t); From 4ae6670444388f5ba68850e42a93af4019922c26 Mon Sep 17 00:00:00 2001 
From: Junio C Hamano Date: Sat, 21 May 2011 14:05:51 -0700 Subject: [PATCH 3/8] stream filter: add "no more input" to the filters Some filters may need to buffer the input and look-ahead inside it to decide what to output, and they may consume more than zero bytes of input and still not produce any output. After feeding all the input, pass NULL as input and keep calling stream_filter() to let such filters know there is no more input coming, and it is time for them to produce the remaining output based on the buffered input. Signed-off-by: Junio C Hamano --- convert.c | 6 +++++- convert.h | 7 +++++++ streaming.c | 26 +++++++++++++++++++++++--- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/convert.c b/convert.c index 1ec91a370e..4951372db8 100644 --- a/convert.c +++ b/convert.c @@ -838,7 +838,11 @@ static int null_filter_fn(struct stream_filter *filter, const char *input, size_t *isize_p, char *output, size_t *osize_p) { - size_t count = *isize_p; + size_t count; + + if (!input) + return 0; /* we do not keep any states */ + count = *isize_p; if (*osize_p < count) count = *osize_p; if (count) { diff --git a/convert.h b/convert.h index 17d7509832..d799a165b4 100644 --- a/convert.h +++ b/convert.h @@ -57,6 +57,13 @@ extern int is_null_stream_filter(struct stream_filter *); * Use as much input up to *isize_p and fill output up to *osize_p; * update isize_p and osize_p to indicate how much buffer space was * consumed and filled. Return 0 on success, non-zero on error. + * + * Some filters may need to buffer the input and look-ahead inside it + * to decide what to output, and they may consume more than zero bytes + * of input and still not produce any output. After feeding all the + * input, pass NULL as input and keep calling this function, to let + * such filters know there is no more input coming and it is time for + * them to produce the remaining output based on the buffered input. 
*/ extern int stream_filter(struct stream_filter *, const char *input, size_t *isize_p, diff --git a/streaming.c b/streaming.c index 565f000790..91414f4592 100644 --- a/streaming.c +++ b/streaming.c @@ -60,6 +60,7 @@ struct filtered_istream { char obuf[FILTER_BUFFER]; int i_end, i_ptr; int o_end, o_ptr; + int input_finished; }; struct git_istream { @@ -215,12 +216,30 @@ static read_method_decl(filtered) fs->o_end = FILTER_BUFFER - to_receive; continue; } + + /* tell the filter to drain upon no more input */ + if (fs->input_finished) { + size_t to_receive = FILTER_BUFFER; + if (stream_filter(fs->filter, + NULL, NULL, + fs->obuf, &to_receive)) + return -1; + fs->o_end = FILTER_BUFFER - to_receive; + if (!fs->o_end) + break; + continue; + } fs->i_end = fs->i_ptr = 0; /* refill the input from the upstream */ - fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); - if (fs->i_end <= 0) - break; + if (!fs->input_finished) { + fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); + if (fs->i_end < 0) + break; + if (fs->i_end) + continue; + } + fs->input_finished = 1; } return filled; } @@ -241,6 +260,7 @@ static struct git_istream *attach_stream_filter(struct git_istream *st, fs->filter = filter; fs->i_end = fs->i_ptr = 0; fs->o_end = fs->o_ptr = 0; + fs->input_finished = 0; ifs->size = -1; /* unknown */ return ifs; } From e322ee38ad8d655f5a32b3482ae9ce813b73e4bc Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 20 May 2011 16:47:56 -0700 Subject: [PATCH 4/8] Add LF-to-CRLF streaming conversion If we do not have to guess or validate by scanning the input, we can just stream this through. 
Signed-off-by: Junio C Hamano --- convert.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/convert.c b/convert.c index 4951372db8..c352d66342 100644 --- a/convert.c +++ b/convert.c @@ -872,6 +872,43 @@ int is_null_stream_filter(struct stream_filter *filter) return filter == &null_filter_singleton; } +static int lf_to_crlf_filter_fn(struct stream_filter *filter, + const char *input, size_t *isize_p, + char *output, size_t *osize_p) +{ + size_t count; + + if (!input) + return 0; /* we do not keep any states */ + count = *isize_p; + if (count) { + size_t i, o; + for (i = o = 0; o < *osize_p && i < count; i++) { + char ch = input[i]; + if (ch == '\n') { + if (o + 1 < *osize_p) + output[o++] = '\r'; + else + break; + } + output[o++] = ch; + } + + *osize_p -= o; + *isize_p -= i; + } + return 0; +} + +static struct stream_filter_vtbl lf_to_crlf_vtbl = { + lf_to_crlf_filter_fn, + null_free_fn, +}; + +static struct stream_filter lf_to_crlf_filter_singleton = { + &lf_to_crlf_vtbl, +}; + /* * Return an appropriately constructed filter for the path, or NULL if * the contents cannot be filtered without reading the whole thing @@ -896,6 +933,10 @@ struct stream_filter *get_stream_filter(const char *path, const unsigned char *s (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE)) return &null_filter_singleton; + if (output_eol(crlf_action) == EOL_CRLF && + !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS)) + return &lf_to_crlf_filter_singleton; + return NULL; } From b84c7839170ab35342d4ff098083c780075f015a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 20 May 2011 18:28:00 -0700 Subject: [PATCH 5/8] streaming filter: ident filter Add support for "ident" filter on the output codepath. This does not work with lf-to-crlf filter together (yet). 
Signed-off-by: Junio C Hamano --- convert.c | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 169 insertions(+), 8 deletions(-) diff --git a/convert.c b/convert.c index c352d66342..0c42bcb524 100644 --- a/convert.c +++ b/convert.c @@ -872,6 +872,10 @@ int is_null_stream_filter(struct stream_filter *filter) return filter == &null_filter_singleton; } + +/* + * LF-to-CRLF filter + */ static int lf_to_crlf_filter_fn(struct stream_filter *filter, const char *input, size_t *isize_p, char *output, size_t *osize_p) @@ -909,6 +913,151 @@ static struct stream_filter lf_to_crlf_filter_singleton = { &lf_to_crlf_vtbl, }; + +/* + * ident filter + */ +#define IDENT_DRAINING (-1) +#define IDENT_SKIPPING (-2) +struct ident_filter { + struct stream_filter filter; + struct strbuf left; + int state; + char ident[45]; /* ": x40 $" */ +}; + +static int is_foreign_ident(const char *str) +{ + int i; + + if (prefixcmp(str, "$Id: ")) + return 0; + for (i = 5; str[i]; i++) { + if (isspace(str[i]) && str[i+1] != '$') + return 1; + } + return 0; +} + +static void ident_drain(struct ident_filter *ident, char **output_p, size_t *osize_p) +{ + size_t to_drain = ident->left.len; + + if (*osize_p < to_drain) + to_drain = *osize_p; + if (to_drain) { + memcpy(*output_p, ident->left.buf, to_drain); + strbuf_remove(&ident->left, 0, to_drain); + *output_p += to_drain; + *osize_p -= to_drain; + } + if (!ident->left.len) + ident->state = 0; +} + +static int ident_filter_fn(struct stream_filter *filter, + const char *input, size_t *isize_p, + char *output, size_t *osize_p) +{ + struct ident_filter *ident = (struct ident_filter *)filter; + static const char head[] = "$Id"; + + if (!input) { + /* drain upon eof */ + switch (ident->state) { + default: + strbuf_add(&ident->left, head, ident->state); + case IDENT_SKIPPING: + /* fallthru */ + case IDENT_DRAINING: + ident_drain(ident, &output, osize_p); + } + return 0; + } + + while (*isize_p || (ident->state == IDENT_DRAINING)) { + int 
ch; + + if (ident->state == IDENT_DRAINING) { + ident_drain(ident, &output, osize_p); + if (!*osize_p) + break; + continue; + } + + ch = *(input++); + (*isize_p)--; + + if (ident->state == IDENT_SKIPPING) { + /* + * Skipping until '$' or LF, but keeping them + * in case it is a foreign ident. + */ + strbuf_addch(&ident->left, ch); + if (ch != '\n' && ch != '$') + continue; + if (ch == '$' && !is_foreign_ident(ident->left.buf)) { + strbuf_setlen(&ident->left, sizeof(head) - 1); + strbuf_addstr(&ident->left, ident->ident); + } + ident->state = IDENT_DRAINING; + continue; + } + + if (ident->state < sizeof(head) && + head[ident->state] == ch) { + ident->state++; + continue; + } + + if (ident->state) + strbuf_add(&ident->left, head, ident->state); + if (ident->state == sizeof(head) - 1) { + if (ch != ':' && ch != '$') { + strbuf_addch(&ident->left, ch); + ident->state = 0; + continue; + } + + if (ch == ':') { + strbuf_addch(&ident->left, ch); + ident->state = IDENT_SKIPPING; + } else { + strbuf_addstr(&ident->left, ident->ident); + ident->state = IDENT_DRAINING; + } + continue; + } + + strbuf_addch(&ident->left, ch); + ident->state = IDENT_DRAINING; + } + return 0; +} + +static void ident_free_fn(struct stream_filter *filter) +{ + struct ident_filter *ident = (struct ident_filter *)filter; + strbuf_release(&ident->left); + free(filter); +} + +static struct stream_filter_vtbl ident_vtbl = { + ident_filter_fn, + ident_free_fn, +}; + +static struct stream_filter *ident_filter(const unsigned char *sha1) +{ + struct ident_filter *ident = xmalloc(sizeof(*ident)); + + sprintf(ident->ident, ": %s $", sha1_to_hex(sha1)); + strbuf_init(&ident->left, 0); + ident->filter.vtbl = &ident_vtbl; + ident->state = 0; + return (struct stream_filter *)ident; +} + /* * Return an appropriately constructed filter for the path, or NULL if * the contents cannot be filtered without reading the whole thing @@ -921,23 +1070,35 @@ struct stream_filter *get_stream_filter(const char *path, const 
unsigned char *s { struct conv_attrs ca; enum crlf_action crlf_action; + struct stream_filter *filter = NULL; convert_attrs(&ca, path); - if (ca.ident || - (ca.drv && (ca.drv->smudge || ca.drv->clean))) - return NULL; + if (ca.drv && (ca.drv->smudge || ca.drv->clean)) + return filter; + + if (ca.ident) + filter = ident_filter(sha1); crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) || - (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE)) + (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE)) { + if (filter) { + free_stream_filter(filter); + return NULL; + } return &null_filter_singleton; - - if (output_eol(crlf_action) == EOL_CRLF && - !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS)) + } else if (output_eol(crlf_action) == EOL_CRLF && + !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS)) { + if (filter) { + free_stream_filter(filter); + return NULL; + } return &lf_to_crlf_filter_singleton; + } - return NULL; + return filter; } void free_stream_filter(struct stream_filter *filter) From a265a7f95e6e3ebd81df0c6813b17d61a5ab890a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 21 May 2011 18:28:41 -0700 Subject: [PATCH 6/8] streaming: filter cascading This implements an internal "cascade" filter mechanism that plugs two filters in series. 
Signed-off-by: Junio C Hamano --- convert.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 112 insertions(+), 14 deletions(-) diff --git a/convert.c b/convert.c index 0c42bcb524..85939c29be 100644 --- a/convert.c +++ b/convert.c @@ -914,6 +914,112 @@ static struct stream_filter lf_to_crlf_filter_singleton = { }; +/* + * Cascade filter + */ +#define FILTER_BUFFER 1024 +struct cascade_filter { + struct stream_filter filter; + struct stream_filter *one; + struct stream_filter *two; + char buf[FILTER_BUFFER]; + int end, ptr; +}; + +static int cascade_filter_fn(struct stream_filter *filter, + const char *input, size_t *isize_p, + char *output, size_t *osize_p) +{ + struct cascade_filter *cas = (struct cascade_filter *) filter; + size_t filled = 0; + size_t sz = *osize_p; + size_t to_feed, remaining; + + /* + * input -- (one) --> buf -- (two) --> output + */ + while (filled < sz) { + remaining = sz - filled; + + /* do we already have something to feed two with? */ + if (cas->ptr < cas->end) { + to_feed = cas->end - cas->ptr; + if (stream_filter(cas->two, + cas->buf + cas->ptr, &to_feed, + output + filled, &remaining)) + return -1; + cas->ptr += (cas->end - cas->ptr) - to_feed; + filled = sz - remaining; + continue; + } + + /* feed one from upstream and have it emit into our buffer */ + to_feed = input ? *isize_p : 0; + if (input && !to_feed) + break; + remaining = sizeof(cas->buf); + if (stream_filter(cas->one, + input, &to_feed, + cas->buf, &remaining)) + return -1; + cas->end = sizeof(cas->buf) - remaining; + cas->ptr = 0; + if (input) { + size_t fed = *isize_p - to_feed; + *isize_p -= fed; + input += fed; + } + + /* do we know that we drained one completely? 
*/ + if (input || cas->end) + continue; + + /* tell two to drain; we have nothing more to give it */ + to_feed = 0; + remaining = sz - filled; + if (stream_filter(cas->two, + NULL, &to_feed, + output + filled, &remaining)) + return -1; + if (remaining == (sz - filled)) + break; /* completely drained two */ + filled = sz - remaining; + } + *osize_p -= filled; + return 0; +} + +static void cascade_free_fn(struct stream_filter *filter) +{ + struct cascade_filter *cas = (struct cascade_filter *)filter; + free_stream_filter(cas->one); + free_stream_filter(cas->two); + free(filter); +} + +static struct stream_filter_vtbl cascade_vtbl = { + cascade_filter_fn, + cascade_free_fn, +}; + +static struct stream_filter *cascade_filter(struct stream_filter *one, + struct stream_filter *two) +{ + struct cascade_filter *cascade; + + if (!one || is_null_stream_filter(one)) + return two; + if (!two || is_null_stream_filter(two)) + return one; + + cascade = xmalloc(sizeof(*cascade)); + cascade->one = one; + cascade->two = two; + cascade->end = cascade->ptr = 0; + cascade->filter.vtbl = &cascade_vtbl; + return (struct stream_filter *)cascade; +} + /* * ident filter */ @@ -1083,20 +1189,12 @@ struct stream_filter *get_stream_filter(const char *path, const unsigned char *s crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) || - (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE)) { - if (filter) { - free_stream_filter(filter); - return NULL; - } - return &null_filter_singleton; - } else if (output_eol(crlf_action) == EOL_CRLF && - !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS)) { - if (filter) { - free_stream_filter(filter); - return NULL; - } - return &lf_to_crlf_filter_singleton; - } + (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE)) + filter = cascade_filter(filter, &null_filter_singleton); + + else if (output_eol(crlf_action) == EOL_CRLF && + !(crlf_action == CRLF_AUTO || 
crlf_action == CRLF_GUESS)) + filter = cascade_filter(filter, &lf_to_crlf_filter_singleton); return filter; } From dd555d8bed255c7949620e6551ea5afd2213bae0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 21 May 2011 23:25:06 +0200 Subject: [PATCH 7/8] t0021-conversion.sh: fix NoTerminatingSymbolAtEOF test The last line of the test file "expanded-keywords" ended in a newline, which is a valid terminator for ident. Use printf instead of echo to omit it and thus really test if a file that ends unexpectedly in the middle of an ident tag is handled properly. Also take the opportunity to calculate the expected ID dynamically instead of hardcoding it into the test script. This should make future changes easier. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t0021-conversion.sh | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index 9078b84ae6..275421e48e 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -66,25 +66,26 @@ test_expect_success expanded_in_repo ' echo "\$Id:NoSpaceAtEitherEnd\$" echo "\$Id: NoTerminatingSymbol" echo "\$Id: Foreign Commit With Spaces \$" - echo "\$Id: NoTerminatingSymbolAtEOF" + printf "\$Id: NoTerminatingSymbolAtEOF" } > expanded-keywords && + git add expanded-keywords && + git commit -m "File with keywords expanded" && + id=$(git rev-parse --verify :expanded-keywords) && + { echo "File with expanded keywords" - echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$" - echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$" - echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$" - echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$" - echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$" - echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$" + echo "\$Id: $id \$" + echo "\$Id: $id \$" + echo "\$Id: $id \$" + echo "\$Id: $id \$" + echo "\$Id: $id \$" + echo "\$Id: $id \$" echo "\$Id: 
NoTerminatingSymbol" echo "\$Id: Foreign Commit With Spaces \$" - echo "\$Id: NoTerminatingSymbolAtEOF" + printf "\$Id: NoTerminatingSymbolAtEOF" } > expected-output && - git add expanded-keywords && - git commit -m "File with keywords expanded" && - echo "expanded-keywords ident" >> .gitattributes && rm -f expanded-keywords && From 6b6cab3f9af2ab7010592e50bb343b990a6a7666 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 24 May 2011 18:02:48 -0700 Subject: [PATCH 8/8] t0021: test application of both crlf and ident Signed-off-by: Junio C Hamano --- t/t0021-conversion.sh | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index 275421e48e..f19e6510d0 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -66,10 +66,14 @@ test_expect_success expanded_in_repo ' echo "\$Id:NoSpaceAtEitherEnd\$" echo "\$Id: NoTerminatingSymbol" echo "\$Id: Foreign Commit With Spaces \$" - printf "\$Id: NoTerminatingSymbolAtEOF" - } > expanded-keywords && + } >expanded-keywords.0 && - git add expanded-keywords && + { + cat expanded-keywords.0 && + printf "\$Id: NoTerminatingSymbolAtEOF" + } >expanded-keywords && + cat expanded-keywords >expanded-keywords-crlf && + git add expanded-keywords expanded-keywords-crlf && git commit -m "File with keywords expanded" && id=$(git rev-parse --verify :expanded-keywords) && @@ -83,15 +87,27 @@ test_expect_success expanded_in_repo ' echo "\$Id: $id \$" echo "\$Id: NoTerminatingSymbol" echo "\$Id: Foreign Commit With Spaces \$" + } >expected-output.0 && + { + cat expected-output.0 && printf "\$Id: NoTerminatingSymbolAtEOF" - } > expected-output && + } >expected-output && + { + append_cr <expected-output.0 && + printf "\$Id: NoTerminatingSymbolAtEOF" + } >expected-output-crlf && + { + echo "expanded-keywords ident" + echo "expanded-keywords-crlf ident text eol=crlf" + } >>.gitattributes && - echo "expanded-keywords ident" >> .gitattributes 
&& + rm -f expanded-keywords expanded-keywords-crlf && - rm -f 
expanded-keywords && git checkout -- expanded-keywords && - cat expanded-keywords && - cmp expanded-keywords expected-output + test_cmp expanded-keywords expected-output && + + git checkout -- expanded-keywords-crlf && + test_cmp expanded-keywords-crlf expected-output-crlf ' # The use of %f in a filter definition is expanded to the path to