|
|
|
#include "cache.h"
|
|
|
|
#include "config.h"
|
|
|
|
#include "refs.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "tree-walk.h"
|
|
|
|
#include "attr.h"
|
|
|
|
#include "archive.h"
|
|
|
|
#include "parse-options.h"
|
|
|
|
#include "unpack-trees.h"
|
|
|
|
#include "dir.h"
|
|
|
|
|
|
|
|
static char const * const archive_usage[] = {
|
|
|
|
N_("git archive [<options>] <tree-ish> [<path>...]"),
|
|
|
|
N_("git archive --list"),
|
|
|
|
N_("git archive --remote <repo> [--exec <cmd>] [<options>] <tree-ish> [<path>...]"),
|
|
|
|
N_("git archive --remote <repo> [--exec <cmd>] --list"),
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Table of registered archive backends.  register_archiver() appends to
 * it and lookup_archiver() searches it by name.
 */
static const struct archiver **archivers;
static int nr_archivers;	/* number of entries in use */
static int alloc_archivers;	/* number of entries allocated */

/*
 * When zero (the default), remote requests may only name the tip of a
 * ref; see the check in parse_treeish_arg().  A non-zero value lifts
 * that restriction.  This backs the uploadarchive.allowUnreachable
 * option, an escape hatch for sites whose object database is public
 * anyway or that prefer convenience over the restriction.
 */
static int remote_allow_unreachable;
|
|
|
|
|
|
|
|
void register_archiver(struct archiver *ar)
|
|
|
|
{
|
|
|
|
ALLOC_GROW(archivers, nr_archivers + 1, alloc_archivers);
|
|
|
|
archivers[nr_archivers++] = ar;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void format_subst(const struct commit *commit,
|
|
|
|
const char *src, size_t len,
|
|
|
|
struct strbuf *buf)
|
|
|
|
{
|
|
|
|
char *to_free = NULL;
|
|
|
|
struct strbuf fmt = STRBUF_INIT;
|
|
|
|
struct pretty_print_context ctx = {0};
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
ctx.date_mode.type = DATE_NORMAL;
|
|
|
|
ctx.abbrev = DEFAULT_ABBREV;
|
|
|
|
|
|
|
|
if (src == buf->buf)
|
|
|
|
to_free = strbuf_detach(buf, NULL);
|
|
|
|
for (;;) {
|
|
|
|
const char *b, *c;
|
|
|
|
|
|
|
|
b = memmem(src, len, "$Format:", 8);
|
|
|
|
if (!b)
|
|
|
|
break;
|
|
|
|
c = memchr(b + 8, '$', (src + len) - b - 8);
|
|
|
|
if (!c)
|
|
|
|
break;
|
|
|
|
|
|
|
|
strbuf_reset(&fmt);
|
|
|
|
strbuf_add(&fmt, b + 8, c - b - 8);
|
|
|
|
|
|
|
|
strbuf_add(buf, src, b - src);
|
|
|
|
format_commit_message(commit, fmt.buf, buf, &ctx);
|
|
|
|
len -= c + 1 - src;
|
|
|
|
src = c + 1;
|
|
|
|
}
|
|
|
|
strbuf_add(buf, src, len);
|
|
|
|
strbuf_release(&fmt);
|
|
|
|
free(to_free);
|
|
|
|
}
|
|
|
|
|
|
|
|
void *sha1_file_to_archive(const struct archiver_args *args,
|
|
|
|
const char *path, const unsigned char *sha1,
|
|
|
|
unsigned int mode, enum object_type *type,
|
|
|
|
unsigned long *sizep)
|
|
|
|
{
|
|
|
|
void *buffer;
|
|
|
|
const struct commit *commit = args->convert ? args->commit : NULL;
|
|
|
|
|
|
|
|
path += args->baselen;
|
|
|
|
buffer = read_sha1_file(sha1, type, sizep);
|
|
|
|
if (buffer && S_ISREG(mode)) {
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
size_t size = 0;
|
|
|
|
|
|
|
|
strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
|
|
|
|
convert_to_working_tree(path, buf.buf, buf.len, &buf);
|
|
|
|
if (commit)
|
|
|
|
format_subst(commit, buf.buf, buf.len, &buf);
|
|
|
|
buffer = strbuf_detach(&buf, &size);
|
|
|
|
*sizep = size;
|
|
|
|
}
|
|
|
|
|
|
|
|
return buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct directory {
|
|
|
|
struct directory *up;
|
|
|
|
struct object_id oid;
|
|
|
|
int baselen, len;
|
|
|
|
unsigned mode;
|
|
|
|
int stage;
|
|
|
|
char path[FLEX_ARRAY];
|
|
|
|
};
|
|
|
|
|
|
|
|
struct archiver_context {
|
|
|
|
struct archiver_args *args;
|
|
|
|
write_archive_entry_fn_t write_entry;
|
|
|
|
struct directory *bottom;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Look up the "export-ignore" and "export-subst" attributes for "path".
 *
 * A single attr_check is created on first use and shared by all later
 * calls, so the result is only valid until the next call.  Returns NULL
 * when git_check_attr() reports a non-zero status.
 */
static const struct attr_check *get_archive_attrs(const char *path)
{
	static struct attr_check *archive_check;

	if (archive_check == NULL)
		archive_check = attr_check_initl("export-ignore", "export-subst", NULL);

	if (git_check_attr(path, archive_check))
		return NULL;
	return archive_check;
}
|
|
|
|
|
|
|
|
static int check_attr_export_ignore(const struct attr_check *check)
|
|
|
|
{
|
|
|
|
return check && ATTR_TRUE(check->items[0].value);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int check_attr_export_subst(const struct attr_check *check)
|
|
|
|
{
|
|
|
|
return check && ATTR_TRUE(check->items[1].value);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int should_queue_directories(const struct archiver_args *args)
|
|
|
|
{
|
|
|
|
return args->pathspec.has_wildcard;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int write_archive_entry(const unsigned char *sha1, const char *base,
|
|
|
|
int baselen, const char *filename, unsigned mode, int stage,
|
|
|
|
void *context)
|
|
|
|
{
|
|
|
|
static struct strbuf path = STRBUF_INIT;
|
|
|
|
struct archiver_context *c = context;
|
|
|
|
struct archiver_args *args = c->args;
|
|
|
|
write_archive_entry_fn_t write_entry = c->write_entry;
|
|
|
|
int err;
|
|
|
|
const char *path_without_prefix;
|
|
|
|
|
|
|
|
args->convert = 0;
|
|
|
|
strbuf_reset(&path);
|
|
|
|
strbuf_grow(&path, PATH_MAX);
|
|
|
|
strbuf_add(&path, args->base, args->baselen);
|
|
|
|
strbuf_add(&path, base, baselen);
|
|
|
|
strbuf_addstr(&path, filename);
|
|
|
|
if (S_ISDIR(mode) || S_ISGITLINK(mode))
|
|
|
|
strbuf_addch(&path, '/');
|
|
|
|
path_without_prefix = path.buf + args->baselen;
|
|
|
|
|
|
|
|
if (!S_ISDIR(mode) || !should_queue_directories(args)) {
|
|
|
|
const struct attr_check *check;
|
|
|
|
check = get_archive_attrs(path_without_prefix);
|
|
|
|
if (check_attr_export_ignore(check))
|
|
|
|
return 0;
|
|
|
|
args->convert = check_attr_export_subst(check);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
|
|
|
|
if (args->verbose)
|
|
|
|
fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
|
|
|
|
err = write_entry(args, sha1, path.buf, path.len, mode);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (args->verbose)
|
|
|
|
fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
|
|
|
|
return write_entry(args, sha1, path.buf, path.len, mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int write_archive_entry_buf(const unsigned char *sha1, struct strbuf *base,
|
|
|
|
const char *filename, unsigned mode, int stage,
|
|
|
|
void *context)
|
|
|
|
{
|
|
|
|
return write_archive_entry(sha1, base->buf, base->len,
|
|
|
|
filename, mode, stage, context);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void queue_directory(const unsigned char *sha1,
|
|
|
|
struct strbuf *base, const char *filename,
|
|
|
|
unsigned mode, int stage, struct archiver_context *c)
|
|
|
|
{
|
|
|
|
struct directory *d;
|
|
|
|
size_t len = st_add4(base->len, 1, strlen(filename), 1);
|
|
|
|
d = xmalloc(st_add(sizeof(*d), len));
|
|
|
|
d->up = c->bottom;
|
|
|
|
d->baselen = base->len;
|
|
|
|
d->mode = mode;
|
|
|
|
d->stage = stage;
|
|
|
|
c->bottom = d;
|
avoid sprintf and strcpy with flex arrays
When we are allocating a struct with a FLEX_ARRAY member, we
generally compute the size of the array and then sprintf or
strcpy into it. Normally we could improve a dynamic allocation
like this by using xstrfmt, but it doesn't work here; we
have to account for the size of the rest of the struct.
But we can improve things a bit by storing the length that
we use for the allocation, and then feeding it to xsnprintf
or memcpy, which makes it more obvious that we are not
writing more than the allocated number of bytes.
It would be nice if we had some kind of helper for
allocating generic flex arrays, but it doesn't work that
well:
- the call signature is a little bit unwieldy:
d = flex_struct(sizeof(*d), offsetof(d, path), fmt, ...);
You need offsetof here instead of just writing to the
end of the base size, because we don't know how the
struct is packed (partially this is because FLEX_ARRAY
might not be zero, though we can account for that; but
the size of the struct may actually be rounded up for
alignment, and we can't know that).
- some sites do clever things, like over-allocating because
they know they will write larger things into the buffer
later (e.g., struct packed_git here).
So we're better off to just write out each allocation (or
add type-specific helpers, though many of these are one-off
allocations anyway).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
d->len = xsnprintf(d->path, len, "%.*s%s/", (int)base->len, base->buf, filename);
|
|
|
|
hashcpy(d->oid.hash, sha1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int write_directory(struct archiver_context *c)
|
|
|
|
{
|
|
|
|
struct directory *d = c->bottom;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!d)
|
|
|
|
return 0;
|
|
|
|
c->bottom = d->up;
|
|
|
|
d->path[d->len - 1] = '\0'; /* no trailing slash */
|
|
|
|
ret =
|
|
|
|
write_directory(c) ||
|
|
|
|
write_archive_entry(d->oid.hash, d->path, d->baselen,
|
|
|
|
d->path + d->baselen, d->mode,
|
|
|
|
d->stage, c) != READ_TREE_RECURSIVE;
|
|
|
|
free(d);
|
|
|
|
return ret ? -1 : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int queue_or_write_archive_entry(const unsigned char *sha1,
|
|
|
|
struct strbuf *base, const char *filename,
|
|
|
|
unsigned mode, int stage, void *context)
|
|
|
|
{
|
|
|
|
struct archiver_context *c = context;
|
|
|
|
|
|
|
|
while (c->bottom &&
|
|
|
|
!(base->len >= c->bottom->len &&
|
|
|
|
!strncmp(base->buf, c->bottom->path, c->bottom->len))) {
|
|
|
|
struct directory *next = c->bottom->up;
|
|
|
|
free(c->bottom);
|
|
|
|
c->bottom = next;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (S_ISDIR(mode)) {
|
|
|
|
size_t baselen = base->len;
|
|
|
|
const struct attr_check *check;
|
|
|
|
|
|
|
|
/* Borrow base, but restore its original value when done. */
|
|
|
|
strbuf_addstr(base, filename);
|
|
|
|
strbuf_addch(base, '/');
|
|
|
|
check = get_archive_attrs(base->buf);
|
|
|
|
strbuf_setlen(base, baselen);
|
|
|
|
|
|
|
|
if (check_attr_export_ignore(check))
|
|
|
|
return 0;
|
|
|
|
queue_directory(sha1, base, filename,
|
|
|
|
mode, stage, c);
|
|
|
|
return READ_TREE_RECURSIVE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (write_directory(c))
|
|
|
|
return -1;
|
|
|
|
return write_archive_entry(sha1, base->buf, base->len, filename, mode,
|
|
|
|
stage, context);
|
|
|
|
}
|
|
|
|
|
|
|
|
int write_archive_entries(struct archiver_args *args,
|
|
|
|
write_archive_entry_fn_t write_entry)
|
|
|
|
{
|
|
|
|
struct archiver_context context;
|
|
|
|
struct unpack_trees_options opts;
|
|
|
|
struct tree_desc t;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (args->baselen > 0 && args->base[args->baselen - 1] == '/') {
|
|
|
|
size_t len = args->baselen;
|
|
|
|
|
|
|
|
while (len > 1 && args->base[len - 2] == '/')
|
|
|
|
len--;
|
|
|
|
if (args->verbose)
|
|
|
|
fprintf(stderr, "%.*s\n", (int)len, args->base);
|
|
|
|
err = write_entry(args, args->tree->object.oid.hash, args->base,
|
|
|
|
len, 040777);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(&context, 0, sizeof(context));
|
|
|
|
context.args = args;
|
|
|
|
context.write_entry = write_entry;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Setup index and instruct attr to read index only
|
|
|
|
*/
|
|
|
|
if (!args->worktree_attributes) {
|
|
|
|
memset(&opts, 0, sizeof(opts));
|
|
|
|
opts.index_only = 1;
|
|
|
|
opts.head_idx = -1;
|
|
|
|
opts.src_index = &the_index;
|
|
|
|
opts.dst_index = &the_index;
|
|
|
|
opts.fn = oneway_merge;
|
|
|
|
init_tree_desc(&t, args->tree->buffer, args->tree->size);
|
|
|
|
if (unpack_trees(1, &t, &opts))
|
|
|
|
return -1;
|
|
|
|
git_attr_set_direction(GIT_ATTR_INDEX, &the_index);
|
|
|
|
}
|
|
|
|
|
|
|
|
err = read_tree_recursive(args->tree, "", 0, 0, &args->pathspec,
|
|
|
|
should_queue_directories(args) ?
|
|
|
|
queue_or_write_archive_entry :
|
|
|
|
write_archive_entry_buf,
|
|
|
|
&context);
|
|
|
|
if (err == READ_TREE_RECURSIVE)
|
|
|
|
err = 0;
|
|
|
|
while (context.bottom) {
|
|
|
|
struct directory *next = context.bottom->up;
|
|
|
|
free(context.bottom);
|
|
|
|
context.bottom = next;
|
|
|
|
}
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct archiver *lookup_archiver(const char *name)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!name)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
for (i = 0; i < nr_archivers; i++) {
|
|
|
|
if (!strcmp(name, archivers[i]->name))
|
|
|
|
return archivers[i];
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reject_entry(const unsigned char *sha1, struct strbuf *base,
|
|
|
|
const char *filename, unsigned mode,
|
|
|
|
int stage, void *context)
|
|
|
|
{
|
|
|
|
int ret = -1;
|
|
|
|
if (S_ISDIR(mode)) {
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
strbuf_addbuf(&sb, base);
|
|
|
|
strbuf_addstr(&sb, filename);
|
|
|
|
if (!match_pathspec(context, sb.buf, sb.len, 0, NULL, 1))
|
|
|
|
ret = READ_TREE_RECURSIVE;
|
|
|
|
strbuf_release(&sb);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int path_exists(struct tree *tree, const char *path)
|
|
|
|
{
|
|
|
|
const char *paths[] = { path, NULL };
|
|
|
|
struct pathspec pathspec;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
parse_pathspec(&pathspec, 0, 0, "", paths);
|
|
|
|
pathspec.recursive = 1;
|
|
|
|
ret = read_tree_recursive(tree, "", 0, 0, &pathspec,
|
|
|
|
reject_entry, &pathspec);
|
|
|
|
clear_pathspec(&pathspec);
|
|
|
|
return ret != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void parse_pathspec_arg(const char **pathspec,
|
|
|
|
struct archiver_args *ar_args)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* must be consistent with parse_pathspec in path_exists()
|
|
|
|
* Also if pathspec patterns are dependent, we're in big
|
|
|
|
* trouble as we test each one separately
|
|
|
|
*/
|
|
|
|
parse_pathspec(&ar_args->pathspec, 0,
|
|
|
|
PATHSPEC_PREFER_FULL,
|
|
|
|
"", pathspec);
|
|
|
|
ar_args->pathspec.recursive = 1;
|
|
|
|
if (pathspec) {
|
|
|
|
while (*pathspec) {
|
archive: handle commits with an empty tree
git-archive relies on get_pathspec to convert its argv into
a list of pathspecs. When get_pathspec is given an empty
argv list, it returns a single pathspec, the empty string,
to indicate that everything matches. When we feed this to
our path_exists function, we typically see that the pathspec
turns up at least one item in the tree, and we are happy.
But when our tree is empty, we erroneously think it is
because the pathspec is too limited, when in fact it is
simply that there is nothing to be found in the tree. This
is a weird corner case, but the correct behavior is almost
certainly to produce an empty archive, not to exit with an
error.
This patch teaches git-archive to create empty archives when
there is no pathspec given (we continue to complain if a
pathspec is given, since it obviously is not matched). It
also confirms that the tar and zip writers produce sane
output in this instance.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
if (**pathspec && !path_exists(ar_args->tree, *pathspec))
|
|
|
|
die(_("pathspec '%s' did not match any files"), *pathspec);
|
|
|
|
pathspec++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void parse_treeish_arg(const char **argv,
|
archive: don't let remote clients get unreachable commits
Usually git is careful not to allow clients to fetch
arbitrary objects from the database; for example, objects
received via upload-pack must be reachable from a ref.
Upload-archive breaks this by feeding the client's tree-ish
directly to get_sha1, which will accept arbitrary hex sha1s,
reflogs, etc.
This is not a problem if all of your objects are publicly
reachable anyway (or at least public to anybody who can run
upload-archive). Or if you are making the repo available by
dumb protocols like http or rsync (in which case the client
can read your whole object db directly).
But for sites which allow access only through smart
protocols, clients may be able to fetch trees from commits
that exist in the server's object database but are not
referenced (e.g., because history was rewound).
This patch tightens upload-archive's lookup to use dwim_ref
rather than get_sha1. This means a remote client can only
fetch the tip of a named ref, not an arbitrary sha1 or
reflog entry.
This also restricts some legitimate requests, too:
1. Reachable non-tip commits, like:
git archive --remote=$url v1.0~5
2. Sub-trees of reachable commits, like:
git archive --remote=$url v1.7.7:Documentation
Local requests continue to use get_sha1, and are not
restricted at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
struct archiver_args *ar_args, const char *prefix,
|
|
|
|
int remote)
|
|
|
|
{
|
|
|
|
const char *name = argv[0];
|
|
|
|
const unsigned char *commit_sha1;
|
|
|
|
time_t archive_time;
|
|
|
|
struct tree *tree;
|
|
|
|
const struct commit *commit;
|
|
|
|
struct object_id oid;
|
|
|
|
|
archive: don't let remote clients get unreachable commits
Usually git is careful not to allow clients to fetch
arbitrary objects from the database; for example, objects
received via upload-pack must be reachable from a ref.
Upload-archive breaks this by feeding the client's tree-ish
directly to get_sha1, which will accept arbitrary hex sha1s,
reflogs, etc.
This is not a problem if all of your objects are publicly
reachable anyway (or at least public to anybody who can run
upload-archive). Or if you are making the repo available by
dumb protocols like http or rsync (in which case the client
can read your whole object db directly).
But for sites which allow access only through smart
protocols, clients may be able to fetch trees from commits
that exist in the server's object database but are not
referenced (e.g., because history was rewound).
This patch tightens upload-archive's lookup to use dwim_ref
rather than get_sha1. This means a remote client can only
fetch the tip of a named ref, not an arbitrary sha1 or
reflog entry.
This also restricts some legitimate requests, too:
1. Reachable non-tip commits, like:
git archive --remote=$url v1.0~5
2. Sub-trees of reachable commits, like:
git archive --remote=$url v1.7.7:Documentation
Local requests continue to use get_sha1, and are not
restricted at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
/* Remotes are only allowed to fetch actual refs */
|
add uploadarchive.allowUnreachable option
In commit ee27ca4, we started restricting remote git-archive
invocations to only accessing reachable commits. This
matches what upload-pack allows, but does restrict some
useful cases (e.g., HEAD:foo). We loosened this in 0f544ee,
which allows `foo:bar` as long as `foo` is a ref tip.
However, that still doesn't allow many useful things, like:
1. Commits accessible from a ref, like `foo^:bar`, which
are reachable
2. Arbitrary sha1s, even if they are reachable.
We can do a full object-reachability check for these cases,
but it can be quite expensive if the client has sent us the
sha1 of a tree; we have to visit every sub-tree of every
commit in the worst case.
Let's instead give site admins an escape hatch, in case they
prefer the more liberal behavior. For many sites, the full
object database is public anyway (e.g., if you allow dumb
walker access), or the site admin may simply decide the
security/convenience tradeoff is not worth it.
This patch adds a new config option to disable the
restrictions added in ee27ca4. It defaults to off, meaning
there is no change in behavior by default.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
11 years ago
|
|
|
if (remote && !remote_allow_unreachable) {
|
archive: don't let remote clients get unreachable commits
Usually git is careful not to allow clients to fetch
arbitrary objects from the database; for example, objects
received via upload-pack must be reachable from a ref.
Upload-archive breaks this by feeding the client's tree-ish
directly to get_sha1, which will accept arbitrary hex sha1s,
reflogs, etc.
This is not a problem if all of your objects are publicly
reachable anyway (or at least public to anybody who can run
upload-archive). Or if you are making the repo available by
dumb protocols like http or rsync (in which case the client
can read your whole object db directly).
But for sites which allow access only through smart
protocols, clients may be able to fetch trees from commits
that exist in the server's object database but are not
referenced (e.g., because history was rewound).
This patch tightens upload-archive's lookup to use dwim_ref
rather than get_sha1. This means a remote client can only
fetch the tip of a named ref, not an arbitrary sha1 or
reflog entry.
This also restricts some legitimate requests, too:
1. Reachable non-tip commits, like:
git archive --remote=$url v1.0~5
2. Sub-trees of reachable commits, like:
git archive --remote=$url v1.7.7:Documentation
Local requests continue to use get_sha1, and are not
restricted at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
char *ref = NULL;
|
|
|
|
const char *colon = strchrnul(name, ':');
|
|
|
|
int refnamelen = colon - name;
|
|
|
|
|
|
|
|
if (!dwim_ref(name, refnamelen, oid.hash, &ref))
|
|
|
|
die("no such ref: %.*s", refnamelen, name);
|
archive: don't let remote clients get unreachable commits
Usually git is careful not to allow clients to fetch
arbitrary objects from the database; for example, objects
received via upload-pack must be reachable from a ref.
Upload-archive breaks this by feeding the client's tree-ish
directly to get_sha1, which will accept arbitrary hex sha1s,
reflogs, etc.
This is not a problem if all of your objects are publicly
reachable anyway (or at least public to anybody who can run
upload-archive). Or if you are making the repo available by
dumb protocols like http or rsync (in which case the client
can read your whole object db directly).
But for sites which allow access only through smart
protocols, clients may be able to fetch trees from commits
that exist in the server's object database but are not
referenced (e.g., because history was rewound).
This patch tightens upload-archive's lookup to use dwim_ref
rather than get_sha1. This means a remote client can only
fetch the tip of a named ref, not an arbitrary sha1 or
reflog entry.
This also restricts some legitimate requests, too:
1. Reachable non-tip commits, like:
git archive --remote=$url v1.0~5
2. Sub-trees of reachable commits, like:
git archive --remote=$url v1.7.7:Documentation
Local requests continue to use get_sha1, and are not
restricted at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
free(ref);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (get_sha1(name, oid.hash))
|
|
|
|
die("Not a valid object name");
|
|
|
|
|
Convert lookup_commit* to struct object_id
Convert lookup_commit, lookup_commit_or_die,
lookup_commit_reference, and lookup_commit_reference_gently to take
struct object_id arguments.
Introduce a temporary in parse_object buffer in order to convert this
function. This is required since in order to convert parse_object and
parse_object_buffer, lookup_commit_reference_gently and
lookup_commit_or_die would need to be converted. Not introducing a
temporary would therefore require that lookup_commit_or_die take a
struct object_id *, but lookup_commit would take unsigned char *,
leaving a confusing and hard-to-use interface.
parse_object_buffer will lose this temporary in a later patch.
This commit was created with manual changes to commit.c, commit.h, and
object.c, plus the following semantic patch:
@@
expression E1, E2;
@@
- lookup_commit_reference_gently(E1.hash, E2)
+ lookup_commit_reference_gently(&E1, E2)
@@
expression E1, E2;
@@
- lookup_commit_reference_gently(E1->hash, E2)
+ lookup_commit_reference_gently(E1, E2)
@@
expression E1;
@@
- lookup_commit_reference(E1.hash)
+ lookup_commit_reference(&E1)
@@
expression E1;
@@
- lookup_commit_reference(E1->hash)
+ lookup_commit_reference(E1)
@@
expression E1;
@@
- lookup_commit(E1.hash)
+ lookup_commit(&E1)
@@
expression E1;
@@
- lookup_commit(E1->hash)
+ lookup_commit(E1)
@@
expression E1, E2;
@@
- lookup_commit_or_die(E1.hash, E2)
+ lookup_commit_or_die(&E1, E2)
@@
expression E1, E2;
@@
- lookup_commit_or_die(E1->hash, E2)
+ lookup_commit_or_die(E1, E2)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
8 years ago
|
|
|
commit = lookup_commit_reference_gently(&oid, 1);
|
|
|
|
if (commit) {
|
|
|
|
commit_sha1 = commit->object.oid.hash;
|
|
|
|
archive_time = commit->date;
|
|
|
|
} else {
|
|
|
|
commit_sha1 = NULL;
|
|
|
|
archive_time = time(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
tree = parse_tree_indirect(&oid);
|
|
|
|
if (tree == NULL)
|
|
|
|
die("not a tree object");
|
|
|
|
|
|
|
|
if (prefix) {
|
|
|
|
struct object_id tree_oid;
|
|
|
|
unsigned int mode;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = get_tree_entry(tree->object.oid.hash, prefix,
|
|
|
|
tree_oid.hash, &mode);
|
|
|
|
if (err || !S_ISDIR(mode))
|
|
|
|
die("current working directory is untracked");
|
|
|
|
|
|
|
|
tree = parse_tree_indirect(&tree_oid);
|
|
|
|
}
|
|
|
|
ar_args->tree = tree;
|
|
|
|
ar_args->commit_sha1 = commit_sha1;
|
|
|
|
ar_args->commit = commit;
|
|
|
|
ar_args->time = archive_time;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Option-table initializer for an OPTION_SET_INT switch with short name
 * "s" and no long name, value pointer "v" and help text "h"; it takes
 * no argument and cannot be negated.  "p" is the final initializer
 * (presumably the integer stored on match -- confirm against
 * struct option in parse-options.h).
 */
#define OPT__COMPR(s, v, h, p) \
	{ OPTION_SET_INT, (s), NULL, (v), NULL, (h), \
	  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, (p) }

/* Same as OPT__COMPR, but with empty help text and PARSE_OPT_HIDDEN set. */
#define OPT__COMPR_HIDDEN(s, v, p) \
	{ OPTION_SET_INT, (s), NULL, (v), NULL, "", \
	  PARSE_OPT_NOARG | PARSE_OPT_NONEG | PARSE_OPT_HIDDEN, NULL, (p) }
|
|
|
|
|
|
|
|
static int parse_archive_args(int argc, const char **argv,
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
const struct archiver **ar, struct archiver_args *args,
|
|
|
|
const char *name_hint, int is_remote)
|
|
|
|
{
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
const char *format = NULL;
|
|
|
|
const char *base = NULL;
|
|
|
|
const char *remote = NULL;
|
|
|
|
const char *exec = NULL;
|
|
|
|
const char *output = NULL;
|
|
|
|
int compression_level = -1;
|
|
|
|
int verbose = 0;
|
|
|
|
int i;
|
|
|
|
int list = 0;
|
|
|
|
int worktree_attributes = 0;
|
|
|
|
struct option opts[] = {
|
|
|
|
OPT_GROUP(""),
|
|
|
|
OPT_STRING(0, "format", &format, N_("fmt"), N_("archive format")),
|
|
|
|
OPT_STRING(0, "prefix", &base, N_("prefix"),
|
|
|
|
N_("prepend prefix to each pathname in the archive")),
|
|
|
|
OPT_STRING('o', "output", &output, N_("file"),
|
|
|
|
N_("write the archive to this file")),
|
|
|
|
OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
|
|
|
|
N_("read .gitattributes in working directory")),
|
|
|
|
OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
|
|
|
|
OPT__COMPR('0', &compression_level, N_("store only"), 0),
|
|
|
|
OPT__COMPR('1', &compression_level, N_("compress faster"), 1),
|
|
|
|
OPT__COMPR_HIDDEN('2', &compression_level, 2),
|
|
|
|
OPT__COMPR_HIDDEN('3', &compression_level, 3),
|
|
|
|
OPT__COMPR_HIDDEN('4', &compression_level, 4),
|
|
|
|
OPT__COMPR_HIDDEN('5', &compression_level, 5),
|
|
|
|
OPT__COMPR_HIDDEN('6', &compression_level, 6),
|
|
|
|
OPT__COMPR_HIDDEN('7', &compression_level, 7),
|
|
|
|
OPT__COMPR_HIDDEN('8', &compression_level, 8),
|
|
|
|
OPT__COMPR('9', &compression_level, N_("compress better"), 9),
|
|
|
|
OPT_GROUP(""),
|
|
|
|
OPT_BOOL('l', "list", &list,
|
|
|
|
N_("list supported archive formats")),
|
|
|
|
OPT_GROUP(""),
|
|
|
|
OPT_STRING(0, "remote", &remote, N_("repo"),
|
|
|
|
N_("retrieve the archive from remote repository <repo>")),
|
|
|
|
OPT_STRING(0, "exec", &exec, N_("command"),
|
|
|
|
N_("path to the remote git-upload-archive command")),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
|
|
|
argc = parse_options(argc, argv, NULL, opts, archive_usage, 0);
|
|
|
|
|
|
|
|
if (remote)
|
|
|
|
die(_("Unexpected option --remote"));
|
|
|
|
if (exec)
|
|
|
|
die(_("Option --exec can only be used together with --remote"));
|
|
|
|
if (output)
|
|
|
|
die(_("Unexpected option --output"));
|
|
|
|
|
|
|
|
if (!base)
|
|
|
|
base = "";
|
|
|
|
|
|
|
|
if (list) {
|
|
|
|
for (i = 0; i < nr_archivers; i++)
|
|
|
|
if (!is_remote || archivers[i]->flags & ARCHIVER_REMOTE)
|
|
|
|
printf("%s\n", archivers[i]->name);
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
if (!format && name_hint)
|
|
|
|
format = archive_format_from_filename(name_hint);
|
|
|
|
if (!format)
|
|
|
|
format = "tar";
|
|
|
|
|
|
|
|
/* We need at least one parameter -- tree-ish */
|
|
|
|
if (argc < 1)
|
|
|
|
usage_with_options(archive_usage, opts);
|
|
|
|
*ar = lookup_archiver(format);
|
|
|
|
if (!*ar || (is_remote && !((*ar)->flags & ARCHIVER_REMOTE)))
|
|
|
|
die(_("Unknown archive format '%s'"), format);
|
|
|
|
|
|
|
|
args->compression_level = Z_DEFAULT_COMPRESSION;
|
|
|
|
if (compression_level != -1) {
|
|
|
|
if ((*ar)->flags & ARCHIVER_WANT_COMPRESSION_LEVELS)
|
|
|
|
args->compression_level = compression_level;
|
|
|
|
else {
|
|
|
|
die(_("Argument not supported for format '%s': -%d"),
|
|
|
|
format, compression_level);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
args->verbose = verbose;
|
|
|
|
args->base = base;
|
|
|
|
args->baselen = strlen(base);
|
|
|
|
args->worktree_attributes = worktree_attributes;
|
|
|
|
|
|
|
|
return argc;
|
|
|
|
}
|
|
|
|
|
|
|
|
int write_archive(int argc, const char **argv, const char *prefix,
|
|
|
|
const char *name_hint, int remote)
|
|
|
|
{
|
|
|
|
const struct archiver *ar = NULL;
|
|
|
|
struct archiver_args args;
|
|
|
|
|
|
|
|
git_config_get_bool("uploadarchive.allowunreachable", &remote_allow_unreachable);
|
|
|
|
git_config(git_default_config, NULL);
|
|
|
|
|
|
|
|
init_tar_archiver();
|
|
|
|
init_zip_archiver();
|
archive: reorder option parsing and config reading
The archive command does three things during its
initialization phase:
1. parse command-line options
2. setup the git directory
3. read config
During phase (1), if we see any options that do not require
a git directory (like "--list"), we handle them immediately
and exit, making it safe to abort step (2) if we are not in
a git directory.
Step (3) must come after step (2), since the git directory
may influence configuration. However, this leaves no
possibility of configuration from step (3) impacting the
command-line options in step (1) (which is useful, for
example, for supporting user-configurable output formats).
Instead, let's reorder this to:
1. setup the git directory, if it exists
2. read config
3. parse command-line options
4. if we are not in a git repository, die
This should have the same external behavior, but puts
configuration before command-line parsing.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
|
|
|
|
argc = parse_archive_args(argc, argv, &ar, &args, name_hint, remote);
|
|
|
|
if (!startup_info->have_repository) {
|
archive: reorder option parsing and config reading
The archive command does three things during its
initialization phase:
1. parse command-line options
2. setup the git directory
3. read config
During phase (1), if we see any options that do not require
a git directory (like "--list"), we handle them immediately
and exit, making it safe to abort step (2) if we are not in
a git directory.
Step (3) must come after step (2), since the git directory
may influence configuration. However, this leaves no
possibility of configuration from step (3) impacting the
command-line options in step (1) (which is useful, for
example, for supporting user-configurable output formats).
Instead, let's reorder this to:
1. setup the git directory, if it exists
2. read config
3. parse command-line options
4. if we are not in a git repository, die
This should have the same external behavior, but puts
configuration before command-line parsing.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
/*
|
|
|
|
* We know this will die() with an error, so we could just
|
|
|
|
* die ourselves; but its error message will be more specific
|
|
|
|
* than what we could write here.
|
|
|
|
*/
|
|
|
|
setup_git_directory();
|
|
|
|
}
|
|
|
|
|
archive: don't let remote clients get unreachable commits
Usually git is careful not to allow clients to fetch
arbitrary objects from the database; for example, objects
received via upload-pack must be reachable from a ref.
Upload-archive breaks this by feeding the client's tree-ish
directly to get_sha1, which will accept arbitrary hex sha1s,
reflogs, etc.
This is not a problem if all of your objects are publicly
reachable anyway (or at least public to anybody who can run
upload-archive). Or if you are making the repo available by
dumb protocols like http or rsync (in which case the client
can read your whole object db directly).
But for sites which allow access only through smart
protocols, clients may be able to fetch trees from commits
that exist in the server's object database but are not
referenced (e.g., because history was rewound).
This patch tightens upload-archive's lookup to use dwim_ref
rather than get_sha1. This means a remote client can only
fetch the tip of a named ref, not an arbitrary sha1 or
reflog entry.
This also restricts some legitimate requests, too:
1. Reachable non-tip commits, like:
git archive --remote=$url v1.0~5
2. Sub-trees of reachable commits, like:
git archive --remote=$url v1.7.7:Documentation
Local requests continue to use get_sha1, and are not
restricted at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
parse_treeish_arg(argv, &args, prefix, remote);
|
|
|
|
parse_pathspec_arg(argv + 1, &args);
|
|
|
|
|
|
|
|
return ar->write_archive(ar, &args);
|
|
|
|
}
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
|
|
|
|
/*
 * Check whether filename ends in ".<ext>" with a non-empty stem in front
 * of the dot.
 *
 * Returns 1 on a match and 0 otherwise.  Both arguments must be valid
 * NUL-terminated strings.
 */
static int match_extension(const char *filename, const char *ext)
{
	size_t filename_len = strlen(filename);
	size_t ext_len = strlen(ext);
	size_t prefixlen;

	/*
	 * We need 1 character for the '.', and 1 character to ensure that the
	 * prefix is non-empty (i.e., we don't match .tar.gz with no actual
	 * filename).  The lengths are compared before subtracting so that the
	 * unsigned difference can never wrap around.
	 */
	if (filename_len < 2 || filename_len - 2 < ext_len)
		return 0;

	prefixlen = filename_len - ext_len;
	if (filename[prefixlen - 1] != '.')
		return 0;
	return !strcmp(filename + prefixlen, ext);
}
|
|
|
|
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
const char *archive_format_from_filename(const char *filename)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < nr_archivers; i++)
|
|
|
|
if (match_extension(filename, archivers[i]->name))
|
|
|
|
return archivers[i]->name;
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
14 years ago
|
|
|
return NULL;
|
|
|
|
}
|