|
|
|
/*
|
|
|
|
* Utilities for paths and pathnames
|
|
|
|
*/
|
|
|
|
#include "cache.h"
|
|
|
|
#include "strbuf.h"
|
|
|
|
#include "string-list.h"
|
|
|
|
#include "dir.h"
|
|
|
|
#include "worktree.h"
|
|
|
|
#include "submodule-config.h"
|
|
|
|
|
cygwin: Remove the Win32 l/stat() implementation
Commit adbc0b6b ("cygwin: Use native Win32 API for stat", 30-09-2008)
added a Win32 specific implementation of the stat functions. In order
to handle absolute paths, cygwin mount points and symbolic links, this
implementation may fall back on the standard cygwin l/stat() functions.
Also, the choice of cygwin or Win32 functions is made lazily (by the
first call(s) to l/stat) based on the state of some config variables.
Unfortunately, this "schizophrenic stat" implementation has been the
source of many problems ever since. For example, see commits 7faee6b8,
79748439, 452993c2, 085479e7, b8a97333, 924aaf3e, 05bab3ea and 0117c2f0.
In order to avoid further problems, such as the issue raised by the new
reference handling API, remove the Win32 l/stat() implementation.
Signed-off-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
/*
 * Fetch the mode bits of `path` using lstat().
 *
 * Returns 0 and stores st_mode in *mode on success.  Returns -1 and leaves
 * *mode untouched when lstat() fails.
 */
static int get_st_mode_bits(const char *path, int *mode)
{
	struct stat info;

	if (lstat(path, &info) >= 0) {
		*mode = info.st_mode;
		return 0;
	}
	return -1;
}
|
|
|
|
|
|
|
|
/* Fallback text that mksnpath() copies into the caller's buffer when the formatted path does not fit. */
static char bad_path[] = "/bad-path/";
|
|
|
|
|
|
|
|
static struct strbuf *get_pathname(void)
|
|
|
|
{
|
|
|
|
static struct strbuf pathname_array[4] = {
|
|
|
|
STRBUF_INIT, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT
|
|
|
|
};
|
|
|
|
static int index;
|
|
|
|
struct strbuf *sb = &pathname_array[index];
|
|
|
|
index = (index + 1) % ARRAY_SIZE(pathname_array);
|
|
|
|
strbuf_reset(sb);
|
|
|
|
return sb;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Skip a leading "./" together with any slashes that directly follow it.
 *
 * Returns a pointer into `path` itself; nothing is copied or modified.
 * A path that does not start with "./" is returned unchanged.
 *
 * The prefix is tested one character at a time so that strings shorter
 * than two bytes (e.g. "") are never read past their terminating NUL,
 * which a fixed two-byte memcmp() would do.
 */
static char *cleanup_path(char *path)
{
	if (path[0] == '.' && path[1] == '/') {
		path += 2;
		while (*path == '/')
			path++;
	}
	return path;
}
|
|
|
|
|
|
|
|
static void strbuf_cleanup_path(struct strbuf *sb)
|
|
|
|
{
|
|
|
|
char *path = cleanup_path(sb->buf);
|
|
|
|
if (path > sb->buf)
|
|
|
|
strbuf_remove(sb, 0, path - sb->buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
char *mksnpath(char *buf, size_t n, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
unsigned len;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
len = vsnprintf(buf, n, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
if (len >= n) {
|
|
|
|
strlcpy(buf, bad_path, n);
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
return cleanup_path(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return 1 if `buf` starts with `dir` and the match ends on a component
 * boundary (a directory separator or the end of `buf`), 0 otherwise.
 */
static int dir_prefix(const char *buf, const char *dir)
{
	int len = strlen(dir);

	if (strncmp(buf, dir, len) != 0)
		return 0;
	return is_dir_sep(buf[len]) || buf[len] == '\0';
}
|
|
|
|
|
|
|
|
/*
 * $buf =~ m|$dir/+$file| but without regex: `buf` must be `dir`, then one
 * or more directory separators, then exactly `file`.
 */
static int is_dir_file(const char *buf, const char *dir, const char *file)
{
	int pos = strlen(dir);

	if (strncmp(buf, dir, pos) != 0)
		return 0;
	if (!is_dir_sep(buf[pos]))
		return 0;

	/* At least one separator is present; step over the whole run. */
	do {
		pos++;
	} while (is_dir_sep(buf[pos]));

	return strcmp(buf + pos, file) == 0;
}
|
|
|
|
|
|
|
|
static void replace_dir(struct strbuf *buf, int len, const char *newdir)
|
|
|
|
{
|
|
|
|
int newlen = strlen(newdir);
|
|
|
|
int need_sep = (buf->buf[len] && !is_dir_sep(buf->buf[len])) &&
|
|
|
|
!is_dir_sep(newdir[newlen - 1]);
|
|
|
|
if (need_sep)
|
|
|
|
len--; /* keep one char, to be replaced with '/' */
|
|
|
|
strbuf_splice(buf, 0, len, newdir, newlen);
|
|
|
|
if (need_sep)
|
|
|
|
buf->buf[newlen] = '/';
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * One entry of common_list: a path relative to the git directory plus
 * three one-bit flags.  common_list fills these in with positional
 * initializers, so the member order must stay exactly as it is.
 */
struct common_dir {
	/* Not considered garbage for report_linked_checkout_garbage */
	unsigned ignore_garbage:1;
	/* Entry also matches when followed by '/' (see check_common) */
	unsigned is_dir:1;
	/* Not common even though its parent is */
	unsigned exclude:1;
	/* Path of the entry; NULL marks the end of common_list */
	const char *dirname;
};
|
|
|
|
|
|
|
|
/*
 * Table of git-dir relative paths that init_common_trie() loads into
 * common_trie and that report_linked_checkout_garbage() walks.
 * The list is terminated by an entry whose dirname is NULL.
 *
 * Columns: ignore_garbage, is_dir, exclude, dirname.
 *
 * NOTE(review): "logs/HEAD" carries is_dir = 1 although the name suggests
 * a file.  With exclude = 1, check_common() returns 0 for it either way.
 */
static struct common_dir common_list[] = {
	{ 0, 1, 0, "branches" },
	{ 0, 1, 0, "hooks" },
	{ 0, 1, 0, "info" },
	{ 0, 0, 1, "info/sparse-checkout" },
	{ 1, 1, 0, "logs" },
	{ 1, 1, 1, "logs/HEAD" },
	{ 0, 1, 1, "logs/refs/bisect" },
	{ 0, 1, 0, "lost-found" },
	{ 0, 1, 0, "objects" },
	{ 0, 1, 0, "refs" },
	{ 0, 1, 1, "refs/bisect" },
	{ 0, 1, 0, "remotes" },
	{ 0, 1, 0, "worktrees" },
	{ 0, 1, 0, "rr-cache" },
	{ 0, 1, 0, "svn" },
	{ 0, 0, 0, "config" },
	{ 1, 0, 0, "gc.pid" },
	{ 0, 0, 0, "packed-refs" },
	{ 0, 0, 0, "shallow" },
	{ 0, 0, 0, NULL }
};
|
|
|
|
|
|
|
|
/*
 * A compressed trie.  Every node holds the run of characters shared by all
 * keys below it ("contents", "len" bytes long) and a table of children
 * indexed by the byte that follows that run.  A node whose "value" is not
 * NULL is a terminal node, i.e. it ends a stored key.
 *
 * Example, storing the strings
 *	abc
 *	def
 *	definite
 *	definition
 *
 * produces:
 *	root: len = 0, children a and d non-NULL, value = NULL.
 *	   a: len = 2, contents = bc, value = (data for "abc")
 *	   d: len = 2, contents = ef, children i non-NULL,
 *	      value = (data for "def")
 *	      i: len = 3, contents = nit, children e and i non-NULL,
 *	         value = NULL
 *	         e: len = 0, children all NULL,
 *	            value = (data for "definite")
 *	         i: len = 2, contents = on, children all NULL,
 *	            value = (data for "definition")
 */
struct trie {
	struct trie *children[256];	/* indexed by (unsigned char) next byte */
	int len;			/* number of bytes in contents */
	char *contents;			/* compressed run; unused when len == 0 */
	void *value;			/* non-NULL marks a terminal node */
};
|
|
|
|
|
|
|
|
static struct trie *make_trie_node(const char *key, void *value)
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
{
|
|
|
|
struct trie *new_node = xcalloc(1, sizeof(*new_node));
|
|
|
|
new_node->len = strlen(key);
|
|
|
|
if (new_node->len) {
|
|
|
|
new_node->contents = xmalloc(new_node->len);
|
|
|
|
memcpy(new_node->contents, key, new_node->len);
|
|
|
|
}
|
|
|
|
new_node->value = value;
|
|
|
|
return new_node;
|
|
|
|
}
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
|
|
|
|
/*
|
|
|
|
* Add a key/value pair to a trie. The key is assumed to be \0-terminated.
|
|
|
|
* If there was an existing value for this key, return it.
|
|
|
|
*/
|
|
|
|
static void *add_to_trie(struct trie *root, const char *key, void *value)
|
|
|
|
{
|
|
|
|
struct trie *child;
|
|
|
|
void *old;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!*key) {
|
|
|
|
/* we have reached the end of the key */
|
|
|
|
old = root->value;
|
|
|
|
root->value = value;
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < root->len; i++) {
|
|
|
|
if (root->contents[i] == key[i])
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Split this node: child will contain this node's
|
|
|
|
* existing children.
|
|
|
|
*/
|
|
|
|
child = malloc(sizeof(*child));
|
|
|
|
memcpy(child->children, root->children, sizeof(root->children));
|
|
|
|
|
|
|
|
child->len = root->len - i - 1;
|
|
|
|
if (child->len) {
|
|
|
|
child->contents = xstrndup(root->contents + i + 1,
|
|
|
|
child->len);
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
}
|
|
|
|
child->value = root->value;
|
|
|
|
root->value = NULL;
|
|
|
|
root->len = i;
|
|
|
|
|
|
|
|
memset(root->children, 0, sizeof(root->children));
|
|
|
|
root->children[(unsigned char)root->contents[i]] = child;
|
|
|
|
|
|
|
|
/* This is the newly-added child. */
|
|
|
|
root->children[(unsigned char)key[i]] =
|
|
|
|
make_trie_node(key + i + 1, value);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We have matched the entire compressed section */
|
|
|
|
if (key[i]) {
|
|
|
|
child = root->children[(unsigned char)key[root->len]];
|
|
|
|
if (child) {
|
|
|
|
return add_to_trie(child, key + root->len + 1, value);
|
|
|
|
} else {
|
|
|
|
child = make_trie_node(key + root->len + 1, value);
|
|
|
|
root->children[(unsigned char)key[root->len]] = child;
|
|
|
|
return NULL;
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
old = root->value;
|
|
|
|
root->value = value;
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
|
|
|
typedef int (*match_fn)(const char *unmatched, void *data, void *baton);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Search a trie for some key. Find the longest /-or-\0-terminated
|
|
|
|
* prefix of the key for which the trie contains a value. Call fn
|
|
|
|
* with the unmatched portion of the key and the found value, and
|
|
|
|
* return its return value. If there is no such prefix, return -1.
|
|
|
|
*
|
|
|
|
* The key is partially normalized: consecutive slashes are skipped.
|
|
|
|
*
|
|
|
|
* For example, consider the trie containing only [refs,
|
|
|
|
* refs/worktree] (both with values).
|
|
|
|
*
|
|
|
|
* | key | unmatched | val from node | return value |
|
|
|
|
* |-----------------|------------|---------------|--------------|
|
|
|
|
* | a | not called | n/a | -1 |
|
|
|
|
* | refs | \0 | refs | as per fn |
|
|
|
|
* | refs/ | / | refs | as per fn |
|
|
|
|
* | refs/w | /w | refs | as per fn |
|
|
|
|
* | refs/worktree | \0 | refs/worktree | as per fn |
|
|
|
|
* | refs/worktree/ | / | refs/worktree | as per fn |
|
|
|
|
* | refs/worktree/a | /a | refs/worktree | as per fn |
|
|
|
|
* |-----------------|------------|---------------|--------------|
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static int trie_find(struct trie *root, const char *key, match_fn fn,
|
|
|
|
void *baton)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int result;
|
|
|
|
struct trie *child;
|
|
|
|
|
|
|
|
if (!*key) {
|
|
|
|
/* we have reached the end of the key */
|
|
|
|
if (root->value && !root->len)
|
|
|
|
return fn(key, root->value, baton);
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < root->len; i++) {
|
|
|
|
/* Partial path normalization: skip consecutive slashes. */
|
|
|
|
if (key[i] == '/' && key[i+1] == '/') {
|
|
|
|
key++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (root->contents[i] != key[i])
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Matched the entire compressed section */
|
|
|
|
key += i;
|
|
|
|
if (!*key)
|
|
|
|
/* End of key */
|
|
|
|
return fn(key, root->value, baton);
|
|
|
|
|
|
|
|
/* Partial path normalization: skip consecutive slashes */
|
|
|
|
while (key[0] == '/' && key[1] == '/')
|
|
|
|
key++;
|
|
|
|
|
|
|
|
child = root->children[(unsigned char)*key];
|
|
|
|
if (child)
|
|
|
|
result = trie_find(child, key + 1, fn, baton);
|
|
|
|
else
|
|
|
|
result = -1;
|
|
|
|
|
|
|
|
if (result >= 0 || (*key != '/' && *key != 0))
|
|
|
|
return result;
|
|
|
|
if (root->value)
|
|
|
|
return fn(key, root->value, baton);
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct trie common_trie;
|
|
|
|
static int common_trie_done_setup;
|
|
|
|
|
|
|
|
static void init_common_trie(void)
|
|
|
|
{
|
|
|
|
struct common_dir *p;
|
|
|
|
|
|
|
|
if (common_trie_done_setup)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (p = common_list; p->dirname; p++)
|
|
|
|
add_to_trie(&common_trie, p->dirname, p);
|
|
|
|
|
|
|
|
common_trie_done_setup = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Helper function for update_common_dir: returns 1 if the dir
|
|
|
|
* prefix is common.
|
|
|
|
*/
|
|
|
|
static int check_common(const char *unmatched, void *value, void *baton)
|
|
|
|
{
|
|
|
|
struct common_dir *dir = value;
|
|
|
|
|
|
|
|
if (!dir)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (dir->is_dir && (unmatched[0] == 0 || unmatched[0] == '/'))
|
|
|
|
return !dir->exclude;
|
|
|
|
|
|
|
|
if (!dir->is_dir && unmatched[0] == 0)
|
|
|
|
return !dir->exclude;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void update_common_dir(struct strbuf *buf, int git_dir_len,
|
|
|
|
const char *common_dir)
|
|
|
|
{
|
|
|
|
char *base = buf->buf + git_dir_len;
|
|
|
|
init_common_trie();
|
|
|
|
if (!common_dir)
|
|
|
|
common_dir = get_git_common_dir();
|
|
|
|
if (trie_find(&common_trie, base, check_common, NULL) > 0)
|
|
|
|
replace_dir(buf, git_dir_len, common_dir);
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
}
|
|
|
|
|
|
|
|
void report_linked_checkout_garbage(void)
|
|
|
|
{
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
const struct common_dir *p;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
if (!git_common_dir_env)
|
|
|
|
return;
|
|
|
|
strbuf_addf(&sb, "%s/", get_git_dir());
|
|
|
|
len = sb.len;
|
|
|
|
for (p = common_list; p->dirname; p++) {
|
|
|
|
const char *path = p->dirname;
|
|
|
|
if (p->ignore_garbage)
|
|
|
|
continue;
|
|
|
|
strbuf_setlen(&sb, len);
|
|
|
|
strbuf_addstr(&sb, path);
|
|
|
|
if (file_exists(sb.buf))
|
|
|
|
report_garbage(PACKDIR_FILE_GARBAGE, sb.buf);
|
|
|
|
}
|
|
|
|
strbuf_release(&sb);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void adjust_git_path(struct strbuf *buf, int git_dir_len)
|
|
|
|
{
|
|
|
|
const char *base = buf->buf + git_dir_len;
|
|
|
|
if (git_graft_env && is_dir_file(base, "info", "grafts"))
|
|
|
|
strbuf_splice(buf, 0, buf->len,
|
|
|
|
get_graft_file(), strlen(get_graft_file()));
|
|
|
|
else if (git_index_env && !strcmp(base, "index"))
|
|
|
|
strbuf_splice(buf, 0, buf->len,
|
|
|
|
get_index_file(), strlen(get_index_file()));
|
|
|
|
else if (git_db_env && dir_prefix(base, "objects"))
|
|
|
|
replace_dir(buf, git_dir_len + 7, get_object_directory());
|
|
|
|
else if (git_hooks_path && dir_prefix(base, "hooks"))
|
|
|
|
replace_dir(buf, git_dir_len + 5, git_hooks_path);
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
else if (git_common_dir_env)
|
|
|
|
update_common_dir(buf, git_dir_len, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void do_git_path(const struct worktree *wt, struct strbuf *buf,
|
|
|
|
const char *fmt, va_list args)
|
|
|
|
{
|
|
|
|
int gitdir_len;
|
|
|
|
strbuf_addstr(buf, get_worktree_git_dir(wt));
|
|
|
|
if (buf->len && !is_dir_sep(buf->buf[buf->len - 1]))
|
|
|
|
strbuf_addch(buf, '/');
|
|
|
|
gitdir_len = buf->len;
|
|
|
|
strbuf_vaddf(buf, fmt, args);
|
|
|
|
adjust_git_path(buf, gitdir_len);
|
|
|
|
strbuf_cleanup_path(buf);
|
|
|
|
}
|
|
|
|
|
add git_path_buf helper function
If you have a function that uses git_path a lot, but would
prefer to avoid the static buffers, it's useful to keep a
single scratch buffer locally and reuse it for each call.
You used to be able to do this with git_snpath:
char buf[PATH_MAX];
foo(git_snpath(buf, sizeof(buf), "foo"));
bar(git_snpath(buf, sizeof(buf), "bar"));
but since 1a83c24, git_snpath has been replaced with
strbuf_git_path. This is good, because it removes the
arbitrary PATH_MAX limit. But using strbuf_git_path is more
awkward for two reasons:
1. It adds to the buffer, rather than replacing it. This
is consistent with other strbuf functions, but makes
reuse of a single buffer more tedious.
2. It doesn't return the buffer, so you can't format
as part of a function's arguments.
The new git_path_buf solves both of these, so you can use it
like:
struct strbuf buf = STRBUF_INIT;
foo(git_path_buf(&buf, "foo"));
bar(git_path_buf(&buf, "bar"));
strbuf_release(&buf);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
char *git_path_buf(struct strbuf *buf, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
strbuf_reset(buf);
|
|
|
|
va_start(args, fmt);
|
|
|
|
do_git_path(NULL, buf, fmt, args);
|
add git_path_buf helper function
If you have a function that uses git_path a lot, but would
prefer to avoid the static buffers, it's useful to keep a
single scratch buffer locally and reuse it for each call.
You used to be able to do this with git_snpath:
char buf[PATH_MAX];
foo(git_snpath(buf, sizeof(buf), "foo"));
bar(git_snpath(buf, sizeof(buf), "bar"));
but since 1a83c24, git_snpath has been replaced with
strbuf_git_path. This is good, because it removes the
arbitrary PATH_MAX limit. But using strbuf_git_path is more
awkward for two reasons:
1. It adds to the buffer, rather than replacing it. This
is consistent with other strbuf functions, but makes
reuse of a single buffer more tedious.
2. It doesn't return the buffer, so you can't format
as part of a function's arguments.
The new git_path_buf solves both of these, so you can use it
like:
struct strbuf buf = STRBUF_INIT;
foo(git_path_buf(&buf, "foo"));
bar(git_path_buf(&buf, "bar"));
strbuf_release(&buf);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
va_end(args);
|
|
|
|
return buf->buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Run do_git_path() on `sb` with a NULL worktree.  The buffer is not reset
 * first, so the path is added to whatever `sb` already contains.
 */
void strbuf_git_path(struct strbuf *sb, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	do_git_path(NULL, sb, fmt, ap);
	va_end(ap);
}
|
|
|
|
|
|
|
|
const char *git_path(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct strbuf *pathname = get_pathname();
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
|
|
do_git_path(NULL, pathname, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
return pathname->buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
char *git_pathdup(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
|
|
do_git_path(NULL, &path, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
return strbuf_detach(&path, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
char *mkpathdup(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
|
|
strbuf_vaddf(&sb, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
strbuf_cleanup_path(&sb);
|
|
|
|
return strbuf_detach(&sb, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
const char *mkpath(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
struct strbuf *pathname = get_pathname();
|
|
|
|
va_start(args, fmt);
|
|
|
|
strbuf_vaddf(pathname, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
return cleanup_path(pathname->buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
const char *worktree_git_path(const struct worktree *wt, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct strbuf *pathname = get_pathname();
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
|
|
do_git_path(wt, pathname, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
return pathname->buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns 0 on success, negative on failure. */
|
|
|
|
#define SUBMODULE_PATH_ERR_NOT_CONFIGURED -1
|
|
|
|
static int do_submodule_path(struct strbuf *buf, const char *path,
|
|
|
|
const char *fmt, va_list args)
|
|
|
|
{
|
|
|
|
const char *git_dir;
|
|
|
|
struct strbuf git_submodule_common_dir = STRBUF_INIT;
|
|
|
|
struct strbuf git_submodule_dir = STRBUF_INIT;
|
|
|
|
const struct submodule *sub;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
strbuf_addstr(buf, path);
|
use strbuf_complete to conditionally append slash
When working with paths in strbufs, we frequently want to
ensure that a directory contains a trailing slash before
appending to it. We can shorten this code (and make the
intent more obvious) by calling strbuf_complete.
Most of these cases are trivially identical conversions, but
there are two things to note:
- in a few cases we did not check that the strbuf is
non-empty (which would lead to an out-of-bounds memory
access). These were generally not triggerable in
practice, either from earlier assertions, or typically
because we would have just fed the strbuf to opendir(),
which would choke on an empty path.
- in a few cases we indexed the buffer with "original_len"
or similar, rather than the current sb->len, and it is
not immediately obvious from the diff that they are the
same. In all of these cases, I manually verified that
the strbuf does not change between the assignment and
the strbuf_complete call.
This does not convert cases which look like:
if (sb->len && !is_dir_sep(sb->buf[sb->len - 1]))
strbuf_addch(sb, '/');
as those are obviously semantically different. Some of these
cases arguably should be doing that, but that is out of
scope for this change, which aims purely for cleanup with no
behavior change (and at least it will make such sites easier
to find and examine in the future, as we can grep for
strbuf_complete).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
9 years ago
|
|
|
strbuf_complete(buf, '/');
|
|
|
|
strbuf_addstr(buf, ".git");
|
|
|
|
|
|
|
|
git_dir = read_gitfile(buf->buf);
|
|
|
|
if (git_dir) {
|
|
|
|
strbuf_reset(buf);
|
|
|
|
strbuf_addstr(buf, git_dir);
|
|
|
|
}
|
|
|
|
if (!is_git_directory(buf->buf)) {
|
|
|
|
gitmodules_config();
|
|
|
|
sub = submodule_from_path(null_sha1, path);
|
|
|
|
if (!sub) {
|
|
|
|
err = SUBMODULE_PATH_ERR_NOT_CONFIGURED;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
strbuf_reset(buf);
|
|
|
|
strbuf_git_path(buf, "%s/%s", "modules", sub->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_addch(buf, '/');
|
|
|
|
strbuf_addbuf(&git_submodule_dir, buf);
|
|
|
|
|
|
|
|
strbuf_vaddf(buf, fmt, args);
|
|
|
|
|
|
|
|
if (get_common_dir_noenv(&git_submodule_common_dir, git_submodule_dir.buf))
|
|
|
|
update_common_dir(buf, git_submodule_dir.len, git_submodule_common_dir.buf);
|
|
|
|
|
|
|
|
strbuf_cleanup_path(buf);
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
strbuf_release(&git_submodule_dir);
|
|
|
|
strbuf_release(&git_submodule_common_dir);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
char *git_pathdup_submodule(const char *path, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
va_list args;
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
va_start(args, fmt);
|
|
|
|
err = do_submodule_path(&buf, path, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
if (err) {
|
|
|
|
strbuf_release(&buf);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return strbuf_detach(&buf, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Varargs front end for do_submodule_path(): writes into the caller's
 * "buf" and passes the helper's return value straight through
 * (0 on success, negative on failure).
 */
int strbuf_git_path_submodule(struct strbuf *buf, const char *path,
			      const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = do_submodule_path(buf, path, fmt, ap);
	va_end(ap);

	return ret;
}
|
|
|
|
|
|
|
|
static void do_git_common_path(struct strbuf *buf,
|
|
|
|
const char *fmt,
|
|
|
|
va_list args)
|
|
|
|
{
|
|
|
|
strbuf_addstr(buf, get_git_common_dir());
|
|
|
|
if (buf->len && !is_dir_sep(buf->buf[buf->len - 1]))
|
|
|
|
strbuf_addch(buf, '/');
|
|
|
|
strbuf_vaddf(buf, fmt, args);
|
|
|
|
strbuf_cleanup_path(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
const char *git_common_path(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct strbuf *pathname = get_pathname();
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
|
|
do_git_common_path(pathname, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
return pathname->buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Varargs front end for do_git_common_path() that writes into the
 * caller-supplied "sb".
 */
void strbuf_git_common_path(struct strbuf *sb, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	do_git_common_path(sb, fmt, ap);
	va_end(ap);
}
|
|
|
|
|
|
|
|
/*
 * Check whether the file at "path" looks like a usable HEAD.
 *
 * Accepted forms:
 *  - a symbolic link whose target starts with "refs/";
 *  - a file starting with "ref:", optional whitespace, then "refs/";
 *  - a file that get_sha1_hex() accepts (a detached HEAD).
 *
 * Returns 0 if one of these matches, -1 otherwise (including when the
 * file cannot be examined, opened or read).
 */
int validate_headref(const char *path)
{
	struct stat st;
	char *buf, buffer[256];
	unsigned char sha1[20];
	int fd;
	ssize_t len;

	if (lstat(path, &st) < 0)
		return -1;

	/* Make sure it is a "refs/.." symlink */
	if (S_ISLNK(st.st_mode)) {
		/*
		 * readlink() does not NUL-terminate; that is fine here
		 * because only the first "len" bytes are compared.
		 */
		len = readlink(path, buffer, sizeof(buffer)-1);
		if (len >= 5 && !memcmp("refs/", buffer, 5))
			return 0;
		return -1;
	}

	/*
	 * Anything else, just open it and try to see if it is a symbolic ref.
	 */
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	len = read_in_full(fd, buffer, sizeof(buffer)-1);
	close(fd);

	/* Covers read errors (negative len) as well as too-short files. */
	if (len < 4)
		return -1;

	/*
	 * At most sizeof(buffer)-1 bytes were read, so there is always
	 * room for the terminator. Without it, get_sha1_hex() below would
	 * parse past the data we read and into uninitialized stack bytes
	 * whenever the file is shorter than a full hex object name.
	 */
	buffer[len] = '\0';

	/*
	 * Is it a symbolic ref?
	 */
	if (!memcmp("ref:", buffer, 4)) {
		buf = buffer + 4;
		len -= 4;
		while (len && isspace(*buf))
			buf++, len--;
		if (len >= 5 && !memcmp("refs/", buf, 5))
			return 0;
	}

	/*
	 * Is this a detached HEAD?
	 */
	if (!get_sha1_hex(buffer, sha1))
		return 0;

	return -1;
}
|
|
|
|
|
|
|
|
/*
 * Look up the passwd entry for the user whose name is the first "len"
 * bytes of "username" (which need not be NUL-terminated at that point).
 * Returns whatever getpwnam() returns for that name.
 */
static struct passwd *getpw_str(const char *username, size_t len)
{
	/* getpwnam() wants a C string, so make a terminated copy. */
	char *name = xmemdupz(username, len);
	struct passwd *entry = getpwnam(name);

	free(name);
	return entry;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return a string with ~ and ~user expanded via getpw*. If buf != NULL,
|
|
|
|
* then it is a newly allocated string. Returns NULL on getpw failure or
|
|
|
|
* if path is NULL.
|
|
|
|
*/
|
|
|
|
char *expand_user_path(const char *path)
|
|
|
|
{
|
|
|
|
struct strbuf user_path = STRBUF_INIT;
|
|
|
|
const char *to_copy = path;
|
|
|
|
|
|
|
|
if (path == NULL)
|
|
|
|
goto return_null;
|
|
|
|
if (path[0] == '~') {
|
|
|
|
const char *first_slash = strchrnul(path, '/');
|
|
|
|
const char *username = path + 1;
|
|
|
|
size_t username_len = first_slash - username;
|
|
|
|
if (username_len == 0) {
|
|
|
|
const char *home = getenv("HOME");
|
|
|
|
if (!home)
|
|
|
|
goto return_null;
|
|
|
|
strbuf_addstr(&user_path, home);
|
|
|
|
#ifdef GIT_WINDOWS_NATIVE
|
|
|
|
convert_slashes(user_path.buf);
|
|
|
|
#endif
|
|
|
|
} else {
|
|
|
|
struct passwd *pw = getpw_str(username, username_len);
|
|
|
|
if (!pw)
|
|
|
|
goto return_null;
|
|
|
|
strbuf_addstr(&user_path, pw->pw_dir);
|
|
|
|
}
|
|
|
|
to_copy = first_slash;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
}
|
|
|
|
strbuf_addstr(&user_path, to_copy);
|
|
|
|
return strbuf_detach(&user_path, NULL);
|
|
|
|
return_null:
|
|
|
|
strbuf_release(&user_path);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
/*
|
|
|
|
* First, one directory to try is determined by the following algorithm.
|
|
|
|
*
|
|
|
|
* (0) If "strict" is given, the path is used as given and no DWIM is
|
|
|
|
* done. Otherwise:
|
|
|
|
* (1) "~/path" to mean path under the running user's home directory;
|
|
|
|
* (2) "~user/path" to mean path under named user's home directory;
|
|
|
|
* (3) "relative/path" to mean cwd relative directory; or
|
|
|
|
* (4) "/absolute/path" to mean absolute directory.
|
|
|
|
*
|
|
|
|
* Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git"
|
|
|
|
* in this order. We select the first one that is a valid git repository, and
|
|
|
|
* chdir() to it. If none match, or we fail to chdir, we return NULL.
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
*
|
|
|
|
* If all goes well, we return the directory we used to chdir() (but
|
|
|
|
* before ~user is expanded), avoiding getcwd() resolving symbolic
|
|
|
|
* links. User relative paths are also returned as they are given,
|
|
|
|
* except DWIM suffixing.
|
|
|
|
*/
|
|
|
|
const char *enter_repo(const char *path, int strict)
|
|
|
|
{
|
|
|
|
static struct strbuf validated_path = STRBUF_INIT;
|
|
|
|
static struct strbuf used_path = STRBUF_INIT;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
|
|
|
|
if (!path)
|
|
|
|
return NULL;
|
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
if (!strict) {
|
|
|
|
static const char *suffix[] = {
|
standardize and improve lookup rules for external local repos
When you specify a local repository on the command line of
clone, ls-remote, upload-pack, receive-pack, or upload-archive,
or in a request to git-daemon, we perform a little bit of
lookup magic, doing things like looking in working trees for
.git directories and appending ".git" for bare repos.
For clone, this magic happens in get_repo_path. For
everything else, it happens in enter_repo. In both cases,
there are some ambiguous or confusing cases that aren't
handled well, and there is one case that is not handled the
same by both methods.
This patch tries to provide (and test!) standard, sensible
lookup rules for both code paths. The intended changes are:
1. When looking up "foo", we have always preferred
a working tree "foo" (containing "foo/.git" over the
bare "foo.git". But we did not prefer a bare "foo" over
"foo.git". With this patch, we do so.
2. We would select directories that existed but didn't
actually look like git repositories. With this patch,
we make sure a selected directory looks like a git
repo. Not only is this more sensible in general, but it
will help anybody who is negatively affected by change
(1) negatively (e.g., if they had "foo.git" next to its
separate work tree "foo", and expect to keep finding
"foo.git" when they reference "foo").
3. The enter_repo code path would, given "foo", look for
"foo.git/.git" (i.e., do the ".git" append magic even
for a repo with working tree). The clone code path did
not; with this patch, they now behave the same.
In the unlikely case of a working tree overlaying a bare
repo (i.e., a ".git" directory _inside_ a bare repo), we
continue to treat it as a working tree (preferring the
"inner" .git over the bare repo). This is mainly because the
combination seems nonsensical, and I'd rather stick with
existing behavior on the off chance that somebody is relying
on it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
"/.git", "", ".git/.git", ".git", NULL,
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
};
|
|
|
|
const char *gitfile;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
int len = strlen(path);
|
|
|
|
int i;
|
|
|
|
while ((1 < len) && (path[len-1] == '/'))
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
len--;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We can handle arbitrary-sized buffers, but this remains as a
|
|
|
|
* sanity check on untrusted input.
|
|
|
|
*/
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
if (PATH_MAX <= len)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
strbuf_reset(&used_path);
|
|
|
|
strbuf_reset(&validated_path);
|
|
|
|
strbuf_add(&used_path, path, len);
|
|
|
|
strbuf_add(&validated_path, path, len);
|
|
|
|
|
|
|
|
if (used_path.buf[0] == '~') {
|
|
|
|
char *newpath = expand_user_path(used_path.buf);
|
|
|
|
if (!newpath)
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
return NULL;
|
|
|
|
strbuf_attach(&used_path, newpath, strlen(newpath),
|
|
|
|
strlen(newpath));
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
}
|
|
|
|
for (i = 0; suffix[i]; i++) {
|
standardize and improve lookup rules for external local repos
When you specify a local repository on the command line of
clone, ls-remote, upload-pack, receive-pack, or upload-archive,
or in a request to git-daemon, we perform a little bit of
lookup magic, doing things like looking in working trees for
.git directories and appending ".git" for bare repos.
For clone, this magic happens in get_repo_path. For
everything else, it happens in enter_repo. In both cases,
there are some ambiguous or confusing cases that aren't
handled well, and there is one case that is not handled the
same by both methods.
This patch tries to provide (and test!) standard, sensible
lookup rules for both code paths. The intended changes are:
1. When looking up "foo", we have always preferred
a working tree "foo" (containing "foo/.git") over the
bare "foo.git". But we did not prefer a bare "foo" over
"foo.git". With this patch, we do so.
2. We would select directories that existed but didn't
actually look like git repositories. With this patch,
we make sure a selected directory looks like a git
repo. Not only is this more sensible in general, but it
will help anybody who is negatively affected by change
(1) (e.g., if they had "foo.git" next to its
separate work tree "foo", and expect to keep finding
"foo.git" when they reference "foo").
3. The enter_repo code path would, given "foo", look for
"foo.git/.git" (i.e., do the ".git" append magic even
for a repo with working tree). The clone code path did
not; with this patch, they now behave the same.
In the unlikely case of a working tree overlaying a bare
repo (i.e., a ".git" directory _inside_ a bare repo), we
continue to treat it as a working tree (preferring the
"inner" .git over the bare repo). This is mainly because the
combination seems nonsensical, and I'd rather stick with
existing behavior on the off chance that somebody is relying
on it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
struct stat st;
|
|
|
|
size_t baselen = used_path.len;
|
|
|
|
strbuf_addstr(&used_path, suffix[i]);
|
|
|
|
if (!stat(used_path.buf, &st) &&
|
standardize and improve lookup rules for external local repos
When you specify a local repository on the command line of
clone, ls-remote, upload-pack, receive-pack, or upload-archive,
or in a request to git-daemon, we perform a little bit of
lookup magic, doing things like looking in working trees for
.git directories and appending ".git" for bare repos.
For clone, this magic happens in get_repo_path. For
everything else, it happens in enter_repo. In both cases,
there are some ambiguous or confusing cases that aren't
handled well, and there is one case that is not handled the
same by both methods.
This patch tries to provide (and test!) standard, sensible
lookup rules for both code paths. The intended changes are:
1. When looking up "foo", we have always preferred
a working tree "foo" (containing "foo/.git") over the
bare "foo.git". But we did not prefer a bare "foo" over
"foo.git". With this patch, we do so.
2. We would select directories that existed but didn't
actually look like git repositories. With this patch,
we make sure a selected directory looks like a git
repo. Not only is this more sensible in general, but it
will help anybody who is negatively affected by change
(1) (e.g., if they had "foo.git" next to its
separate work tree "foo", and expect to keep finding
"foo.git" when they reference "foo").
3. The enter_repo code path would, given "foo", look for
"foo.git/.git" (i.e., do the ".git" append magic even
for a repo with working tree). The clone code path did
not; with this patch, they now behave the same.
In the unlikely case of a working tree overlaying a bare
repo (i.e., a ".git" directory _inside_ a bare repo), we
continue to treat it as a working tree (preferring the
"inner" .git over the bare repo). This is mainly because the
combination seems nonsensical, and I'd rather stick with
existing behavior on the off chance that somebody is relying
on it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
13 years ago
|
|
|
(S_ISREG(st.st_mode) ||
|
|
|
|
(S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) {
|
|
|
|
strbuf_addstr(&validated_path, suffix[i]);
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
break;
|
|
|
|
}
|
|
|
|
strbuf_setlen(&used_path, baselen);
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
}
|
|
|
|
if (!suffix[i])
|
|
|
|
return NULL;
|
|
|
|
gitfile = read_gitfile(used_path.buf);
|
|
|
|
if (gitfile) {
|
|
|
|
strbuf_reset(&used_path);
|
|
|
|
strbuf_addstr(&used_path, gitfile);
|
|
|
|
}
|
|
|
|
if (chdir(used_path.buf))
|
|
|
|
return NULL;
|
|
|
|
path = validated_path.buf;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
const char *gitfile = read_gitfile(path);
|
|
|
|
if (gitfile)
|
|
|
|
path = gitfile;
|
|
|
|
if (chdir(path))
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_git_directory(".")) {
|
|
|
|
set_git_dir(".");
|
|
|
|
check_repository_format();
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
19 years ago
|
|
|
return path;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Compute the mode a path should have under the shared-repository setting.
 *
 * "mode" is the path's current mode.  The setting is read from
 * get_shared_repository(): a positive value is OR-ed into the existing
 * mode, while a negative value means its absolute value replaces the low
 * nine permission bits (0777) outright.
 *
 * Returns the adjusted mode; bits outside 0777 are carried over from "mode".
 */
static int calc_shared_perm(int mode)
{
	/* Read the setting once; the sign selects "replace" vs. "add". */
	int shared = get_shared_repository();
	int tweak = shared < 0 ? -shared : shared;

	/* Never grant write permission on something its owner cannot write. */
	if (!(mode & S_IWUSR))
		tweak &= ~0222;
	if (mode & S_IXUSR)
		/* Copy read bits to execute bits */
		tweak |= (tweak & 0444) >> 2;

	if (shared < 0)
		mode = (mode & ~0777) | tweak;
	else
		mode |= tweak;

	return mode;
}
|
|
|
|
|
|
|
|
|
|
|
|
int adjust_shared_perm(const char *path)
|
|
|
|
{
|
|
|
|
int old_mode, new_mode;
|
|
|
|
|
|
|
|
if (!get_shared_repository())
|
|
|
|
return 0;
|
|
|
|
if (get_st_mode_bits(path, &old_mode) < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
new_mode = calc_shared_perm(old_mode);
|
|
|
|
if (S_ISDIR(old_mode)) {
|
|
|
|
/* Copy read bits to execute bits */
|
|
|
|
new_mode |= (new_mode & 0444) >> 2;
|
|
|
|
new_mode |= FORCE_DIR_SET_GID;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (((old_mode ^ new_mode) & ~S_IFMT) &&
|
|
|
|
chmod(path, (new_mode & ~S_IFMT)) < 0)
|
|
|
|
return -2;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Create directory "dir" with mode 0777 (subject to the umask), treating
 * an already existing entry as success.
 *
 * Any other mkdir() failure is reported with perror() and terminates the
 * process with exit status 1.  When the directory was newly created and
 * "share" is non-zero, adjust_shared_perm() is applied to it and die() is
 * called if that fails.  Note that an existing directory is left as it
 * is, even when "share" is set.
 */
void safe_create_dir(const char *dir, int share)
{
	if (mkdir(dir, 0777) < 0) {
		if (errno != EEXIST) {
			perror(dir);
			exit(1);
		}
	}
	else if (share && adjust_shared_perm(dir))
		die(_("Could not make %s writable by group"), dir);
}
|
|
|
|
|
|
|
|
/*
 * Tell whether two paths start from a comparable root: either both are
 * relative, or both are absolute and begin with the same character when
 * compared case-insensitively (so that "c:" and "C:" are treated alike).
 *
 * Returns non-zero if so, 0 otherwise.
 */
static int have_same_root(const char *path1, const char *path2)
{
	int is_abs1, is_abs2;

	is_abs1 = is_absolute_path(path1);
	is_abs2 = is_absolute_path(path2);
	return (is_abs1 && is_abs2 && tolower(path1[0]) == tolower(path2[0])) ||
	       (!is_abs1 && !is_abs2);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Give path as relative to prefix.
|
|
|
|
*
|
|
|
|
* The strbuf may or may not be used, so do not assume it contains the
|
|
|
|
* returned path.
|
|
|
|
*/
|
|
|
|
const char *relative_path(const char *in, const char *prefix,
|
|
|
|
struct strbuf *sb)
|
Make git_dir a path relative to work_tree in setup_work_tree()
Once we find the absolute paths for git_dir and work_tree, we can make
git_dir a relative path since we know pwd will be work_tree. This should
save the kernel some time traversing the path to work_tree all the time
if git_dir is inside work_tree.
Daniel's patch didn't apply for me as-is, so I recreated it with some
differences, and here are the numbers from ten runs each.
There is some IO for me - probably due to more-or-less random flushing of
the journal - so the variation is bigger than I'd like, but whatever:
Before:
real 0m8.135s
real 0m7.933s
real 0m8.080s
real 0m7.954s
real 0m7.949s
real 0m8.112s
real 0m7.934s
real 0m8.059s
real 0m7.979s
real 0m8.038s
After:
real 0m7.685s
real 0m7.968s
real 0m7.703s
real 0m7.850s
real 0m7.995s
real 0m7.817s
real 0m7.963s
real 0m7.955s
real 0m7.848s
real 0m7.969s
Now, going by "best of ten" (on the assumption that the longer numbers
are all due to IO), I'm saying a 7.933s -> 7.685s reduction, and it does
seem to be outside of the noise (ie the "after" case never broke 8s, while
the "before" case did so half the time).
So looks like about 3% to me.
Doing it for a slightly smaller test-case (just the "arch" subdirectory)
gets more stable numbers probably due to not filling the journal with
metadata updates, so we have:
Before:
real 0m1.633s
real 0m1.633s
real 0m1.633s
real 0m1.632s
real 0m1.632s
real 0m1.630s
real 0m1.634s
real 0m1.631s
real 0m1.632s
real 0m1.632s
After:
real 0m1.610s
real 0m1.609s
real 0m1.610s
real 0m1.608s
real 0m1.607s
real 0m1.610s
real 0m1.609s
real 0m1.611s
real 0m1.608s
real 0m1.611s
where I'ld just take the averages and say 1.632 vs 1.610, which is just
over 1% peformance improvement.
So it's not in the noise, but it's not as big as I initially thought and
measured.
(That said, it obviously depends on how deep the working directory path is
too, and whether it is behind NFS or something else that might need to
cause more work to look up).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
{
|
|
|
|
int in_len = in ? strlen(in) : 0;
|
|
|
|
int prefix_len = prefix ? strlen(prefix) : 0;
|
|
|
|
int in_off = 0;
|
|
|
|
int prefix_off = 0;
|
|
|
|
int i = 0, j = 0;
|
|
|
|
|
|
|
|
if (!in_len)
|
|
|
|
return "./";
|
|
|
|
else if (!prefix_len)
|
|
|
|
return in;
|
|
|
|
|
|
|
|
if (have_same_root(in, prefix))
|
|
|
|
/* bypass dos_drive, for "c:" is identical to "C:" */
|
|
|
|
i = j = has_dos_drive_prefix(in);
|
|
|
|
else {
|
|
|
|
return in;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (i < prefix_len && j < in_len && prefix[i] == in[j]) {
|
|
|
|
if (is_dir_sep(prefix[i])) {
|
|
|
|
while (is_dir_sep(prefix[i]))
|
|
|
|
i++;
|
|
|
|
while (is_dir_sep(in[j]))
|
|
|
|
j++;
|
|
|
|
prefix_off = i;
|
|
|
|
in_off = j;
|
|
|
|
} else {
|
|
|
|
i++;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (
|
|
|
|
/* "prefix" seems like prefix of "in" */
|
|
|
|
i >= prefix_len &&
|
|
|
|
/*
|
|
|
|
* but "/foo" is not a prefix of "/foobar"
|
|
|
|
* (i.e. prefix not end with '/')
|
|
|
|
*/
|
|
|
|
prefix_off < prefix_len) {
|
|
|
|
if (j >= in_len) {
|
|
|
|
/* in="/a/b", prefix="/a/b" */
|
|
|
|
in_off = in_len;
|
|
|
|
} else if (is_dir_sep(in[j])) {
|
|
|
|
/* in="/a/b/c", prefix="/a/b" */
|
|
|
|
while (is_dir_sep(in[j]))
|
|
|
|
j++;
|
|
|
|
in_off = j;
|
|
|
|
} else {
|
|
|
|
/* in="/a/bbb/c", prefix="/a/b" */
|
|
|
|
i = prefix_off;
|
|
|
|
}
|
|
|
|
} else if (
|
|
|
|
/* "in" is short than "prefix" */
|
|
|
|
j >= in_len &&
|
|
|
|
/* "in" not end with '/' */
|
|
|
|
in_off < in_len) {
|
|
|
|
if (is_dir_sep(prefix[i])) {
|
|
|
|
/* in="/a/b", prefix="/a/b/c/" */
|
|
|
|
while (is_dir_sep(prefix[i]))
|
|
|
|
i++;
|
|
|
|
in_off = in_len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
in += in_off;
|
|
|
|
in_len -= in_off;
|
|
|
|
|
|
|
|
if (i >= prefix_len) {
|
|
|
|
if (!in_len)
|
|
|
|
return "./";
|
|
|
|
else
|
|
|
|
return in;
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_reset(sb);
|
|
|
|
strbuf_grow(sb, in_len);
|
|
|
|
|
|
|
|
while (i < prefix_len) {
|
|
|
|
if (is_dir_sep(prefix[i])) {
|
|
|
|
strbuf_addstr(sb, "../");
|
|
|
|
while (is_dir_sep(prefix[i]))
|
|
|
|
i++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
if (!is_dir_sep(prefix[prefix_len - 1]))
|
|
|
|
strbuf_addstr(sb, "../");
|
|
|
|
|
|
|
|
strbuf_addstr(sb, in);
|
|
|
|
|
|
|
|
return sb->buf;
|
Make git_dir a path relative to work_tree in setup_work_tree()
Once we find the absolute paths for git_dir and work_tree, we can make
git_dir a relative path since we know pwd will be work_tree. This should
save the kernel some time traversing the path to work_tree all the time
if git_dir is inside work_tree.
Daniel's patch didn't apply for me as-is, so I recreated it with some
differences, and here are the numbers from ten runs each.
There is some IO for me - probably due to more-or-less random flushing of
the journal - so the variation is bigger than I'd like, but whatever:
Before:
real 0m8.135s
real 0m7.933s
real 0m8.080s
real 0m7.954s
real 0m7.949s
real 0m8.112s
real 0m7.934s
real 0m8.059s
real 0m7.979s
real 0m8.038s
After:
real 0m7.685s
real 0m7.968s
real 0m7.703s
real 0m7.850s
real 0m7.995s
real 0m7.817s
real 0m7.963s
real 0m7.955s
real 0m7.848s
real 0m7.969s
Now, going by "best of ten" (on the assumption that the longer numbers
are all due to IO), I'm saying a 7.933s -> 7.685s reduction, and it does
seem to be outside of the noise (ie the "after" case never broke 8s, while
the "before" case did so half the time).
So looks like about 3% to me.
Doing it for a slightly smaller test-case (just the "arch" subdirectory)
gets more stable numbers probably due to not filling the journal with
metadata updates, so we have:
Before:
real 0m1.633s
real 0m1.633s
real 0m1.633s
real 0m1.632s
real 0m1.632s
real 0m1.630s
real 0m1.634s
real 0m1.631s
real 0m1.632s
real 0m1.632s
After:
real 0m1.610s
real 0m1.609s
real 0m1.610s
real 0m1.608s
real 0m1.607s
real 0m1.610s
real 0m1.609s
real 0m1.611s
real 0m1.608s
real 0m1.611s
where I'ld just take the averages and say 1.632 vs 1.610, which is just
over 1% peformance improvement.
So it's not in the noise, but it's not as big as I initially thought and
measured.
(That said, it obviously depends on how deep the working directory path is
too, and whether it is behind NFS or something else that might need to
cause more work to look up).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A simpler implementation of relative_path
|
|
|
|
*
|
|
|
|
* Get relative path by removing "prefix" from "in". This function
|
|
|
|
* first appears in v1.5.6-1-g044bbbc, and makes git_dir shorter
|
|
|
|
* to increase performance when traversing the path to work_tree.
|
|
|
|
*/
|
|
|
|
const char *remove_leading_path(const char *in, const char *prefix)
|
|
|
|
{
|
|
|
|
static struct strbuf buf = STRBUF_INIT;
|
|
|
|
int i = 0, j = 0;
|
|
|
|
|
|
|
|
if (!prefix || !prefix[0])
|
|
|
|
return in;
|
|
|
|
while (prefix[i]) {
|
|
|
|
if (is_dir_sep(prefix[i])) {
|
|
|
|
if (!is_dir_sep(in[j]))
|
|
|
|
return in;
|
|
|
|
while (is_dir_sep(prefix[i]))
|
|
|
|
i++;
|
|
|
|
while (is_dir_sep(in[j]))
|
|
|
|
j++;
|
|
|
|
continue;
|
|
|
|
} else if (in[j] != prefix[i]) {
|
|
|
|
return in;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
if (
|
|
|
|
/* "/foo" is a prefix of "/foo" */
|
|
|
|
in[j] &&
|
|
|
|
/* "/foo" is not a prefix of "/foobar" */
|
|
|
|
!is_dir_sep(prefix[i-1]) && !is_dir_sep(in[j])
|
|
|
|
)
|
|
|
|
return in;
|
|
|
|
while (is_dir_sep(in[j]))
|
|
|
|
j++;
|
|
|
|
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
if (!in[j])
|
|
|
|
strbuf_addstr(&buf, ".");
|
|
|
|
else
|
|
|
|
strbuf_addstr(&buf, in + j);
|
|
|
|
return buf.buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * It is okay if dst == src, but they should not overlap otherwise.
 *
 * Performs the following normalizations on src, storing the result in dst:
 * - Ensures that components are separated by '/' (Windows only)
 * - Squashes sequences of '/'.
 * - Removes "." components.
 * - Removes ".." components, and the components that precede them.
 * Returns failure (non-zero) if a ".." component appears as first path
 * component anytime during the normalization. Otherwise, returns success (0).
 *
 * Note that this function is purely textual.  It does not follow symlinks,
 * verify the existence of the path, or make any system calls.
 *
 * prefix_len != NULL is for a specific case of prefix_pathspec():
 * assume that src == dst and src[0..prefix_len-1] is already
 * normalized, any time "../" eats up to the prefix_len part,
 * prefix_len is reduced. In the end prefix_len is the remaining
 * prefix that has not been overridden by user pathspec.
 *
 * NEEDSWORK: This function doesn't perform normalization w.r.t. trailing '/'.
 * For everything but the root folder itself, the normalized path should not
 * end with a '/', then the callers need to be fixed up accordingly.
 *
 */
int normalize_path_copy_len(char *dst, const char *src, int *prefix_len)
{
	char *dst0;
	int i;

	/* Copy a DOS drive prefix verbatim; ".." may never climb above it. */
	for (i = has_dos_drive_prefix(src); i > 0; i--)
		*dst++ = *src++;
	dst0 = dst;

	/* Collapse any run of leading separators into a single '/'. */
	if (is_dir_sep(*src)) {
		*dst++ = '/';
		while (is_dir_sep(*src))
			src++;
	}

	/* Each iteration consumes one path component from src. */
	for (;;) {
		char c = *src;

		/*
		 * A path component that begins with . could be
		 * special:
		 * (1) "." and ends   -- ignore and terminate.
		 * (2) "./"           -- ignore them, eat slash and continue.
		 * (3) ".." and ends  -- strip one and terminate.
		 * (4) "../"          -- strip one, eat slash and continue.
		 */
		if (c == '.') {
			if (!src[1]) {
				/* (1) */
				src++;
			} else if (is_dir_sep(src[1])) {
				/* (2) */
				src += 2;
				while (is_dir_sep(*src))
					src++;
				continue;
			} else if (src[1] == '.') {
				if (!src[2]) {
					/* (3) */
					src += 2;
					goto up_one;
				} else if (is_dir_sep(src[2])) {
					/* (4) */
					src += 3;
					while (is_dir_sep(*src))
						src++;
					goto up_one;
				}
			}
		}

		/* copy up to the next '/', and eat all '/' */
		while ((c = *src++) != '\0' && !is_dir_sep(c))
			*dst++ = c;
		if (is_dir_sep(c)) {
			*dst++ = '/';
			while (is_dir_sep(c))
				c = *src++;
			/* the loop above read one character too many */
			src--;
		} else if (!c)
			break;
		continue;

	up_one:
		/*
		 * dst0..dst is prefix portion, and dst[-1] is '/';
		 * go up one level.
		 */
		dst--;	/* go to trailing '/' */
		if (dst <= dst0)
			return -1;
		/* Windows: dst[-1] cannot be backslash anymore */
		while (dst0 < dst && dst[-1] != '/')
			dst--;
		if (prefix_len && *prefix_len > dst - dst0)
			*prefix_len = dst - dst0;
	}
	*dst = '\0';
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Normalize "src" into "dst" without any prefix-length tracking; this is
 * normalize_path_copy_len() called with a NULL prefix_len, and its return
 * value is passed through unchanged.
 */
int normalize_path_copy(char *dst, const char *src)
{
	return normalize_path_copy_len(dst, src, NULL);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* path = Canonical absolute path
|
|
|
|
* prefixes = string_list containing normalized, absolute paths without
|
|
|
|
* trailing slashes (except for the root directory, which is denoted by "/").
|
|
|
|
*
|
|
|
|
* Determines, for each path in prefixes, whether the "prefix"
|
|
|
|
* is an ancestor directory of path. Returns the length of the longest
|
|
|
|
* ancestor directory, excluding any trailing slashes, or -1 if no prefix
|
|
|
|
* is an ancestor. (Note that this means 0 is returned if prefixes is
|
|
|
|
* ["/"].) "/foo" is not considered an ancestor of "/foobar". Directories
|
|
|
|
* are not considered to be their own ancestors. path must be in a
|
|
|
|
* canonical form: empty components, or "." or ".." components are not
|
|
|
|
* allowed.
|
|
|
|
*/
|
|
|
|
int longest_ancestor_length(const char *path, struct string_list *prefixes)
|
|
|
|
{
|
|
|
|
int i, max_len = -1;
|
|
|
|
|
|
|
|
if (!strcmp(path, "/"))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
for (i = 0; i < prefixes->nr; i++) {
|
|
|
|
const char *ceil = prefixes->items[i].string;
|
|
|
|
int len = strlen(ceil);
|
|
|
|
|
|
|
|
if (len == 1 && ceil[0] == '/')
|
|
|
|
len = 0; /* root matches anything, with length 0 */
|
|
|
|
else if (!strncmp(path, ceil, len) && path[len] == '/')
|
|
|
|
; /* match of length len */
|
|
|
|
else
|
|
|
|
continue; /* no match */
|
|
|
|
|
|
|
|
if (len > max_len)
|
|
|
|
max_len = len;
|
|
|
|
}
|
|
|
|
|
|
|
|
return max_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Strip an arbitrary number of directory separators at the end of path.
 *
 * Only the first "len" bytes of "path" are considered.  Nothing is
 * modified; the return value is the length that remains once the
 * trailing separators are left out (0 if there was nothing else).
 */
static inline int chomp_trailing_dir_sep(const char *path, int len)
{
	while (len && is_dir_sep(path[len - 1]))
		len--;
	return len;
}
|
|
|
|
|
|
|
|
/*
 * If path ends with suffix (complete path components), returns the
 * part before suffix (sans trailing directory separators).
 * Otherwise returns NULL.
 *
 * A non-NULL result is a fresh copy made with xstrndup(); "path" and
 * "suffix" themselves are left untouched.
 */
char *strip_path_suffix(const char *path, const char *suffix)
{
	int path_len = strlen(path), suffix_len = strlen(suffix);

	/* Compare backwards, treating a run of separators as a single one. */
	while (suffix_len) {
		if (!path_len)
			return NULL;

		if (is_dir_sep(path[path_len - 1])) {
			if (!is_dir_sep(suffix[suffix_len - 1]))
				return NULL;
			path_len = chomp_trailing_dir_sep(path, path_len);
			suffix_len = chomp_trailing_dir_sep(suffix, suffix_len);
		}
		else if (path[--path_len] != suffix[--suffix_len])
			return NULL;
	}

	/* The suffix must begin at a component boundary of path. */
	if (path_len && !is_dir_sep(path[path_len - 1]))
		return NULL;
	return xstrndup(path, chomp_trailing_dir_sep(path, path_len));
}
|
|
|
|
|
|
|
|
/*
 * Purely textual check that path "p" cannot be used to alias another
 * directory.
 *
 * Returns 0 when "p" is acceptable and -1 when it is NULL, does not start
 * with '/' or '~', or contains an empty, "." or ".." component after its
 * first character ("//", "/./", "/../", or a trailing "/." or "/..").
 * Components made of three or more dots are accepted.
 */
int daemon_avoid_alias(const char *p)
{
	int sl, ndot;

	/*
	 * This resurrects the belts and suspenders paranoia check by HPA
	 * done in <435560F7.4080006@zytor.com> thread, now enter_repo()
	 * does not do getcwd() based path canonicalization.
	 *
	 * sl becomes true immediately after seeing '/' and continues to
	 * be true as long as dots continue after that without intervening
	 * non-dot character.
	 */
	if (!p || (*p != '/' && *p != '~'))
		return -1;
	/* The leading '/' or '~' is treated as if a slash was just seen. */
	sl = 1; ndot = 0;
	p++;

	while (1) {
		char ch = *p++;
		if (sl) {
			if (ch == '.')
				ndot++;
			else if (ch == '/') {
				if (ndot < 3)
					/* reject //, /./ and /../ */
					return -1;
				ndot = 0;
			}
			else if (ch == 0) {
				if (0 < ndot && ndot < 3)
					/* reject /.$ and /..$ */
					return -1;
				return 0;
			}
			else
				/* an ordinary character: not a dots-only component */
				sl = ndot = 0;
		}
		else if (ch == 0)
			return 0;
		else if (ch == '/') {
			sl = 1;
			ndot = 0;
		}
	}
}
|
|
|
|
|
path: add is_ntfs_dotgit() helper
We do not allow paths with a ".git" component to be added to
the index, as that would mean repository contents could
overwrite our repository files. However, asking "is this
path the same as .git" is not as simple as strcmp() on some
filesystems.
On NTFS (and FAT32), there exist so-called "short names" for
backwards-compatibility: 8.3 compliant names that refer to the same files
as their long names. As ".git" is not an 8.3 compliant name, a short name
is generated automatically, typically "git~1".
Depending on the Windows version, any combination of trailing spaces and
periods are ignored, too, so that both "git~1." and ".git." still refer
to the Git directory. The reason is that 8.3 stores file names shorter
than 8 characters with trailing spaces. So literally, it does not matter
for the short name whether it is padded with spaces or whether it is
shorter than 8 characters, it is considered to be the exact same.
The period is the separator between file name and file extension, and
again, an empty extension consists just of spaces in 8.3 format. So
technically, we would need only take care of the equivalent of this
regex:
(\.git {0,4}|git~1 {0,3})\. {0,3}
However, there are indications that at least some Windows versions might
be more lenient and accept arbitrary combinations of trailing spaces and
periods and strip them out. So we're playing it real safe here. Besides,
there can be little doubt about the intention behind using file names
matching even the more lenient pattern specified above, therefore we
should be fine with disallowing such patterns.
Extra care is taken to catch names such as '.\\.git\\booh' because the
backslash is marked as a directory separator only on Windows, and we want
to use this new helper function also in fsck on other platforms.
A big thank you goes to Ed Thomson and an unnamed Microsoft engineer for
the detailed analysis performed to come up with the corresponding fixes
for libgit2.
This commit adds a function to detect whether a given file name can refer
to the Git directory by mistake.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
/*
 * Returns 1 when every byte of path in the range [skip, len) is a
 * space or a period (an empty range qualifies), and 0 otherwise.
 * Also returns 0 when len is shorter than skip.
 */
static int only_spaces_and_periods(const char *path, size_t len, size_t skip)
{
	size_t i;

	if (skip > len)
		return 0;

	for (i = skip; i < len; i++) {
		if (path[i] == ' ' || path[i] == '.')
			continue;
		return 0;
	}
	return 1;
}
|
|
|
|
|
|
|
|
/*
 * Returns 1 when name could refer to the ".git" directory on NTFS,
 * 0 otherwise.
 *
 * name is examined one backslash-separated component at a time.  A
 * component matches when it is ".git" or "git~1" (compared without
 * regard to case), optionally followed by nothing but spaces and
 * periods.  The scan ends without a match at the first NUL or
 * non-backslash directory separator.
 */
int is_ntfs_dotgit(const char *name)
{
	const char *component = name;

	for (;;) {
		int end = 0;

		/* locate the end of the current component */
		while (component[end] && component[end] != '\\' &&
		       !is_dir_sep(component[end]))
			end++;

		if (only_spaces_and_periods(component, end, 4) &&
		    !strncasecmp(component, ".git", 4))
			return 1;
		if (only_spaces_and_periods(component, end, 5) &&
		    !strncasecmp(component, "git~1", 5))
			return 1;

		/*
		 * Only a backslash lets us continue with the next
		 * component; it is checked explicitly because
		 * is_dir_sep() need not treat it as a separator.
		 */
		if (component[end] != '\\')
			return 0;
		component += end + 1;
	}
}
|
|
|
|
|
|
|
|
/*
 * Build the path of git's per-user configuration file called filename
 * (which must not be NULL).
 *
 * A non-empty $XDG_CONFIG_HOME yields "$XDG_CONFIG_HOME/git/<filename>".
 * Otherwise, if $HOME is set, the result is
 * "$HOME/.config/git/<filename>".  With neither available, NULL is
 * returned.  The string is produced by mkpathdup().
 */
char *xdg_config_home(const char *filename)
{
	const char *xdg_dir;
	const char *home_dir;

	assert(filename);

	xdg_dir = getenv("XDG_CONFIG_HOME");
	if (xdg_dir != NULL && xdg_dir[0] != '\0')
		return mkpathdup("%s/git/%s", xdg_dir, filename);

	home_dir = getenv("HOME");
	if (home_dir == NULL)
		return NULL;
	return mkpathdup("%s/.config/git/%s", home_dir, filename);
}
|
memoize common git-path "constant" files
One of the most common uses of git_path() is to pass a
constant, like git_path("MERGE_MSG"). This has two
drawbacks:
1. The return value is a static buffer, and the lifetime
is dependent on other calls to git_path, etc.
2. There's no compile-time checking of the pathname. This
is OK for a one-off (after all, we have to spell it
correctly at least once), but many of these constant
strings appear throughout the code.
This patch introduces a series of functions to "memoize"
these strings, which are essentially globals for the
lifetime of the program. We compute the value once, take
ownership of the buffer, and return the cached value for
subsequent calls. cache.h provides a helper macro for
defining these functions as one-liners, and defines a few
common ones for global use.
Using a macro is a little bit gross, but it does nicely
document the purpose of the functions. If we need to touch
them all later (e.g., because we learned how to change the
git_dir variable at runtime, and need to invalidate all of
the stored values), it will be much easier to have the
complete list.
Note that the shared-global functions have separate, manual
declarations. We could do something clever with the macros
(e.g., expand it to a declaration in some places, and a
declaration _and_ a definition in path.c). But there aren't
that many, and it's probably better to stay away from
too-magical macros.
Likewise, if we abandon the C preprocessor in favor of
generating these with a script, we could get much fancier.
E.g., normalizing "FOO/BAR-BAZ" into "git_path_foo_bar_baz".
But the small amount of saved typing is probably not worth
the resulting confusion to readers who want to grep for the
function's definition.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
|
|
|
|
/*
 * Accessors for frequently used, fixed file names.  Each line pairs an
 * accessor name with the file name it stands for.
 *
 * NOTE(review): GIT_PATH_FUNC itself is not defined in this part of the
 * file; the accompanying commit note says it lives in cache.h and that
 * each generated function computes its path once and returns the
 * cached value afterwards -- confirm against the macro definition.
 */
GIT_PATH_FUNC(git_path_cherry_pick_head, "CHERRY_PICK_HEAD")
|
|
|
|
GIT_PATH_FUNC(git_path_revert_head, "REVERT_HEAD")
|
|
|
|
GIT_PATH_FUNC(git_path_squash_msg, "SQUASH_MSG")
|
|
|
|
GIT_PATH_FUNC(git_path_merge_msg, "MERGE_MSG")
|
|
|
|
GIT_PATH_FUNC(git_path_merge_rr, "MERGE_RR")
|
|
|
|
GIT_PATH_FUNC(git_path_merge_mode, "MERGE_MODE")
|
|
|
|
GIT_PATH_FUNC(git_path_merge_head, "MERGE_HEAD")
|
|
|
|
GIT_PATH_FUNC(git_path_fetch_head, "FETCH_HEAD")
|
|
|
|
GIT_PATH_FUNC(git_path_shallow, "shallow")
|