diff -up util-linux-2.23.2/include/pathnames.h.kzak util-linux-2.23.2/include/pathnames.h --- util-linux-2.23.2/include/pathnames.h.kzak 2015-06-26 10:00:19.111877564 +0200 +++ util-linux-2.23.2/include/pathnames.h 2015-06-26 10:00:51.623630869 +0200 @@ -85,6 +85,10 @@ #define _PATH_PROC_LOCKS "/proc/locks" #define _PATH_PROC_CDROMINFO "/proc/sys/dev/cdrom/info" +#define _PATH_PROC_UIDMAP "/proc/self/uid_map" +#define _PATH_PROC_GIDMAP "/proc/self/gid_map" +#define _PATH_PROC_SETGROUPS "/proc/self/setgroups" + #define _PATH_PROC_ATTR_CURRENT "/proc/self/attr/current" #define _PATH_PROC_ATTR_EXEC "/proc/self/attr/exec" #define _PATH_PROC_CAPLASTCAP "/proc/sys/kernel/cap_last_cap" diff -up util-linux-2.23.2/sys-utils/Makemodule.am.kzak util-linux-2.23.2/sys-utils/Makemodule.am diff -up util-linux-2.23.2/sys-utils/nsenter.1.kzak util-linux-2.23.2/sys-utils/nsenter.1 --- util-linux-2.23.2/sys-utils/nsenter.1.kzak 2015-06-26 09:58:39.468633643 +0200 +++ util-linux-2.23.2/sys-utils/nsenter.1 2015-06-26 09:58:51.672541041 +0200 @@ -1,44 +1,45 @@ -.TH NSENTER 1 "January 2013" "util-linux" "User Commands" +.TH NSENTER 1 "June 2013" "util-linux" "User Commands" .SH NAME nsenter \- run program with namespaces of other processes .SH SYNOPSIS .B nsenter -.RI [ options ] -.RI [ program ] -.RI [ arguments ] +[options] +.RI [ program +.RI [ arguments ]] .SH DESCRIPTION Enters the namespaces of one or more other processes and then executes the specified program. Enterable namespaces are: .TP .B mount namespace -mounting and unmounting filesystems will not affect rest of the system +Mounting and unmounting filesystems will not affect the rest of the system .RB ( CLONE_\:NEWNS -flag), except for filesystems which are explicitly marked as shared (by mount ---make-\:shared). See /proc\:/self\:/mountinfo for the shared flag. 
+flag), except for filesystems which are explicitly marked as shared (with +\fBmount --make-\:shared\fP; see \fI/proc\:/self\:/mountinfo\fP for the +\fBshared\fP flag). .TP .B UTS namespace -setting hostname, domainname will not affect rest of the system +Setting hostname or domainname will not affect the rest of the system. .RB ( CLONE_\:NEWUTS -flag). +flag) .TP .B IPC namespace -process will have independent namespace for System V message queues, semaphore -sets and shared memory segments +The process will have an independent namespace for System V message queues, +semaphore sets and shared memory segments. .RB ( CLONE_\:NEWIPC -flag). +flag) .TP .B network namespace -process will have independent IPv4 and IPv6 stacks, IP routing tables, firewall -rules, the +The process will have independent IPv4 and IPv6 stacks, IP routing tables, +firewall rules, the .I /proc\:/net and .I /sys\:/class\:/net -directory trees, sockets etc. +directory trees, sockets, etc. .RB ( CLONE_\:NEWNET -flag). +flag) .TP .B PID namespace -children will have a set of PID to process mappings separate from the +Children will have a set of PID to process mappings separate from the .B nsenter process .RB ( CLONE_\:NEWPID @@ -46,18 +47,18 @@ flag). .B nsenter will fork by default if changing the PID namespace, so that the new program and its children share the same PID namespace and are visible to each other. -If \-\-no\-fork is used, the new program will be exec'ed without forking. -.PP -See the -.BR clone (2) -for exact semantics of the flags. +If \fB\-\-no\-fork\fP is used, the new program will be exec'ed without forking. .TP -If program is not given, run ``${SHELL}'' (default: /bin\:/sh). +.B user namespace +The process will have a distinct set of UIDs, GIDs and capabilities. +.RB ( CLONE_\:NEWUSER +flag) +.TP +See \fBclone\fP(2) for the exact semantics of the flags. +.TP +If \fIprogram\fP is not given, then ``${SHELL}'' is run (default: /bin\:/sh). 
.SH OPTIONS -Argument with square brakets, such as [\fIfile\fR], means optional argument. -Command line syntax to specify optional argument \-\-mount=/path\:/to\:/file. -Please notice the equals sign. .TP \fB\-t\fR, \fB\-\-target\fR \fIpid\fP Specify a target process to get contexts from. The paths to the contexts @@ -83,6 +84,9 @@ the network namespace /proc/\fIpid\fR/ns/pid the PID namespace .TP +/proc/\fIpid\fR/ns/user +the user namespace +.TP /proc/\fIpid\fR/root the root directory .TP @@ -91,51 +95,71 @@ the working directory respectively .PD .RE .TP -\fB\-m\fR, \fB\-\-mount\fR [\fIfile\fR] -Enter the mount namespace. If no file is specified enter the mount namespace -of the target process. If file is specified enter the mount namespace +\fB\-m\fR, \fB\-\-mount\fR[=\fIfile\fR] +Enter the mount namespace. If no file is specified, enter the mount namespace +of the target process. If file is specified, enter the mount namespace specified by file. .TP -\fB\-u\fR, \fB\-\-uts\fR [\fIfile\fR] -Enter the UTS namespace. If no file is specified enter the UTS namespace of -the target process. If file is specified enter the UTS namespace specified by +\fB\-u\fR, \fB\-\-uts\fR[=\fIfile\fR] +Enter the UTS namespace. If no file is specified, enter the UTS namespace of +the target process. If file is specified, enter the UTS namespace specified by file. .TP -\fB\-i\fR, \fB\-\-ipc\fR [\fIfile\fR] -Enter the IPC namespace. If no file is specified enter the IPC namespace of -the target process. If file is specified enter the IPC namespace specified by +\fB\-i\fR, \fB\-\-ipc\fR[=\fIfile\fR] +Enter the IPC namespace. If no file is specified, enter the IPC namespace of +the target process. If file is specified, enter the IPC namespace specified by file. .TP -\fB\-n\fR, \fB\-\-net\fR [\fIfile\fR] -Enter the network namespace. If no file is specified enter the network -namespace of the target process. 
If file is specified enter the network +\fB\-n\fR, \fB\-\-net\fR[=\fIfile\fR] +Enter the network namespace. If no file is specified, enter the network +namespace of the target process. If file is specified, enter the network namespace specified by file. .TP -\fB\-p\fR, \fB\-\-pid\fR [\fIfile\fR] -Enter the PID namespace. If no file is specified enter the PID namespace of -the target process. If file is specified enter the PID namespace specified by +\fB\-p\fR, \fB\-\-pid\fR[=\fIfile\fR] +Enter the PID namespace. If no file is specified, enter the PID namespace of +the target process. If file is specified, enter the PID namespace specified by file. .TP -\fB\-r\fR, \fB\-\-root\fR [\fIdirectory\fR] -Set the root directory. If no directory is specified set the root directory to -the root directory of the target process. If directory is specified set the +\fB\-U\fR, \fB\-\-user\fR[=\fIfile\fR] +Enter the user namespace. If no file is specified, enter the user namespace of +the target process. If file is specified, enter the user namespace specified by +file. See also the \fB\-\-setuid\fR and \fB\-\-setgid\fR options. +.TP +\fB\-G\fR, \fB\-\-setgid\fR \fIgid\fR +Set the group ID which will be used in the entered namespace and drop +supplementary groups. +.BR nsenter (1) +always sets GID for user namespaces, the default is 0. +.TP +\fB\-S\fR, \fB\-\-setuid\fR \fIuid\fR +Set the user ID which will be used in the entered namespace. +.BR nsenter (1) +always sets UID for user namespaces, the default is 0. +.TP +\fB\-\-preserve\-credentials\fR +Don't modify UID and GID when entering a user namespace. The default is to +drop supplementary groups and set GID and UID to 0. +.TP +\fB\-r\fR, \fB\-\-root\fR[=\fIdirectory\fR] +Set the root directory. If no directory is specified, set the root directory to +the root directory of the target process. If directory is specified, set the root directory to the specified directory. 
.TP -\fB\-w\fR, \fB\-\-wd\fR [\fIdirectory\fR] -Set the working directory. If no directory is specified set the working +\fB\-w\fR, \fB\-\-wd\fR[=\fIdirectory\fR] +Set the working directory. If no directory is specified, set the working directory to the working directory of the target process. If directory is -specified set the working directory to the specified directory. +specified, set the working directory to the specified directory. .TP -\fB\-F\fR, \fB\-\-no-fork\fR -Do not fork before exec'ing the specified program. By default when entering a -pid namespace enter calls fork before calling exec so that the children will be -in the newly entered pid namespace. +\fB\-F\fR, \fB\-\-no\-fork\fR +Do not fork before exec'ing the specified program. By default, when entering a +PID namespace, \fBnsenter\fP calls \fBfork\fP before calling \fBexec\fP so that +any children will also be in the newly entered PID namespace. .TP \fB\-V\fR, \fB\-\-version\fR Display version information and exit. .TP \fB\-h\fR, \fB\-\-help\fR -Print a help message. +Display help text and exit. .SH SEE ALSO .BR setns (2), .BR clone (2) diff -up util-linux-2.23.2/sys-utils/nsenter.c.kzak util-linux-2.23.2/sys-utils/nsenter.c --- util-linux-2.23.2/sys-utils/nsenter.c.kzak 2015-06-26 09:58:39.468633643 +0200 +++ util-linux-2.23.2/sys-utils/nsenter.c 2015-06-26 09:58:51.673541033 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include "strutils.h" #include "nls.h" @@ -42,7 +43,12 @@ static struct namespace_file { int fd; } namespace_files[] = { /* Careful the order is significant in this array. + * + * The user namespace comes first, so that it is entered + * first. This gives an unprivileged user the potential to + * enter the other namespaces. 
*/ + { .nstype = CLONE_NEWUSER, .name = "ns/user", .fd = -1 }, { .nstype = CLONE_NEWIPC, .name = "ns/ipc", .fd = -1 }, { .nstype = CLONE_NEWUTS, .name = "ns/uts", .fd = -1 }, { .nstype = CLONE_NEWNET, .name = "ns/net", .fd = -1 }, @@ -56,18 +62,25 @@ static void usage(int status) FILE *out = status == EXIT_SUCCESS ? stdout : stderr; fputs(USAGE_HEADER, out); - fprintf(out, _(" %s [options] [args...]\n"), + fprintf(out, _(" %s [options] [...]\n"), program_invocation_short_name); + fputs(USAGE_SEPARATOR, out); + fputs(_("Run a program with namespaces of other processes.\n"), out); + fputs(USAGE_OPTIONS, out); fputs(_(" -t, --target target process to get namespaces from\n"), out); - fputs(_(" -m, --mount [=] enter mount namespace\n"), out); - fputs(_(" -u, --uts [=] enter UTS namespace (hostname etc)\n"), out); - fputs(_(" -i, --ipc [=] enter System V IPC namespace\n"), out); - fputs(_(" -n, --net [=] enter network namespace\n"), out); - fputs(_(" -p, --pid [=] enter pid namespace\n"), out); - fputs(_(" -r, --root [=] set the root directory\n"), out); - fputs(_(" -w, --wd [=] set the working directory\n"), out); + fputs(_(" -m, --mount[=] enter mount namespace\n"), out); + fputs(_(" -u, --uts[=] enter UTS namespace (hostname etc)\n"), out); + fputs(_(" -i, --ipc[=] enter System V IPC namespace\n"), out); + fputs(_(" -n, --net[=] enter network namespace\n"), out); + fputs(_(" -p, --pid[=] enter pid namespace\n"), out); + fputs(_(" -U, --user[=] enter user namespace\n"), out); + fputs(_(" -S, --setuid set uid in entered namespace\n"), out); + fputs(_(" -G, --setgid set gid in entered namespace\n"), out); + fputs(_(" --preserve-credentials do not touch uids or gids\n"), out); + fputs(_(" -r, --root[=] set the root directory\n"), out); + fputs(_(" -w, --wd[=] set the working directory\n"), out); fputs(_(" -F, --no-fork do not fork before exec'ing \n"), out); fputs(USAGE_SEPARATOR, out); @@ -153,6 +166,9 @@ static void continue_as_child(void) int main(int argc, char 
*argv[]) { + enum { + OPT_PRESERVE_CRED = CHAR_MAX + 1 + }; static const struct option longopts[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V'}, @@ -162,24 +178,30 @@ int main(int argc, char *argv[]) { "ipc", optional_argument, NULL, 'i' }, { "net", optional_argument, NULL, 'n' }, { "pid", optional_argument, NULL, 'p' }, + { "user", optional_argument, NULL, 'U' }, + { "setuid", required_argument, NULL, 'S' }, + { "setgid", required_argument, NULL, 'G' }, { "root", optional_argument, NULL, 'r' }, { "wd", optional_argument, NULL, 'w' }, { "no-fork", no_argument, NULL, 'F' }, + { "preserve-credentials", no_argument, NULL, OPT_PRESERVE_CRED }, { NULL, 0, NULL, 0 } }; struct namespace_file *nsfile; - int c, namespaces = 0; - bool do_rd = false, do_wd = false; + int c, namespaces = 0, setgroups_nerrs = 0, preserve_cred = 0; + bool do_rd = false, do_wd = false, force_uid = false, force_gid = false; int do_fork = -1; /* unknown yet */ + uid_t uid = 0; + gid_t gid = 0; - setlocale(LC_MESSAGES, ""); + setlocale(LC_ALL, ""); bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); atexit(close_stdout); while ((c = - getopt_long(argc, argv, "hVt:m::u::i::n::p::r::w::F", + getopt_long(argc, argv, "+hVt:m::u::i::n::p::U::S:G:r::w::F", longopts, NULL)) != -1) { switch (c) { case 'h': @@ -221,6 +243,20 @@ int main(int argc, char *argv[]) else namespaces |= CLONE_NEWPID; break; + case 'U': + if (optarg) + open_namespace_fd(CLONE_NEWUSER, optarg); + else + namespaces |= CLONE_NEWUSER; + break; + case 'S': + uid = strtoul_or_err(optarg, _("failed to parse uid")); + force_uid = true; + break; + case 'G': + gid = strtoul_or_err(optarg, _("failed to parse gid")); + force_gid = true; + break; case 'F': do_fork = 0; break; @@ -236,6 +272,9 @@ int main(int argc, char *argv[]) else do_wd = true; break; + case OPT_PRESERVE_CRED: + preserve_cred = 1; + break; default: usage(EXIT_FAILURE); } @@ -253,6 +292,26 @@ int main(int argc, char *argv[]) 
open_target_fd(&wd_fd, "cwd", NULL); /* + * Update namespaces variable to contain all requested namespaces + */ + for (nsfile = namespace_files; nsfile->nstype; nsfile++) { + if (nsfile->fd < 0) + continue; + namespaces |= nsfile->nstype; + } + + /* for user namespaces we always set UID and GID (default is 0) + * and clear root's groups if --preserve-credentials is not specified */ + if ((namespaces & CLONE_NEWUSER) && !preserve_cred) { + force_uid = true, force_gid = true; + + /* We call setgroups() before and after we enter user namespace, + * let's complain only if both fail */ + if (setgroups(0, NULL) != 0) + setgroups_nerrs++; + } + + /* * Now that we know which namespaces we want to enter, enter them. */ for (nsfile = namespace_files; nsfile->nstype; nsfile++) { @@ -302,6 +361,15 @@ int main(int argc, char *argv[]) if (do_fork == 1) continue_as_child(); + if (force_uid || force_gid) { + if (force_gid && setgroups(0, NULL) != 0 && setgroups_nerrs) /* drop supplementary groups */ + err(EXIT_FAILURE, _("setgroups failed")); + if (force_gid && setgid(gid) < 0) /* change GID */ + err(EXIT_FAILURE, _("setgid failed")); + if (force_uid && setuid(uid) < 0) /* change UID */ + err(EXIT_FAILURE, _("setuid failed")); + } + if (optind < argc) { execvp(argv[optind], argv + optind); err(EXIT_FAILURE, _("failed to execute %s"), argv[optind]); diff -up util-linux-2.23.2/sys-utils/unshare.1.kzak util-linux-2.23.2/sys-utils/unshare.1 --- util-linux-2.23.2/sys-utils/unshare.1.kzak 2015-06-26 09:58:39.484633521 +0200 +++ util-linux-2.23.2/sys-utils/unshare.1 2015-06-26 09:58:51.673541033 +0200 @@ -1,28 +1,27 @@ -.\" Process this file with -.\" groff -man -Tascii lscpu.1 -.\" -.TH UNSHARE 1 "July 2013" "util-linux" "User Commands" +.TH UNSHARE 1 "July 2014" "util-linux" "User Commands" .SH NAME unshare \- run program with some namespaces unshared from parent .SH SYNOPSIS .B unshare -.RI [ options ] +[options] .I program .RI [ arguments ] .SH DESCRIPTION Unshares the indicated 
namespaces from the parent process and then executes -the specified program. The namespaces to be unshared are indicated via +the specified \fIprogram\fR. The namespaces to be unshared are indicated via options. Unshareable namespaces are: .TP .BR "mount namespace" Mounting and unmounting filesystems will not affect the rest of the system (\fBCLONE_NEWNS\fP flag), except for filesystems which are explicitly marked as -shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP for the -\fBshared\fP flags). - -It's recommended to use \fBmount --make-rprivate\fP or \fBmount --make-rslave\fP -after \fBunshare --mount\fP to make sure that mountpoints in the new namespace -are really unshared from parental namespace. +shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP or +\fBfindmnt -o+PROPAGATION\fP for the \fBshared\fP flags). +.sp +.B unshare +automatically sets propagation to \fBprivate\fP +in the new mount namespace to make sure that the new namespace is really +unshared. This feature can be disabled with the option \fB\-\-propagation unchanged\fP. +Note that \fBprivate\fP is the kernel default. .TP .BR "UTS namespace" Setting hostname or domainname will not affect the rest of the system. @@ -40,13 +39,14 @@ sockets, etc. (\fBCLONE_NEWNET\fP flag) .BR "pid namespace" Children will have a distinct set of PID to process mappings from their parent. (\fBCLONE_NEWPID\fP flag) +.TP +.BR "user namespace" +The process will have a distinct set of UIDs, GIDs and capabilities. +(\fBCLONE_NEWUSER\fP flag) .PP See \fBclone\fR(2) for the exact semantics of the flags. .SH OPTIONS .TP -.BR \-h , " \-\-help" -Display help text and exit. -.TP .BR \-i , " \-\-ipc" Unshare the IPC namespace. .TP @@ -63,16 +63,68 @@ See also the \fB--fork\fP and \fB--mount .BR \-u , " \-\-uts" Unshare the UTS namespace. .TP +.BR \-U , " \-\-user" +Unshare the user namespace. 
+.TP .BR \-f , " \-\-fork" Fork the specified \fIprogram\fR as a child process of \fBunshare\fR rather than running it directly. This is useful when creating a new pid namespace. .TP -.BR \-\-mount-proc "[=\fImountpoint\fP]" -Just before running the program, mount the proc filesystem at the \fImountpoint\fP +.BR \-\-mount\-proc "[=\fImountpoint\fP]" +Just before running the program, mount the proc filesystem at \fImountpoint\fP (default is /proc). This is useful when creating a new pid namespace. It also implies creating a new mount namespace since the /proc mount would otherwise -mess up existing programs on the system. The new proc filesystem is explicitly +mess up existing programs on the system. The new proc filesystem is explicitly mounted as private (by MS_PRIVATE|MS_REC). +.TP +.BR \-r , " \-\-map\-root\-user" +Run the program only after the current effective user and group IDs have been mapped to +the superuser UID and GID in the newly created user namespace. This makes it possible to +conveniently gain capabilities needed to manage various aspects of the newly created +namespaces (such as configuring interfaces in the network namespace or mounting filesystems in +the mount namespace) even when run unprivileged. As a mere convenience feature, it does not support +more sophisticated use cases, such as mapping multiple ranges of UIDs and GIDs. +This option implies --setgroups=deny. +.TP +.BR "\-\-propagation \fIprivate|shared|slave|unchanged\fP" +Recursively set the mount propagation flag in the new mount namespace. The default +is to set the propagation to \fIprivate\fP; this feature can be disabled +with the \fIunchanged\fP argument. The option is silently ignored when a mount namespace (\fB\-\-mount\fP) +is not requested. +.TP +.BR "\-\-setgroups \fIallow|deny\fP" +Allow or deny +.BR setgroups (2) +syscall in user namespaces. 
+ +.BR setgroups (2) +is only callable with CAP_SETGID, and CAP_SETGID in a user +namespace (since Linux 3.19) does not give you permission to call setgroups(2) +until after the GID map has been set. The GID map is writable by root when +.BR setgroups (2) +is enabled, and the GID map becomes writable by unprivileged processes when +.BR setgroups (2) +is permanently disabled. +.TP +.BR \-V , " \-\-version" +Display version information and exit. +.TP +.BR \-h , " \-\-help" +Display help text and exit. +.SH EXAMPLES +.TP +.B # unshare --fork --pid --mount-proc readlink /proc/self +.TQ +1 +.br +Establish a PID namespace, ensure we're PID 1 in it against newly mounted +procfs instance. +.TP +.B $ unshare --map-root-user --user sh -c whoami +.TQ +root +.br +Establish a user namespace as an unprivileged user with a root user within it. .SH SEE ALSO .BR unshare (2), .BR clone (2), diff -up util-linux-2.23.2/sys-utils/unshare.c.kzak util-linux-2.23.2/sys-utils/unshare.c --- util-linux-2.23.2/sys-utils/unshare.c.kzak 2015-06-26 09:58:39.484633521 +0200 +++ util-linux-2.23.2/sys-utils/unshare.c 2015-06-26 09:58:51.673541033 +0200 @@ -32,19 +32,117 @@ #include "nls.h" #include "c.h" +#include "closestream.h" #include "namespace.h" #include "exec_shell.h" #include "xalloc.h" #include "pathnames.h" +#include "all-io.h" +/* 'private' is kernel default */ +#define UNSHARE_PROPAGATION_DEFAULT (MS_REC | MS_PRIVATE) + +enum { + SETGROUPS_NONE = -1, + SETGROUPS_DENY = 0, + SETGROUPS_ALLOW = 1, +}; + +static const char *setgroups_strings[] = +{ + [SETGROUPS_DENY] = "deny", + [SETGROUPS_ALLOW] = "allow" +}; + +static int setgroups_str2id(const char *str) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(setgroups_strings); i++) + if (strcmp(str, setgroups_strings[i]) == 0) + return i; + + errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str); +} + +static void setgroups_control(int action) +{ + const char *file = _PATH_PROC_SETGROUPS; + const char *cmd; + int fd; + + if (action < 0 || 
(size_t) action >= ARRAY_SIZE(setgroups_strings)) + return; + cmd = setgroups_strings[action]; + + fd = open(file, O_WRONLY); + if (fd < 0) { + if (errno == ENOENT) + return; + err(EXIT_FAILURE, _("cannot open %s"), file); + } + + if (write_all(fd, cmd, strlen(cmd))) + err(EXIT_FAILURE, _("write failed %s"), file); + close(fd); +} + +static void map_id(const char *file, uint32_t from, uint32_t to) +{ + char *buf; + int fd; + + fd = open(file, O_WRONLY); + if (fd < 0) + err(EXIT_FAILURE, _("cannot open %s"), file); + + xasprintf(&buf, "%u %u 1", from, to); + if (write_all(fd, buf, strlen(buf))) + err(EXIT_FAILURE, _("write failed %s"), file); + free(buf); + close(fd); +} + +static unsigned long parse_propagation(const char *str) +{ + size_t i; + static const struct prop_opts { + const char *name; + unsigned long flag; + } opts[] = { + { "slave", MS_REC | MS_SLAVE }, + { "private", MS_REC | MS_PRIVATE }, + { "shared", MS_REC | MS_SHARED }, + { "unchanged", 0 } + }; + + for (i = 0; i < ARRAY_SIZE(opts); i++) { + if (strcmp(opts[i].name, str) == 0) + return opts[i].flag; + } + + errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str); +} + +static void set_propagation(unsigned long flags) +{ + if (flags == 0) + return; + + if (mount("none", "/", NULL, flags, NULL) != 0) + err(EXIT_FAILURE, _("cannot change root filesystem propagation")); +} static void usage(int status) { FILE *out = status == EXIT_SUCCESS ? 
stdout : stderr; fputs(USAGE_HEADER, out); - fprintf(out, - _(" %s [options] [args...]\n"), program_invocation_short_name); + fprintf(out, _(" %s [options] [...]\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Run a program with some namespaces unshared from the parent.\n"), out); fputs(USAGE_OPTIONS, out); fputs(_(" -m, --mount unshare mounts namespace\n"), out); @@ -52,8 +150,13 @@ static void usage(int status) fputs(_(" -i, --ipc unshare System V IPC namespace\n"), out); fputs(_(" -n, --net unshare network namespace\n"), out); fputs(_(" -p, --pid unshare pid namespace\n"), out); + fputs(_(" -U, --user unshare user namespace\n"), out); fputs(_(" -f, --fork fork before launching \n"), out); fputs(_(" --mount-proc[=] mount proc filesystem first (implies --mount)\n"), out); + fputs(_(" -r, --map-root-user map current user to root (implies --user)\n"), out); + fputs(_(" --propagation \n" + " modify mount propagation in mount namespace\n"), out); + fputs(_(" -s, --setgroups allow|deny control the setgroups syscall in user namespaces\n"), out); fputs(USAGE_SEPARATOR, out); fputs(USAGE_HELP, out); @@ -66,7 +169,9 @@ static void usage(int status) int main(int argc, char *argv[]) { enum { - OPT_MOUNTPROC = CHAR_MAX + 1 + OPT_MOUNTPROC = CHAR_MAX + 1, + OPT_PROPAGATION, + OPT_SETGROUPS }; static const struct option longopts[] = { { "help", no_argument, 0, 'h' }, @@ -76,20 +181,29 @@ int main(int argc, char *argv[]) { "ipc", no_argument, 0, 'i' }, { "net", no_argument, 0, 'n' }, { "pid", no_argument, 0, 'p' }, + { "user", no_argument, 0, 'U' }, { "fork", no_argument, 0, 'f' }, { "mount-proc", optional_argument, 0, OPT_MOUNTPROC }, + { "map-root-user", no_argument, 0, 'r' }, + { "propagation", required_argument, 0, OPT_PROPAGATION }, + { "setgroups", required_argument, 0, OPT_SETGROUPS }, { NULL, 0, 0, 0 } }; + int setgrpcmd = SETGROUPS_NONE; int unshare_flags = 0; - int c, forkit = 0; + int c, forkit = 0, maproot = 0; const char *procmnt = 
NULL; + unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT; + uid_t real_euid = geteuid(); + gid_t real_egid = getegid();; setlocale(LC_ALL, ""); bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); + atexit(close_stdout); - while ((c = getopt_long(argc, argv, "+fhVmuinp", longopts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "+fhVmuinpUr", longopts, NULL)) != -1) { switch (c) { case 'f': forkit = 1; @@ -114,10 +228,23 @@ int main(int argc, char *argv[]) case 'p': unshare_flags |= CLONE_NEWPID; break; + case 'U': + unshare_flags |= CLONE_NEWUSER; + break; case OPT_MOUNTPROC: unshare_flags |= CLONE_NEWNS; procmnt = optarg ? optarg : "/proc"; break; + case 'r': + unshare_flags |= CLONE_NEWUSER; + maproot = 1; + break; + case OPT_SETGROUPS: + setgrpcmd = setgroups_str2id(optarg); + break; + case OPT_PROPAGATION: + propagation = parse_propagation(optarg); + break; default: usage(EXIT_FAILURE); } @@ -146,6 +273,25 @@ int main(int argc, char *argv[]) } } + if (maproot) { + if (setgrpcmd == SETGROUPS_ALLOW) + errx(EXIT_FAILURE, _("options --setgroups=allow and " + "--map-root-user are mutually exclusive")); + + /* since Linux 3.19 unprivileged writing of /proc/self/gid_map + * has been disabled unless /proc/self/setgroups is written + * first to permanently disable the ability to call setgroups + * in that user namespace. */ + setgroups_control(SETGROUPS_DENY); + map_id(_PATH_PROC_UIDMAP, 0, real_euid); + map_id(_PATH_PROC_GIDMAP, 0, real_egid); + + } else if (setgrpcmd != SETGROUPS_NONE) + setgroups_control(setgrpcmd); + + if ((unshare_flags & CLONE_NEWNS) && propagation) + set_propagation(propagation); + if (procmnt && (mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0 || mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0))