Harald Hoyer 2009-03-06 10:48:46 +01:00
parent 883594c833
commit 63efe29bb3
2 changed files with 249 additions and 479 deletions

View File

@ -96,7 +96,7 @@ source_all pre-pivot
getarg break && emergency_shell getarg break && emergency_shell
kill $(pidof udevd) kill $(pidof udevd)
echo "Switching to real root filesystem $root" echo "Switching to real root filesystem $root"
exec switch_root "$NEWROOT" "$INIT" $CMDLINE || { exec switch_root -n "$NEWROOT" || {
# davej doesn't like initrd bugs # davej doesn't like initrd bugs
echo "Something went very badly wrong in the initrd. Please " echo "Something went very badly wrong in the initrd. Please "
echo "file a bug against mkinitrd." echo "file a bug against mkinitrd."

View File

@ -1,10 +1,7 @@
/* /*
* switch_root.c * switchroot.c - switch to new root directory and start init.
* *
* Code to switch from initramfs to system root. * Copyright 2002-2008 Red Hat, Inc. All rights reserved.
* Based on nash.c from mkinitrd
*
* Copyright 2002-2009 Red Hat, Inc. All rights reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -19,538 +16,311 @@
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
* *
* Author(s): Erik Troan <ewt@redhat.com> * Authors:
* Jeremy Katz <katzj@redhat.com> * Peter Jones <pjones@redhat.com>
* Peter Jones <pjones@redhat.com> * Jeremy Katz <katzj@redhat.com>
* Harald Hoyer <harald@redhat.com>
*/ */


#define _GNU_SOURCE 1 #define _GNU_SOURCE 1

#include <sys/mount.h> #include <sys/mount.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <unistd.h> #include <fcntl.h>
#include <stdio.h> #include <stdio.h>
#include <dirent.h> #include <stdlib.h>
#include <alloca.h> #include <unistd.h>
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
#include <mntent.h>
#include <stdlib.h>
#include <ctype.h> #include <ctype.h>
#include <sys/mount.h>
#include <fcntl.h>
#include <linux/fs.h>


#ifndef MNT_FORCE #ifndef MS_MOVE
#define MNT_FORCE 0x1 #define MS_MOVE 8192
#endif #endif


#ifndef MNT_DETACH #ifndef MNT_DETACH
#define MNT_DETACH 0x2 #define MNT_DETACH 0x2
#endif #endif


enum {
ok,
err_no_directory,
err_usage,
};


#define asprintfa(str, fmt, ...) ({ \ static int readFD(int fd, char **buf)
char *_tmp = NULL; \
int _rc; \
_rc = asprintf((str), (fmt), __VA_ARGS__); \
if (_rc != -1) { \
_tmp = strdupa(*(str)); \
if (!_tmp) { \
_rc = -1; \
} else { \
free(*(str)); \
*(str) = _tmp; \
} \
} \
_rc; \
})



static inline int
setFdCoe(int fd, int enable)
{ {
int rc; char *p;
long flags = 0; size_t size = 16384;
int s = 0, filesize = 0;


rc = fcntl(fd, F_GETFD, &flags); if (!(*buf = calloc (16384, sizeof (char))))
if (rc < 0) return -1;
return rc;


if (enable) do {
flags |= FD_CLOEXEC; p = *buf + filesize;
else s = read(fd, p, 16384 - s);
flags &= ~FD_CLOEXEC; if (s < 0)
break;
filesize += s;
/* only exit for empty reads */
if (s == 0)
break;
else if (s == 16384) {
*buf = realloc(*buf, size + 16384);
memset(*buf + size, '\0', 16384);
size += s;
s = 0;
} else {
size += s;
}
} while (1);


rc = fcntl(fd, F_SETFD, flags); *buf = realloc(*buf, filesize+1);
return rc; (*buf)[filesize] = '\0';

return *buf ? filesize : -1;
} }


static char * static char *getKernelCmdLine(void)
getArg(char * cmd, char * end, char ** arg)
{ {
char quote = '\0'; static char *cmdline = NULL;
int fd = -1;
int errnum;


if (!cmd || cmd >= end) fd = open("/proc/cmdline", O_RDONLY);
return NULL; if (fd < 0) {
errnum = errno;
fprintf(stderr, "Error: Could not open /proc/cmdline: %m\n");
errno = errnum;
return NULL;
}
if (readFD(fd, &cmdline) < 0) {
errnum = errno;
fprintf(stderr, "Error: could not read /proc/cmdline: %m\n");
close(fd);
errno = errnum;
return NULL;
}
close(fd);


while (isspace(*cmd) && cmd < end) return cmdline;
cmd++;
if (cmd >= end)
return NULL;

if (*cmd == '"')
cmd++, quote = '"';
else if (*cmd == '\'')
cmd++, quote = '\'';

if (quote) {
*arg = cmd;

/* This doesn't support \ escapes */
while (cmd < end && *cmd != quote)
cmd++;

if (cmd == end) {
printf("error: quote mismatch for %s\n", *arg);
return NULL;
}

*cmd = '\0';
cmd++;
} else {
*arg = cmd;
while (!isspace(*cmd) && cmd < end)
cmd++;
*cmd = '\0';
if (**arg == '$')
*arg = getenv(*arg+1);
if (*arg == NULL)
*arg = "";
}

cmd++;

while (isspace(*cmd))
cmd++;

return cmd;
} }


static int /* get the start of a kernel arg "arg". returns everything after it
mountCommand(char * cmd, char * end) * (useful for things like getting the args to init=). so if you only
* want one arg, you need to terminate it yourself at the next whitespace */
static char *getKernelArg(char *arg)
{ {
char * fsType = NULL; char *start;
char * device, *spec; char *cmdline;
char * mntPoint; int len;
char * opts = NULL;
int rc = 0;
int flags = MS_MGC_VAL;
char * newOpts;


if (!(cmd = getArg(cmd, end, &spec))) { cmdline = start = getKernelCmdLine();
printf( if (start == NULL)
"usage: mount [--ro] [-o <opts>] -t <type> <device> <mntpoint>\n"); return NULL;
return 1;
}


while (cmd && *spec == '-') { while (*start) {
if (!strcmp(spec, "--ro")) { if (isspace(*start)) {
flags |= MS_RDONLY; start++;
} else if (!strcmp(spec, "--bind")) { continue;
flags = MS_BIND; }
fsType = "none";
} else if (!strcmp(spec, "--move")) {
flags = MS_MOVE;
fsType = "none";
} else if (!strcmp(spec, "-o")) {
cmd = getArg(cmd, end, &opts);
if (!cmd) {
printf("mount: -o requires arguments\n");
return 1;
}
} else if (!strcmp(spec, "-t")) {
if (!(cmd = getArg(cmd, end, &fsType))) {
printf("mount: missing filesystem type\n");
return 1;
}
}


cmd = getArg(cmd, end, &spec); len = strlen(arg);
} /* don't return if it's a different argument that merely starts
* like this one. */
if (strncmp(start, arg, len) == 0) {
if (start[len] == '=')
return start + len + 1;
if (!start[len] || isspace(start[len]))
return start + len;
}
while (*++start && !isspace(*start))
;
}


if (!cmd) { return NULL;
printf("mount: missing device or mountpoint\n");
return 1;
}

if (!(cmd = getArg(cmd, end, &mntPoint))) {
struct mntent *mnt;
FILE *fstab;

fstab = fopen("/etc/fstab", "r");
if (!fstab) {
printf("mount: missing mount point\n");
return 1;
}
do {
if (!(mnt = getmntent(fstab))) {
printf("mount: missing mount point\n");
fclose(fstab);
return 1;
}
if (!strcmp(mnt->mnt_dir, spec)) {
spec = mnt->mnt_fsname;
mntPoint = mnt->mnt_dir;

if (!strcmp(mnt->mnt_type, "bind")) {
flags |= MS_BIND;
fsType = "none";
} else
fsType = mnt->mnt_type;

opts = mnt->mnt_opts;
break;
}
} while(1);

fclose(fstab);
}

if (!fsType) {
printf("mount: filesystem type expected\n");
return 1;
}

if (cmd && cmd < end) {
printf("mount: unexpected arguments\n");
return 1;
}

/* need to deal with options */
if (opts) {
char * end;
char * start = opts;

newOpts = alloca(strlen(opts) + 1);
*newOpts = '\0';

while (*start) {
end = strchr(start, ',');
if (!end) {
end = start + strlen(start);
} else {
*end = '\0';
end++;
}

if (!strcmp(start, "ro"))
flags |= MS_RDONLY;
else if (!strcmp(start, "rw"))
flags &= ~MS_RDONLY;
else if (!strcmp(start, "nosuid"))
flags |= MS_NOSUID;
else if (!strcmp(start, "suid"))
flags &= ~MS_NOSUID;
else if (!strcmp(start, "nodev"))
flags |= MS_NODEV;
else if (!strcmp(start, "dev"))
flags &= ~MS_NODEV;
else if (!strcmp(start, "noexec"))
flags |= MS_NOEXEC;
else if (!strcmp(start, "exec"))
flags &= ~MS_NOEXEC;
else if (!strcmp(start, "sync"))
flags |= MS_SYNCHRONOUS;
else if (!strcmp(start, "async"))
flags &= ~MS_SYNCHRONOUS;
else if (!strcmp(start, "nodiratime"))
flags |= MS_NODIRATIME;
else if (!strcmp(start, "diratime"))
flags &= ~MS_NODIRATIME;
else if (!strcmp(start, "noatime"))
flags |= MS_NOATIME;
else if (!strcmp(start, "atime"))
flags &= ~MS_NOATIME;
else if (!strcmp(start, "relatime"))
flags |= MS_RELATIME;
else if (!strcmp(start, "norelatime"))
flags &= ~MS_RELATIME;
else if (!strcmp(start, "remount"))
flags |= MS_REMOUNT;
else if (!strcmp(start, "bind"))
flags |= MS_BIND;
else if (!strcmp(start, "defaults"))
;
else {
if (*newOpts)
strcat(newOpts, ",");
strcat(newOpts, start);
}

start = end;
}

opts = newOpts;
}

device = strdupa(spec);

if (!device) {
printf("mount: could not find filesystem '%s'\n", spec);
return 1;
}

{
char *mount_opts = NULL;
mount_opts = opts;
if (mount(device, mntPoint, fsType, flags, mount_opts) < 0) {
printf("mount: error mounting %s on %s as %s: %m\n",
device, mntPoint, fsType);
rc = 1;
}
}

return rc;
} }


/* remove all files/directories below dirName -- don't cross mountpoints */ #define MAX_INIT_ARGS 32
static int static int build_init_args(char **init, char ***initargs_out)
recursiveRemove(char * dirName)
{ {
struct stat sb,rb; const char *initprogs[] = { "/sbin/init", "/etc/init",
DIR * dir; "/bin/init", "/bin/sh", NULL };
struct dirent * d; const char *ignoreargs[] = { "console=", "BOOT_IMAGE=", NULL };
char * strBuf = alloca(strlen(dirName) + 1024); char *cmdline = NULL;
char **initargs;


if (!(dir = opendir(dirName))) { int i = 0;
printf("error opening %s: %m\n", dirName);
return 0;
}


if (fstat(dirfd(dir),&rb)) { *init = getKernelArg("init");
printf("unable to stat %s: %m\n", dirName);
closedir(dir);
return 0;
}


errno = 0; if (*init == NULL) {
while ((d = readdir(dir))) { int j;
errno = 0; cmdline = getKernelCmdLine();
if (cmdline == NULL)
return -1;


if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) { for (j = 0; initprogs[j] != NULL; j++) {
errno = 0; if (!access(initprogs[j], X_OK)) {
continue; *init = strdup(initprogs[j]);
} break;
}
}
}


strcpy(strBuf, dirName); initargs = (char **)calloc(MAX_INIT_ARGS+1, sizeof (char *));
strcat(strBuf, "/"); if (initargs == NULL)
strcat(strBuf, d->d_name); return -1;


if (lstat(strBuf, &sb)) { if (cmdline && *init) {
printf("failed to stat %s: %m\n", strBuf); initargs[i++] = *init;
errno = 0; } else {
continue; cmdline = *init;
} initargs[0] = NULL;
}


/* only descend into subdirectories if device is same as dir */ if (cmdline) {
if (S_ISDIR(sb.st_mode)) { char quote = '\0';
if (sb.st_dev == rb.st_dev) { char *chptr;
recursiveRemove(strBuf); char *start;
if (rmdir(strBuf))
printf("failed to rmdir %s: %m\n", strBuf);
}
errno = 0;
continue;
}


if (unlink(strBuf)) { start = chptr = cmdline;
printf("failed to remove %s: %m\n", strBuf); for (; (i < MAX_INIT_ARGS) && (*start != '\0'); i++) {
errno = 0; while (*chptr && (*chptr != quote)) {
continue; if (isspace(*chptr) && quote == '\0')
} break;
} if (*chptr == '"' || *chptr == '\'')
quote = *chptr;
chptr++;
}


if (errno) { if (quote == '"' || quote == '\'')
closedir(dir); chptr++;
printf("error reading from %s: %m\n", dirName); if (*chptr != '\0')
return 1; *(chptr++) = '\0';
}


closedir(dir); /* There are some magic parameters added *after*
* everything you pass, including a console= from the
* x86_64 kernel and BOOT_IMAGE= by syslinux. Bash
* doesn't know what they mean, so it then exits, init
* gets killed, disaster ensues. *sigh*.
*/
int j;
for (j = 0; ignoreargs[j] != NULL; j++) {
if (cmdline == *init && !strncmp(start, ignoreargs[j], strlen(ignoreargs[j]))) {
if (!*chptr)
initargs[i] = NULL;
else
i--;
start = chptr;
break;
}
}
if (start == chptr)
continue;


return 0; if (start[0] == '\0')
i--;
else
initargs[i] = strdup(start);
start = chptr;
}
}

if (initargs[i-1] != NULL)
initargs[i] = NULL;

*initargs_out = initargs;

return 0;
} }


static void static void switchroot(const char *newroot)
mountMntEnt(const struct mntent *mnt)
{ {
char *start = NULL, *end; /* Don't try to unmount the old "/", there's no way to do it. */
char *target = NULL; const char *umounts[] = { "/dev", "/proc", "/sys", NULL };
struct stat sb; char *init, **initargs;
int errnum;
printf("mounting %s\n", mnt->mnt_dir); int rc;
if (asprintfa(&target, ".%s", mnt->mnt_dir) < 0) { int i;
printf("setuproot: out of memory while mounting %s\n",
mnt->mnt_dir); rc = build_init_args(&init, &initargs);
return; if (rc < 0)
} return;

if (stat(target, &sb) < 0) for (i = 0; umounts[i] != NULL; i++) {
return; if (umount2(umounts[i], MNT_DETACH) < 0) {
fprintf(stderr, "Error unmounting old %s: %m\n",
if (asprintf(&start, "-o %s -t %s %s .%s\n", umounts[i]);
mnt->mnt_opts, mnt->mnt_type, mnt->mnt_fsname, fprintf(stderr, "Forcing unmount of %s\n", umounts[i]);
mnt->mnt_dir) < 0) { umount2(umounts[i], MNT_FORCE);
printf("setuproot: out of memory while mounting %s\n", }
mnt->mnt_dir); }
return;
} chdir(newroot);

end = start + 1; if (mount(newroot, "/", NULL, MS_MOVE, NULL) < 0) {
while (*end && (*end != '\n')) errnum = errno;
end++; fprintf(stderr, "switchroot: mount failed: %m\n");
/* end points to the \n at the end of the command */ errno = errnum;
return;
if (mountCommand(start, end) != 0) }
printf("setuproot: mount returned error\n");
if (chroot(".")) {
errnum = errno;
fprintf(stderr, "switchroot: chroot failed: %m\n");
errno = errnum;
return;
}

if (access(initargs[0], X_OK))
fprintf(stderr, "WARNING: can't access %s\n", initargs[0]);

execv(initargs[0], initargs);
return;
} }


static int static void usage(FILE *output)
setuprootCommand(char *new)
{ {
FILE *fp; fprintf(output, "usage: switchroot {-n|--newroot} <newrootdir>\n");

if (output == stderr)
printf("Setting up new root fs\n"); exit(err_usage);

exit(ok);
if (chdir(new)) {
printf("setuproot: chdir(%s) failed: %m\n", new);
return 1;
}

if (mount("/dev", "./dev", NULL, MS_BIND, NULL) < 0)
printf("setuproot: moving /dev failed: %m\n");

fp = setmntent("./etc/fstab.sys", "r");
if (fp)
printf("using fstab.sys from mounted FS\n");
else {
fp = setmntent("/etc/fstab.sys", "r");
if (fp)
printf("using fstab.sys from initrd\n");
}
if (fp) {
struct mntent *mnt;

while((mnt = getmntent(fp)))
mountMntEnt(mnt);
endmntent(fp);
} else {
struct {
char *source;
char *target;
char *type;
int flags;
void *data;
int raise;
} fstab[] = {
{ "/proc", "./proc", "proc", 0, NULL },
{ "/sys", "./sys", "sysfs", 0, NULL },
#if 0
{ "/dev/pts", "./dev/pts", "devpts", 0, "gid=5,mode=620" },
{ "/dev/shm", "./dev/shm", "tmpfs", 0, NULL },
{ "/selinux", "/selinux", "selinuxfs", 0, NULL },
#endif
{ NULL, }
};
int i = 0;

printf("no fstab.sys, mounting internal defaults\n");
for (; fstab[i].source != NULL; i++) {
if (mount(fstab[i].source, fstab[i].target, fstab[i].type,
fstab[i].flags, fstab[i].data) < 0)
printf("setuproot: error mounting %s: %m\n",
fstab[i].source);
}
}

chdir("/");
return 0;
} }


int main(int argc, char **argv) int main(int argc, char *argv[])
{ {
/* Don't try to unmount the old "/", there's no way to do it. */ int i;
const char *umounts[] = { "/dev", "/proc", "/sys", NULL }; char *newroot = NULL;
char *new = NULL;
int fd, i = 0;


argv++; for (i = 1; i < argc; i++) {
new = argv[0]; if (!strcmp(argv[i], "--help")
argv++; || !strcmp(argv[i], "-h")
printf("Switching to root: %s\n", new); || !strcmp(argv[i], "--usage")) {
usage(stdout);
} else if (!strcmp(argv[i], "-n")
|| !strcmp(argv[i], "--newroot")) {
newroot = argv[++i];
} else if (!strncmp(argv[i], "--newroot=", 10)) {
newroot = argv[i] + 10;
} else {
usage(stderr);
}
}


setuprootCommand(new); if (newroot == NULL || newroot[0] == '\0') {
usage(stderr);
}


fd = open("/", O_RDONLY); switchroot(newroot);
for (; umounts[i] != NULL; i++) {
printf("unmounting old %s\n", umounts[i]);
if (umount2(umounts[i], MNT_DETACH) < 0) {
printf("ERROR unmounting old %s: %m\n",umounts[i]);
printf("forcing unmount of %s\n", umounts[i]);
umount2(umounts[i], MNT_FORCE);
}
}
i=0;


chdir(new); fprintf(stderr, "switchroot has failed. Sorry.\n");

return 1;
recursiveRemove("/");

if (mount(new, "/", NULL, MS_MOVE, NULL) < 0) {
printf("switchroot: mount failed: %m\n");
close(fd);
return 1;
}

if (chroot(".")) {
printf("switchroot: chroot() failed: %m\n");
close(fd);
return 1;
}

/* release the old "/" */
close(fd);

close(3);
if ((fd = open("/dev/console", O_RDWR)) < 0) {
printf("ERROR opening /dev/console: %m\n");
printf("Trying to use fd 0 instead.\n");
fd = dup2(0, 3);
} else {
setFdCoe(fd, 0);
if (fd != 3) {
dup2(fd, 3);
close(fd);
fd = 3;
}
}
close(0);
dup2(fd, 0);
close(1);
dup2(fd, 1);
close(2);
dup2(fd, 2);
close(fd);

if (access(argv[0], X_OK)) {
printf("WARNING: can't access %s\n", argv[0]);
}

execv(argv[0], argv);

printf("exec of init (%s) failed!!!: %m\n", argv[0]);
return 1;
} }

/*
* vim:noet:ts=8:sw=8:sts=8
*/