From 4693034138c5c4ec7142f46aff2c5c928c7aac58 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Wed, 13 Jan 2021 16:05:57 +0100 Subject: [PATCH] allow unshare as root user - this is useful when you are already root and want the benefits of unsharing the mount namespace to prevent messing up your system - if the unshare mode is used as root, the user namespace is not unshared anymore and newuidmap, setuid and friends are not called anymore - if the unshare mode is used as non-root test if the user namespace can be unshared, otherwise test if the mount namespace can be unshared --- coverage.sh | 23 +++++++++-------------- mmdebstrap | 51 +++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/coverage.sh b/coverage.sh index 0360a89..54cc0e0 100755 --- a/coverage.sh +++ b/coverage.sh @@ -498,29 +498,24 @@ else runtests=$((runtests+1)) fi -print_header "mode=unshare,variant=apt: fail with unshare as root user" +print_header "mode=unshare,variant=apt: unshare as root user" cat << END > shared/test.sh #!/bin/sh set -eu export LC_ALL=C.UTF-8 -if [ ! -e /mmdebstrap-testenv ]; then - echo "this test modifies the system and should only be run inside a container" >&2 - exit 1 -fi -sysctl -w kernel.unprivileged_userns_clone=1 -ret=0 -$CMD --mode=unshare --variant=apt $DEFAULT_DIST /tmp/debian-chroot $mirror || ret=\$? -if [ "\$ret" = 0 ]; then - echo expected failure but got exit \$ret >&2 - exit 1 -fi +[ "\$(whoami)" = "root" ] +$CMD --mode=unshare --variant=apt \ + --customize-hook='chroot "\$1" sh -c "test -e /proc/self/fd"' \ + $DEFAULT_DIST /tmp/debian-chroot.tar $mirror +tar -tf /tmp/debian-chroot.tar | sort | diff -u tar1.txt - +rm /tmp/debian-chroot.tar END if [ "$HAVE_QEMU" = "yes" ]; then ./run_qemu.sh runtests=$((runtests+1)) else - echo "HAVE_QEMU != yes -- Skipping test..." 
>&2 - skipped=$((skipped+1)) + ./run_null.sh SUDO + runtests=$((runtests+1)) fi for variant in essential apt minbase buildd important standard; do diff --git a/mmdebstrap b/mmdebstrap index 0f0d739..f1dc4a2 100755 --- a/mmdebstrap +++ b/mmdebstrap @@ -223,10 +223,10 @@ sub get_tar_compressor { return; } -sub test_unshare { +sub test_unshare_userns { my $verbose = shift; if ($EFFECTIVE_USER_ID == 0) { - my $msg = "cannot use unshare mode when executing as root"; + my $msg = "cannot unshare user namespace when executing as root"; if ($verbose) { warning $msg; } else { @@ -401,9 +401,14 @@ sub get_unshare_cmd { my $cmd = shift; my $idmap = shift; + # unsharing the mount namespace (NEWNS) requires CAP_SYS_ADMIN my $unshare_flags - = $CLONE_NEWUSER | $CLONE_NEWNS | $CLONE_NEWPID | $CLONE_NEWUTS - | $CLONE_NEWIPC; + = $CLONE_NEWNS | $CLONE_NEWPID | $CLONE_NEWUTS | $CLONE_NEWIPC; + + # we only need to add CLONE_NEWUSER if we are not yet root + if ($EFFECTIVE_USER_ID != 0) { + $unshare_flags |= $CLONE_NEWUSER; + } if (0) { $unshare_flags |= $CLONE_NEWNET; @@ -447,6 +452,11 @@ sub get_unshare_cmd { # waiting for an EOF. 0 == sysread $rfh, my $c, 1 or error "read() did not receive EOF"; + # the process is already root, so no need for newuidmap/newgidmap + if ($EFFECTIVE_USER_ID == 0) { + exit 0; + } + # The program's new[ug]idmap have to be used because they are # setuid root. These privileges are needed to map the ids from # /etc/sub[ug]id to the user namespace set up by the parent. @@ -515,9 +525,11 @@ sub get_unshare_cmd { # want here, like checking /proc/sys/kernel/ngroups_max (which might # not exist). It would also also call setgroups() in a way that makes # the root user be part of the group unknown. 
- 0 == syscall &SYS_setgid, 0 or error "setgid failed: $!"; - 0 == syscall &SYS_setuid, 0 or error "setuid failed: $!"; - 0 == syscall &SYS_setgroups, 0, 0 or error "setgroups failed: $!"; + if ($EFFECTIVE_USER_ID != 0) { + 0 == syscall &SYS_setgid, 0 or error "setgid failed: $!"; + 0 == syscall &SYS_setuid, 0 or error "setuid failed: $!"; + 0 == syscall &SYS_setgroups, 0, 0 or error "setgroups failed: $!"; + } if (1) { # When the pid namespace is also unshared, then processes expect a @@ -4134,7 +4146,7 @@ sub main() { # lxc-usernsexec -- lxc-unshare -s 'MOUNT|PID|UTSNAME|IPC' ... # but without needing lxc if ($ARGV[0] eq "--unshare-helper") { - if (!test_unshare(1)) { + if (!test_unshare_userns(1)) { exit 1; } my @idmap = read_subuid_subgid; @@ -4408,9 +4420,9 @@ sub main() { # if mmdebstrap is executed as root, we assume the user wants root # mode $options->{mode} = 'root'; - } elsif (test_unshare(0)) { - # otherwise, unshare mode is our best option if test_unshare() - # succeeds + } elsif (test_unshare_userns(0)) { + # if we are not root, unshare mode is our best option if + # test_unshare_userns() succeeds $options->{mode} = 'unshare'; } elsif (system('fakechroot --version>/dev/null') == 0) { # the next fallback is fakechroot @@ -4449,7 +4461,16 @@ sub main() { exec 'fakechroot', 'fakeroot', @prefix, $PROGRAM_NAME, @ARGVORIG; } } elsif ($options->{mode} eq 'unshare') { - if (!test_unshare(1)) { + # For unshare mode to work we either need to already be the root user + # and then we do not have to unshare the user namespace anymore but we + # need to be able to unshare the mount namespace... + if ($EFFECTIVE_USER_ID == 0 + && 0 != system 'unshare --mount true 2>/dev/null') { + error "unable to unshare the mount namespace"; + } + # ...or we are not root and then we need to be able to unshare the user + # namespace. 
+ if ($EFFECTIVE_USER_ID != 0 && !test_unshare_userns(1)) { my $procfile = '/proc/sys/kernel/unprivileged_userns_clone'; open(my $fh, '<', $procfile) or error "failed to open $procfile: $!"; @@ -6132,6 +6153,12 @@ available and you know your subuid/subgid offset (100000 in this example): $ sudo systemd-nspawn --private-users=100000 \ > --directory=./debian-rootfs /bin/bash +If this mode is used as the root user, the user namespace is not unshared (but +the mount namespace and others still are) and created directories will have +correct ownership information. This is also useful in cases where the root user +wants the benefits of an unshared mount namespace to prevent accidentally +messing up the system. + =item B, B This mode will exec B again under C. A