Filip Schauer <[email protected]> writes:
> Add support for customizing UID/GID mappings on individual mount points
> without affecting the entire container.
>
> A new "idmap" mount point option accepts space-separated mappings:
> ```
> idmap=type:ct:host:len type:ct:host:len ...
> ```
>
> type: can be either 'u' or 'g'
> ct: ID as seen inside the container
> host: corresponding ID on the host
> len: number of consecutive IDs to map
>
> Unmapped ranges inherit the container's ID mapping.
>
> Example to pass through the host UID & GID 1005:
> ```
> mp0: /mnt/data,mp=/data,idmap=u:1005:1005:1 g:1005:1005:1
> ```
>
> Signed-off-by: Filip Schauer <[email protected]>
> ---
> src/PVE/LXC.pm | 85 ++++++++++++++++++++++++++++++++++++++-
> src/PVE/LXC/Config.pm | 7 ++++
> src/lxc-pve-prestart-hook | 14 +++++++
> 3 files changed, 105 insertions(+), 1 deletion(-)
>
> diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
> index 2c02e9a..ec7cb01 100644
> --- a/src/PVE/LXC.pm
> +++ b/src/PVE/LXC.pm
> @@ -11,6 +11,7 @@ use File::Path;
> use File::Spec;
> use IO::Poll qw(POLLIN POLLHUP);
> use IO::Socket::UNIX;
> +use List::Util qw(max min);
> use POSIX qw(EINTR);
> use Socket;
> use Time::HiRes qw (gettimeofday);
> @@ -43,6 +44,7 @@ use PVE::Syscall qw(:fsmount);
> use PVE::LXC::CGroup;
> use PVE::LXC::Config;
> use PVE::LXC::Monitor;
> +use PVE::LXC::Namespaces;
> use PVE::LXC::Tools;
>
> my $have_sdn;
> @@ -2438,7 +2440,12 @@ sub device_passthrough_hotplug : prototype($$$) {
> sub mountpoint_hotplug : prototype($$$$$) {
> my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_;
>
> - my (undef, $root_uid, $root_gid) = PVE::LXC::parse_id_maps($conf);
> + my ($id_map, $root_uid, $root_gid) = PVE::LXC::parse_id_maps($conf);
> + my $mp_userns_fh;
> + if ($mp->{idmap}) {
> + my $mp_id_map = parse_mountpoint_idmap($id_map, $mp);
> + $mp_userns_fh = PVE::LXC::Namespaces::new_userns($mp_id_map);
> + }
>
> # We do the rest in a fork with an unshared mount namespace:
> # -) change our apparmor profile to 'pve-container-mounthotplug', which
> is '/usr/bin/lxc-start'
> @@ -2474,6 +2481,18 @@ sub mountpoint_hotplug : prototype($$$$$) {
>
> my $mount_fd = mountpoint_stage($mp, $dir, $storage_cfg, undef,
> $root_uid, $root_gid);
>
> + if ($mp_userns_fh) {
> + PVE::Tools::mount_setattr(
> + fileno($mount_fd),
> + '',
> + PVE::Tools::AT_EMPTY_PATH,
> + &PVE::Syscall::MOUNT_ATTR_IDMAP,
> + 0,
> + 0,
> + fileno($mp_userns_fh),
> + ) or die "mount_setattr: $!\n";
> + }
> +
> PVE::Tools::setns(fileno($ct_mnt_ns), PVE::Tools::CLONE_NEWNS);
> chdir('/')
> or die "failed to change root directory within the container's
> mount namespace: $!\n";
> @@ -2989,6 +3008,70 @@ sub map_ct_gid_to_host {
> return map_ct_id_to_host($gid, $id_map, 'g');
> }
>
> +sub parse_mountpoint_idmap {
> + my ($id_map, $mp) = @_;
> +
> + die "mount point does not specify an idmap\n" if !$mp->{idmap};
> +
> + # Parse the user-friendly mount-specific ID map
> + # This maps IDs as seen in the container to IDs as seen on disk.
> + my $user_mp_id_map = [];
> + for my $entry (split(' ', $mp->{idmap})) {
> + $entry =~ /^([ug]):(\d+):(\d+):(\d+)$/ or die "failed to parse mount
> point idmap: $entry\n";
> + push @$user_mp_id_map, [$1, $2, $3, $4];
> + }
> +
> + validate_id_maps($user_mp_id_map);
> +
> + # Convert the user friendly mp.idmap to the actual mapping to be applied
> via mount_setattr.
> + # Provided by the config:
> + # lxc.idmap: ID in Container --> ID on Host
> + # mp.idmap: ID in Container --> ID on Disk
> + #
> + # Convert to: ID on Disk --> ID on Host
> + my $result = [];
> + for my $type ('u', 'g') {
> + my @ct_chunks = grep { $_->[0] eq $type } @$id_map;
> + next if !@ct_chunks;
> +
> + my @exceptions = sort { $a->[1] <=> $b->[1] } grep { $_->[0] eq
> $type } @$user_mp_id_map;
> +
> + for my $chunk (@ct_chunks) {
> + my (undef, $ct_start, $host_start, $len) = @$chunk;
> + my $ct_end = $ct_start + $len;
> +
> + # Find exceptions that fall within this specific lxc.idmap chunk
> + my @chunk_exc = grep { $_->[1] < $ct_end && $_->[1] + $_->[3] >
> $ct_start } @exceptions;
> + push @chunk_exc, [$type, $ct_end, undef, 0]; # ensure the
> trailing gap is mapped
> +
> + my $ct = $ct_start;
> + for my $exc (@chunk_exc) {
> + my (undef, $exc_ct, $exc_disk, $exc_len) = @$exc;
> +
> + my $clamped_ct = max($exc_ct, $ct_start);
> + my $clamped_len = min($exc_ct + $exc_len, $ct_end) -
> $clamped_ct;
> +
> + # Identity mapping for unmapped ranges
> + if ($ct < $clamped_ct) {
> + my $host = $host_start + ($ct - $ct_start);
> + push @$result, [$type, $host, $host, $clamped_ct - $ct];
> + }
> +
> + # Map the IDs on Disk to the Host IDs.
> + if ($clamped_len > 0) {
> + my $disk = $exc_disk + $clamped_ct - $exc_ct;
> + my $host = $host_start + $clamped_ct - $ct_start;
> + push @$result, [$type, $disk, $host, $clamped_len];
> + }
> +
> + $ct = $clamped_ct + $clamped_len;
> + }
> + }
> + }
> +
> + return $result;
> +}
> +
> sub userns_command {
> my ($id_map) = @_;
> if (@$id_map) {
> diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
> index 5442586..9f56bc7 100644
> --- a/src/PVE/LXC/Config.pm
> +++ b/src/PVE/LXC/Config.pm
> @@ -369,6 +369,13 @@ my $rootfs_desc = {
> format_description => 'opt[;opt...]',
> pattern => qr/$valid_mount_option_re(;$valid_mount_option_re)*/,
> },
> + idmap => {
> + optional => 1,
> + type => 'string',
> + description => 'Map specific UIDs/GIDs to specific host
> UIDs/GIDs for this mount point',
I think this would benefit from a verbose_description: it is not clear
to me whether the syntax 123:456:1 maps ID 123 on the host to ID 456 in
the container or the other way around. Perhaps an explicit example would
be helpful.
> + format_description => 'id-type:id-mount:id-host:id-range
> id-type:id-mount:...',
> + pattern => qr/^(?:[ug]:[0-9]+:[0-9]+:[1-9][0-9]*)(?:
> [ug]:[0-9]+:[0-9]+:[1-9][0-9]*)*$/,
> + },
> ro => {
> type => 'boolean',
> description => 'Read-only mount point',
> diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
> index 9862509..6e500a8 100755
> --- a/src/lxc-pve-prestart-hook
> +++ b/src/lxc-pve-prestart-hook
> @@ -95,6 +95,20 @@ PVE::LXC::Tools::lxc_hook(
> $mountpoint, $dir, $storage_cfg, undef, $root_uid, $root_gid,
> );
>
> + if ($mountpoint->{idmap}) {
> + my $mp_id_map = PVE::LXC::parse_mountpoint_idmap($id_map,
> $mountpoint);
> + my $usernsfh = PVE::LXC::Namespaces::new_userns($mp_id_map);
> + PVE::Tools::mount_setattr(
> + fileno($mount_fd),
> + '',
> + PVE::Tools::AT_EMPTY_PATH,
> + &PVE::Syscall::MOUNT_ATTR_IDMAP,
> + 0,
> + 0,
> + fileno($usernsfh),
> + ) or die "mount_setattr: $!\n";
> + }
> +
> my ($dest_dir, $dest_base_fd, $keep_attrs);
> if ($rootdir_fd) {
> # Mount relative to the rootdir fd.
--
Maximiliano