On Mon, Feb 23, 2026 at 02:04:53PM +0100, Filip Schauer wrote: > Add support for customizing UID/GID mappings on individual mount points > without affecting the entire container. > > A new "idmap" mount point option accepts space-separated mappings: > ``` > idmap=type:ct:host:len type:ct:host:len ... > ```
While I don't have time for a detailed review today, some thoughts: We probably want a way to just say "undo the container user namespace". The pre-start hook gets a `$namespaces` hash passed as its 3rd parameter, so we can just open the user namespace fd there for this purpose, e.g. `idmap=passthrough` or something, although the Rust side of the schema might not like that... Some more comments inline... > > type: can be either 'u' or 'g' > ct: ID as seen inside the container > host: corresponding ID on the host > len: number of consecutive IDs to map > > Unmapped ranges inherit the container's ID mapping. > > Example to pass through the host UID & GID 1005: > ``` > mp0: /mnt/data,mp=/data,idmap=u:1005:1005:1 g:1005:1005:1 > ``` > > Signed-off-by: Filip Schauer <[email protected]> > --- > src/PVE/LXC.pm | 85 ++++++++++++++++++++++++++++++++++++++- > src/PVE/LXC/Config.pm | 7 ++++ > src/lxc-pve-prestart-hook | 14 +++++++ > 3 files changed, 105 insertions(+), 1 deletion(-) > > diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm > index 2c02e9a..ec7cb01 100644 > --- a/src/PVE/LXC.pm > +++ b/src/PVE/LXC.pm > @@ -11,6 +11,7 @@ use File::Path; > use File::Spec; > use IO::Poll qw(POLLIN POLLHUP); > use IO::Socket::UNIX; > +use List::Util qw(max min); > use POSIX qw(EINTR); > use Socket; > use Time::HiRes qw (gettimeofday); > @@ -43,6 +44,7 @@ use PVE::Syscall qw(:fsmount); > use PVE::LXC::CGroup; > use PVE::LXC::Config; > use PVE::LXC::Monitor; > +use PVE::LXC::Namespaces; > use PVE::LXC::Tools; > > my $have_sdn; > @@ -2438,7 +2440,12 @@ sub device_passthrough_hotplug : prototype($$$) { > sub mountpoint_hotplug : prototype($$$$$) { > my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_; > > - my (undef, $root_uid, $root_gid) = PVE::LXC::parse_id_maps($conf); > + my ($id_map, $root_uid, $root_gid) = PVE::LXC::parse_id_maps($conf); > + my $mp_userns_fh; > + if ($mp->{idmap}) { > + my $mp_id_map = parse_mountpoint_idmap($id_map, $mp); > + $mp_userns_fh = PVE::LXC::Namespaces::new_userns($mp_id_map); > + 
} > > # We do the rest in a fork with an unshared mount namespace: > # -) change our apparmor profile to 'pve-container-mounthotplug', which > is '/usr/bin/lxc-start' > @@ -2474,6 +2481,18 @@ sub mountpoint_hotplug : prototype($$$$$) { > > my $mount_fd = mountpoint_stage($mp, $dir, $storage_cfg, undef, > $root_uid, $root_gid); > > + if ($mp_userns_fh) { > + PVE::Tools::mount_setattr( > + fileno($mount_fd), > + '', > + PVE::Tools::AT_EMPTY_PATH, > + &PVE::Syscall::MOUNT_ATTR_IDMAP, > + 0, > + 0, > + fileno($mp_userns_fh), > + ) or die "mount_setattr: $!\n"; > + } > + > PVE::Tools::setns(fileno($ct_mnt_ns), PVE::Tools::CLONE_NEWNS); > chdir('/') > or die "failed to change root directory within the container's > mount namespace: $!\n"; > @@ -2989,6 +3008,70 @@ sub map_ct_gid_to_host { > return map_ct_id_to_host($gid, $id_map, 'g'); > } > > +sub parse_mountpoint_idmap { > + my ($id_map, $mp) = @_; > + > + die "mount point does not specify an idmap\n" if !$mp->{idmap}; > + > + # Parse the user-friendly mount-specific ID map > + # This maps IDs as seen in the container to IDs as seen on disk. > + my $user_mp_id_map = []; > + for my $entry (split(' ', $mp->{idmap})) { > + $entry =~ /^([ug]):(\d+):(\d+):(\d+)$/ or die "failed to parse mount > point idmap: $entry\n"; > + push @$user_mp_id_map, [$1, $2, $3, $4]; > + } > + > + validate_id_maps($user_mp_id_map); > + > + # Convert the user friendly mp.idmap to the actual mapping to be applied > via mount_setattr. 
> + # Provided by the config: > + # lxc.idmap: ID in Container --> ID on Host > + # mp.idmap: ID in Container --> ID on Disk > + # > + # Convert to: ID on Disk --> ID on Host > + my $result = []; > + for my $type ('u', 'g') { > + my @ct_chunks = grep { $_->[0] eq $type } @$id_map; > + next if !@ct_chunks; > + > + my @exceptions = sort { $a->[1] <=> $b->[1] } grep { $_->[0] eq > $type } @$user_mp_id_map; > + > + for my $chunk (@ct_chunks) { > + my (undef, $ct_start, $host_start, $len) = @$chunk; > + my $ct_end = $ct_start + $len; > + > + # Find exceptions that fall within this specific lxc.idmap chunk > + my @chunk_exc = grep { $_->[1] < $ct_end && $_->[1] + $_->[3] > > $ct_start } @exceptions; > + push @chunk_exc, [$type, $ct_end, undef, 0]; # ensure the > trailing gap is mapped > + > + my $ct = $ct_start; > + for my $exc (@chunk_exc) { > + my (undef, $exc_ct, $exc_disk, $exc_len) = @$exc; > + > + my $clamped_ct = max($exc_ct, $ct_start); > + my $clamped_len = min($exc_ct + $exc_len, $ct_end) - > $clamped_ct; > + > + # Identity mapping for unmapped ranges > + if ($ct < $clamped_ct) { > + my $host = $host_start + ($ct - $ct_start); > + push @$result, [$type, $host, $host, $clamped_ct - $ct]; > + } > + > + # Map the IDs on Disk to the Host IDs. 
> + if ($clamped_len > 0) { > + my $disk = $exc_disk + $clamped_ct - $exc_ct; > + my $host = $host_start + $clamped_ct - $ct_start; > + push @$result, [$type, $disk, $host, $clamped_len]; > + } > + > + $ct = $clamped_ct + $clamped_len; > + } > + } > + } > + > + return $result; > +} > + > sub userns_command { > my ($id_map) = @_; > if (@$id_map) { > diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm > index 5442586..9f56bc7 100644 > --- a/src/PVE/LXC/Config.pm > +++ b/src/PVE/LXC/Config.pm > @@ -369,6 +369,13 @@ my $rootfs_desc = { > format_description => 'opt[;opt...]', > pattern => qr/$valid_mount_option_re(;$valid_mount_option_re)*/, > }, > + idmap => { > + optional => 1, > + type => 'string', > + description => 'Map specific UIDs/GIDs to specific host UIDs/GIDs > for this mount point', > + format_description => 'id-type:id-mount:id-host:id-range > id-type:id-mount:...', > + pattern => qr/^(?:[ug]:[0-9]+:[0-9]+:[1-9][0-9]*)(?: > [ug]:[0-9]+:[0-9]+:[1-9][0-9]*)*$/, > + }, > ro => { > type => 'boolean', > description => 'Read-only mount point', > diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook > index 9862509..6e500a8 100755 > --- a/src/lxc-pve-prestart-hook > +++ b/src/lxc-pve-prestart-hook > @@ -95,6 +95,20 @@ PVE::LXC::Tools::lxc_hook( > $mountpoint, $dir, $storage_cfg, undef, $root_uid, $root_gid, > ); > > + if ($mountpoint->{idmap}) { > + my $mp_id_map = PVE::LXC::parse_mountpoint_idmap($id_map, > $mountpoint); > + my $usernsfh = PVE::LXC::Namespaces::new_userns($mp_id_map); ^ We may want to cache the file descriptors to not create too many user namespaces as that's quite expensive and wasteful. An alternative would be to, instead of defining arbitrary maps in the container configs, define a central map somewhere and refer to the mappings by name. We could then bind-mount the namespaces as, for instance, `/run/pve/user-ns-mappings/<name>-<hash>`. 
That way an existing mapping can just be `open()`ed, and the namespace can be easily accessed during debugging. Downside: more work if this should be exposed in the UI ;-) That said, I do believe the most commonly used value would be "passthrough" (?). > + PVE::Tools::mount_setattr( > + fileno($mount_fd), > + '', > + PVE::Tools::AT_EMPTY_PATH, > + &PVE::Syscall::MOUNT_ATTR_IDMAP, > + 0, > + 0, > + fileno($usernsfh), > + ) or die "mount_setattr: $!\n"; > + } > + > my ($dest_dir, $dest_base_fd, $keep_attrs); > if ($rootdir_fd) { > # Mount relative to the rootdir fd. > -- > 2.47.3
