Hi,

it would be great to have two other improvements:

1. support for multiple OSDs per disk (useful for NVMe):
   #ceph-volume lvm batch --osds-per-device <number-of-osds> /dev/sdX

2. support for encryption:
   #ceph-volume lvm ... --dmcrypt
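For illustration, these could look something like the following (untested sketch; the device names and the OSD count are made up, the flags are the ones ceph-volume documents for its lvm subcommands):

#ceph-volume lvm batch --osds-per-device 4 /dev/nvme0n1
#ceph-volume lvm create --dmcrypt --data /dev/sdb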
----- Original Message -----
From: "Dominik Csapak" <d.csa...@proxmox.com>
To: "pve-devel" <pve-devel@pve.proxmox.com>
Sent: Tuesday, June 4, 2019 14:47:50
Subject: [pve-devel] [PATCH manager 11/20] ceph: osd: rework creation with ceph-volume

this completely rewrites the ceph osd creation api call using ceph-volume
since ceph-disk is not available anymore

breaking changes:
no filestore anymore, journal_dev -> db_dev

it is now possible to give a specific size for db/wal, default is to read
from ceph db/config and fallback is 10% of osd for block.db and 1% of osd
for block.wal

the reason is that ceph-volume does not autocreate those itself (like
ceph-disk) but you have to create it yourself

if the db/wal device has an lvm on it with naming scheme 'ceph-UUID',
it uses that and creates a new lv
if we detect partitions, we create a new partition at the end
if the disk is not used at all, we create a pv/vg/lv for it

it is not possible to create osds on luminous with this api call anymore,
anyone needing this has to use ceph-disk directly

Signed-off-by: Dominik Csapak <d.csa...@proxmox.com>
---
 PVE/API2/Ceph/OSD.pm | 186 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 130 insertions(+), 56 deletions(-)

diff --git a/PVE/API2/Ceph/OSD.pm b/PVE/API2/Ceph/OSD.pm
index adb0025c..ae938016 100644
--- a/PVE/API2/Ceph/OSD.pm
+++ b/PVE/API2/Ceph/OSD.pm
@@ -5,12 +5,14 @@ use warnings;
 
 use Cwd qw(abs_path);
 use IO::File;
+use UUID;
 
 use PVE::Ceph::Tools;
 use PVE::Ceph::Services;
 use PVE::CephConfig;
 use PVE::Cluster qw(cfs_read_file cfs_write_file);
 use PVE::Diskmanage;
+use PVE::Storage::LVMPlugin;
 use PVE::Exception qw(raise_param_exc);
 use PVE::JSONSchema qw(get_standard_option);
 use PVE::RADOS;
@@ -198,28 +200,39 @@ __PACKAGE__->register_method ({
 		description => "Block device name.",
 		type => 'string',
 	    },
-	    journal_dev => {
-		description => "Block device name for journal (filestore) or block.db (bluestore).",
+	    db_dev => {
+		description => "Block device name for block.db.",
 		optional => 1,
 		type => 'string',
 	    },
-	    wal_dev => {
-		description => "Block device name for block.wal (bluestore only).",
+	    db_size => {
+		description => "Size in GiB for block.db. ".
+		    "If a block.db is requested but the size is not given, will be ".
+		    "automatically selected by: bluestore_block_db_size from the ".
+		    "ceph database (osd or global section) or config (osd or global section) ".
+		    "in that order. If this is not available, it will be sized 10% of the size ".
+		    "of the OSD device. Fails if the available size is not enough.",
 		optional => 1,
-		type => 'string',
+		type => 'number',
+		requires => 'db_dev',
+		minimum => 1.0,
 	    },
-	    fstype => {
-		description => "File system type (filestore only).",
-		type => 'string',
-		enum => ['xfs', 'ext4'],
-		default => 'xfs',
+	    wal_dev => {
+		description => "Block device name for block.wal.",
 		optional => 1,
+		type => 'string',
 	    },
-	    bluestore => {
-		description => "Use bluestore instead of filestore. This is the default.",
-		type => 'boolean',
-		default => 1,
+	    wal_size => {
+		description => "Size in GiB for block.wal. ".
+		    "If a block.wal is requested but the size is not given, will be ".
+		    "automatically selected by: bluestore_block_wal_size from the ".
+		    "ceph database (osd or global section) or config (osd or global section) ".
+		    "in that order. If this is not available, it will be sized 1% of the size ".
+		    "of the OSD device. Fails if the available size is not enough.",
 		optional => 1,
+		minimum => 0.5,
+		requires => 'wal_dev',
+		type => 'number',
 	    },
 	},
     },
@@ -231,44 +244,53 @@ __PACKAGE__->register_method ({
 
 	my $authuser = $rpcenv->get_user();
 
-	raise_param_exc({ 'bluestore' => "conflicts with parameter 'fstype'" })
-	    if (defined($param->{fstype}) && defined($param->{bluestore}) && $param->{bluestore});
-
 	PVE::Ceph::Tools::check_ceph_inited();
 	PVE::Ceph::Tools::setup_pve_symlinks();
 
 	PVE::Ceph::Tools::check_ceph_installed('ceph_osd');
+	PVE::Ceph::Tools::check_ceph_installed('ceph_volume');
 
-	my $bluestore = $param->{bluestore} // 1;
-
-	my $journal_dev;
+	my $dev;
+	my $db_dev;
+	my $db_devname;
+	my $db_size;
 	my $wal_dev;
+	my $wal_devname;
+	my $wal_size;
 
-	if ($param->{journal_dev} && ($param->{journal_dev} ne $param->{dev})) {
-	    $journal_dev = PVE::Diskmanage::verify_blockdev_path($param->{journal_dev});
+	if ($param->{db_dev} && ($param->{db_dev} ne $param->{dev})) {
+	    $db_dev = PVE::Diskmanage::verify_blockdev_path($param->{db_dev});
+	    if (defined($param->{db_size})) {
+		$db_size = PVE::Tools::convert_size($param->{db_size}, 'gb' => 'b') ;
+	    }
+	    ($db_devname = $db_dev) =~ s|/dev/||;
 	}
 
 	if ($param->{wal_dev} &&
 	    ($param->{wal_dev} ne $param->{dev}) &&
-	    (!$param->{journal_dev} || $param->{wal_dev} ne $param->{journal_dev})) {
-	    raise_param_exc({ 'wal_dev' => "can only be set with paramater 'bluestore'"})
-		if !$bluestore;
+	    (!$param->{db_dev} || $param->{wal_dev} ne $param->{db_dev})) {
 	    $wal_dev = PVE::Diskmanage::verify_blockdev_path($param->{wal_dev});
+	    if (defined($param->{wal_size})) {
+		$wal_size = PVE::Tools::convert_size($param->{wal_size}, 'gb' => 'b') ;
+	    }
+	    ($wal_devname = $wal_dev) =~ s|/dev/||;
 	}
 
-	$param->{dev} = PVE::Diskmanage::verify_blockdev_path($param->{dev});
+	$dev = PVE::Diskmanage::verify_blockdev_path($param->{dev});
 
-	my $devname = $param->{dev};
-	$devname =~ s|/dev/||;
+	(my $devname = $dev) =~ s|/dev/||;
+	my $devs = [$devname];
+	push @$devs, $db_devname if $db_devname;
+	push @$devs, $wal_devname if $wal_devname;
 
-	my $disklist = PVE::Diskmanage::get_disks($devname, 1);
+	my $disklist = PVE::Diskmanage::get_disks($devs, 1);
 	my $diskinfo = $disklist->{$devname};
 	die "unable to get device info for '$devname'\n"
 	    if !$diskinfo;
 
-	die "device '$param->{dev}' is in use\n"
+	die "device '$dev' is in use\n"
 	    if $diskinfo->{used};
 
 	my $devpath = $diskinfo->{devpath};
@@ -286,46 +308,98 @@ __PACKAGE__->register_method ({
 	    file_set_contents($ceph_bootstrap_osd_keyring, $bindata);
 	};
 
-	my $worker = sub {
-	    my $upid = shift;
+	my $create_part_or_lv = sub {
+	    my ($dev, $size, $type) = @_;
+
+	    if ($size =~ m/^(\d+)$/) {
+		$size = $1;
+	    } else {
+		die "invalid size '$size'\n";
+	    }
+
+	    die "'$dev->{devpath}' is smaller than requested size '$size' bytes\n"
+		if $dev->{size} < $size;
+
+	    if (!$dev->{used}) {
+		# create pv,vg,lv
+
+		my $vg = "ceph-" . UUID::uuid();
+		my $lv = $type . "-" . UUID::uuid();
+
+		PVE::Storage::LVMPlugin::lvm_create_volume_group($dev->{devpath}, $vg);
+		PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}b");
+
+		return "$vg/$lv";
 
-	    my $fstype = $param->{fstype} || 'xfs';
+	    } elsif ($dev->{used} eq 'LVM') {
+		# check pv/vg and create lv
+		my $vgs = PVE::Storage::LVMPlugin::lvm_vgs(1);
+		my $vg;
+		for my $vgname ( sort keys %$vgs ) {
+		    next if $vgname !~ /^ceph-/;
 
-	    my $ccname = PVE::Ceph::Tools::get_config('ccname');
+		    for my $pv ( @{$vgs->{$vgname}->{pvs}} ) {
+			next if $pv->{name} ne $dev->{devpath};
+			$vg = $vgname;
+			last;
+		    }
+		    last if $vg;
+		}
+
+		die "no ceph vg found on '$dev->{devpath}'\n" if !$vg;
+		die "vg '$vg' has not enough free space\n" if $vgs->{$vg}->{free} < $size;
+
+		my $lv = $type . "-" . "012345";
+
+		PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}b");
+
+		return "$vg/$lv";
+
+	    } elsif ($dev->{used} eq 'partitions') {
+		# create new partition at the end
+
+		return PVE::Diskmanage::append_partition($dev->{devpath}, $size);
+	    }
+
+	    die "cannot use '$dev->{devpath}' for '$type'\n";
+	};
+
+	my $worker = sub {
+	    my $upid = shift;
+
+	    PVE::Diskmanage::locked_disk_action(sub {
+		# get db/wal size
+		if (($db_dev && !defined($db_size)) || ($wal_dev && !defined($wal_size))) {
+		    my $sizes = PVE::Ceph::Tools::get_db_wal_sizes();
+		    $db_size = $sizes->{db} // int($disklist->{$devname}->{size} / 10); # 10% of OSD
+		    $wal_size = $sizes->{wal} // int($disklist->{$devname}->{size} / 100); # 1% of OSD
+		}
 
-	    my $cmd = ['ceph-disk', 'prepare', '--zap-disk',
-		'--cluster', $ccname, '--cluster-uuid', $fsid ];
+		my $cmd = ['ceph-volume', 'lvm', 'create', '--cluster-fsid', $fsid ];
 
-	    if ($bluestore) {
 		print "create OSD on $devpath (bluestore)\n";
-		push @$cmd, '--bluestore';
 
-		if ($journal_dev) {
-		    print "using device '$journal_dev' for block.db\n";
-		    push @$cmd, '--block.db', $journal_dev;
+		if ($db_dev) {
+		    print "creating block.db on '$db_dev'\n";
+		    my $part_or_lv = $create_part_or_lv->($disklist->{$db_devname}, $db_size, 'osd-db');
+		    print "using '$part_or_lv' for block.db\n";
+		    push @$cmd, '--block.db', $part_or_lv;
 		}
 
 		if ($wal_dev) {
-		    print "using device '$wal_dev' for block.wal\n";
-		    push @$cmd, '--block.wal', $wal_dev;
+		    print "creating block.wal on '$wal_dev'\n";
+		    my $part_or_lv = $create_part_or_lv->($disklist->{$wal_devname}, $wal_size, 'osd-wal');
+		    print "using '$part_or_lv' for block.wal\n";
+		    push @$cmd, '--block.wal', $part_or_lv;
 		}
 
-		push @$cmd, $devpath;
-	    } else {
-		print "create OSD on $devpath ($fstype)\n";
-		push @$cmd, '--filestore', '--fs-type', $fstype;
-		if ($journal_dev) {
-		    print "using device '$journal_dev' for journal\n";
-		    push @$cmd, '--journal-dev', $devpath, $journal_dev;
-		} else {
-		    push @$cmd, $devpath;
-		}
-	    }
+		push @$cmd, '--data', $devpath;
 
-	    PVE::Ceph::Tools::wipe_disks($devpath);
+		PVE::Ceph::Tools::wipe_disks($devpath);
 
-	    run_command($cmd);
+		run_command($cmd);
+	    });
 	};
 
 	return $rpcenv->fork_worker('cephcreateosd', $devname, $authuser, $worker);
-- 
2.11.0
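For other reviewers, roughly what the reworked worker ends up doing for a fresh (unused) db_dev, if I read $create_part_or_lv correctly (a sketch only; the UUIDs are shortened and made up, and the 100G assumes a 1T OSD disk with no bluestore_block_db_size configured, i.e. the 10% fallback):

#pvcreate /dev/nvme0n1
#vgcreate ceph-3f5c9a... /dev/nvme0n1
#lvcreate -n osd-db-71bd02... -L 100G ceph-3f5c9a...
#ceph-volume lvm create --cluster-fsid <fsid> --block.db ceph-3f5c9a.../osd-db-71bd02... --data /dev/sdb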
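And if I read the new schema right, the call could then be exercised like this (hypothetical invocation; the node, the devices and the 50 GiB are made up):

#pvesh create /nodes/<node>/ceph/osd --dev /dev/sdb --db_dev /dev/nvme0n1 --db_size 50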
_______________________________________________
pve-devel mailing list
pve-devel@pve.proxmox.com
https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel