Add a "rusty" interface on top of the raw NVML bindings for retrieving information about creatable vGPU types. It will be used to e.g. show a proper description for each creatable vGPU type.
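As a rough sketch of how the new interface could then be consumed (illustrative only, not part of this patch: the bus id is made up and serde_json is just an example consumer; `creatable_vgpu_types_for_dev()`, the public `VgpuTypeInfo` fields and `description()` are introduced further below):

    use anyhow::Result;
    use proxmox_ve_vfio::nvidia::creatable_vgpu_types_for_dev;

    fn main() -> Result<()> {
        // Hypothetical bus id; the real value depends on the host's PCI topology.
        let types = creatable_vgpu_types_for_dev("0000:01:00.0")?;

        for t in &types {
            // description() yields a property string such as
            // "class=...,max-instances=...,framebuffer-size=...MiB,..."
            println!("{}: {}", t.name, t.description());
        }

        // VgpuTypeInfo derives Serialize (kebab-case), so the list could also be
        // returned as JSON, e.g. from an API endpoint:
        println!("{}", serde_json::to_string_pretty(&types)?);

        Ok(())
    }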
Signed-off-by: Christoph Heiss <[email protected]>
---
 .../examples/nv_list_creatable_vgpus.rs |  15 ++
 proxmox-ve-vfio/src/nvidia/mod.rs       | 123 ++++++++++
 proxmox-ve-vfio/src/nvidia/nvml/mod.rs  | 224 ++++++++++++++++++
 3 files changed, 362 insertions(+)
 create mode 100644 proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs

diff --git a/proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs b/proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs
new file mode 100644
index 0000000..b2f276a
--- /dev/null
+++ b/proxmox-ve-vfio/examples/nv_list_creatable_vgpus.rs
@@ -0,0 +1,15 @@
+use std::env;
+
+use proxmox_ve_vfio::nvidia::creatable_vgpu_types_for_dev;
+
+fn main() {
+    let bus_id = env::args()
+        .nth(1)
+        .expect("vGPU bus id expected as first argument, e.g. 00:01.0");
+
+    let types = creatable_vgpu_types_for_dev(&bus_id).expect("failed to retrieve vGPU info");
+
+    for t in types {
+        println!("{}", t.description());
+    }
+}
diff --git a/proxmox-ve-vfio/src/nvidia/mod.rs b/proxmox-ve-vfio/src/nvidia/mod.rs
index 08a414c..bc2ef17 100644
--- a/proxmox-ve-vfio/src/nvidia/mod.rs
+++ b/proxmox-ve-vfio/src/nvidia/mod.rs
@@ -1,3 +1,126 @@
 //! Provides access to the state of NVIDIA (v)GPU devices connected to the system.
 
+use anyhow::Result;
+use serde::Serialize;
+
 mod nvml;
+
+use nvml::bindings::{nvmlDevice_t, nvmlVgpuTypeId_t};
+
+/// A single vGPU type that is supported and/or currently creatable
+/// for a given GPU.
+#[derive(Serialize)]
+#[serde(rename_all = "kebab-case")]
+pub struct VgpuTypeInfo {
+    /// Unique vGPU type ID.
+    pub id: u32,
+    /// An alphanumeric string that denotes a particular vGPU, e.g. GRID M60-2Q.
+    pub name: String,
+    /// Class of the vGPU, e.g. Quadro.
+    pub class_name: String,
+    /// Maximum number of instances of this vGPU type that can be created.
+    pub max_instances: u32,
+    /// Maximum number of vGPU instances supported per VM for this vGPU type.
+    pub max_instances_per_vm: u32,
+    /// vGPU framebuffer size in bytes.
+    pub framebuffer_size: u64,
+    /// Number of display heads supported by this vGPU type.
+    pub num_heads: u32,
+    /// Maximum resolution of a single head, across all display heads
+    /// supported by this vGPU type.
+    pub max_resolution: (u32, u32),
+    /// License types and versions required to run this vGPU type,
+    /// each in the form "\<license name\>,\<version\>", for example
+    /// "GRID-Virtual-PC,2.0".
+    /// A vGPU type might also be runnable with more than one type of license,
+    /// in which case the licenses are separated by semicolons.
+    pub license: String,
+    /// Static frame limit for this vGPU, if the frame limiter is enabled for
+    /// this vGPU type.
+    pub fps_limit: Option<u32>,
+}
+
+impl VgpuTypeInfo {
+    fn get_with(nvml: &nvml::Nvml, dev: nvmlDevice_t, type_id: nvmlVgpuTypeId_t) -> Result<Self> {
+        let num_heads = nvml.vgpu_type_num_display_heads(type_id)?;
+
+        // Take the best resolution among all available display heads
+        let max_resolution = (0..num_heads)
+            .filter_map(|i| nvml.vgpu_type_max_resolution(type_id, i).ok())
+            .max()
+            .unwrap_or((0, 0));
+
+        Ok(VgpuTypeInfo {
+            id: type_id,
+            name: nvml.vgpu_type_name(type_id)?,
+            class_name: nvml.vgpu_type_class_name(type_id)?,
+            max_instances: nvml.vgpu_type_max_instances(dev, type_id)?,
+            max_instances_per_vm: nvml.vgpu_type_max_instances_per_vm(type_id)?,
+            framebuffer_size: nvml.vgpu_type_framebuffer_size(type_id)?,
+            num_heads,
+            max_resolution,
+            license: nvml.vgpu_type_license(type_id)?,
+            fps_limit: nvml.vgpu_type_frame_rate_limit(type_id)?,
+        })
+    }
+
+    /// Formats the descriptive fields of the vGPU type information as a property string.
+    pub fn description(&self) -> String {
+        let VgpuTypeInfo {
+            class_name,
+            max_instances,
+            max_instances_per_vm,
+            framebuffer_size,
+            num_heads,
+            max_resolution,
+            license,
+            ..
+        } = self;
+
+        let framebuffer_size = framebuffer_size / 1024 / 1024;
+        let (max_res_x, max_res_y) = max_resolution;
+
+        format!(
+            "class={class_name}\
+            ,max-instances={max_instances}\
+            ,max-instances-per-vm={max_instances_per_vm}\
+            ,framebuffer-size={framebuffer_size}MiB\
+            ,num-heads={num_heads}\
+            ,max-resolution={max_res_x}x{max_res_y}\
+            ,license={license}"
+        )
+    }
+}
+
+/// Given a concrete GPU device, enumerates all *creatable* vGPU types for this
+/// device.
+fn enumerate_creatable_vgpu_types_by_dev(
+    nvml: &nvml::Nvml,
+    dev: nvmlDevice_t,
+) -> Result<Vec<VgpuTypeInfo>> {
+    let mut vgpu_info = vec![];
+    let type_ids = nvml.device_get_creatable_vgpus(dev)?;
+
+    for type_id in type_ids {
+        vgpu_info.push(VgpuTypeInfo::get_with(nvml, dev, type_id)?);
+    }
+
+    Ok(vgpu_info)
+}
+
+/// Retrieves a list of *creatable* vGPU types for the specified GPU by bus id.
+///
+/// The `bus_id` must be of format "\<domain\>:\<bus\>:\<device\>.\<function\>", e.g.
+/// "0000:01:01.0".
+/// \<domain\> is optional and can be left out if there is only one.
+///
+/// # See also
+///
+/// [`nvmlDeviceGetHandleByPciBusId_v2()`]: <https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1gea7484bb9eac412c28e8a73842254c05>
+/// [`struct nvmlPciInfo_t`]: <https://docs.nvidia.com/deploy/nvml-api/structnvmlPciInfo__t.html#structnvmlPciInfo__t_1a4d54ad9b596d7cab96ecc34613adbe4>
+pub fn creatable_vgpu_types_for_dev(bus_id: &str) -> Result<Vec<VgpuTypeInfo>> {
+    let nvml = nvml::Nvml::new()?;
+    let handle = nvml.device_handle_by_bus_id(bus_id)?;
+
+    enumerate_creatable_vgpu_types_by_dev(&nvml, handle)
+}
diff --git a/proxmox-ve-vfio/src/nvidia/nvml/mod.rs b/proxmox-ve-vfio/src/nvidia/nvml/mod.rs
index 10ad3c9..1259095 100644
--- a/proxmox-ve-vfio/src/nvidia/nvml/mod.rs
+++ b/proxmox-ve-vfio/src/nvidia/nvml/mod.rs
@@ -3,6 +3,13 @@
 //!
 //! [NVML]: <https://developer.nvidia.com/management-library-nvml>
 
+use anyhow::{bail, Result};
+use std::{
+    borrow::Cow,
+    ffi::{c_uint, c_ulonglong, CStr, CString},
+    ptr,
+};
+
 #[allow(
     dead_code,
     non_camel_case_types,
@@ -11,3 +18,220 @@
     unused_imports
 )]
 pub mod bindings;
+
+use bindings::{
+    nvmlDevice_t, nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE,
+    nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED, nvmlReturn_enum_NVML_SUCCESS, nvmlReturn_t,
+    nvmlVgpuTypeId_t, NvmlLib, NVML_DEVICE_NAME_BUFFER_SIZE, NVML_GRID_LICENSE_BUFFER_SIZE,
+};
+
+/// SONAME/filename of the native NVML library; pin it to SOVERSION 1 explicitly to be sure.
+const NVML_LIB_NAME: &str = "libnvidia-ml.so.1";
+
+pub struct Nvml(NvmlLib);
+
+impl Nvml {
+    pub fn new() -> Result<Self> {
+        let lib = unsafe {
+            let lib = Self(NvmlLib::new(NVML_LIB_NAME)?);
+            lib.to_err(lib.0.nvmlInit_v2())?;
+            lib
+        };
+
+        Ok(lib)
+    }
+
+    pub fn device_handle_by_bus_id(&self, bus_id: &str) -> Result<nvmlDevice_t> {
+        let bus_id = CString::new(bus_id)?; // NVML expects a NUL-terminated string
+        let mut handle: nvmlDevice_t = ptr::null_mut();
+        unsafe {
+            self.to_err(
+                self.0
+                    .nvmlDeviceGetHandleByPciBusId_v2(bus_id.as_ptr(), &mut handle),
+            )?;
+        }
+        Ok(handle)
+    }
+
+    /// Retrieves a list of vGPU types that can currently be created on the given device.
+    ///
+    /// # See also
+    ///
+    /// <https://docs.nvidia.com/deploy/nvml-api/group__nvmlVgpu.html#group__nvmlVgpu>
+    pub fn device_get_creatable_vgpus(&self, dev: nvmlDevice_t) -> Result<Vec<nvmlVgpuTypeId_t>> {
+        let mut count: c_uint = 0;
+        let mut ids = vec![];
+
+        unsafe {
+            // First retrieve the number of creatable vGPU types by passing count == 0,
+            // which will set `count` to the actual number.
+            let result = self
+                .0
+                .nvmlDeviceGetCreatableVgpus(dev, &mut count, ids.as_mut_ptr());
+
+            #[allow(non_upper_case_globals)]
+            if !matches!(
+                result,
+                nvmlReturn_enum_NVML_SUCCESS | nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE
+            ) {
+                self.to_err(result)?;
+            }
+
+            ids.resize(count as usize, 0);
+            self.to_err(
+                self.0
+                    .nvmlDeviceGetCreatableVgpus(dev, &mut count, ids.as_mut_ptr()),
+            )?;
+        }
+
+        Ok(ids)
+    }
+
+    pub fn vgpu_type_class_name(&self, type_id: nvmlVgpuTypeId_t) -> Result<String> {
+        let mut buffer: Vec<u8> = vec![0; NVML_DEVICE_NAME_BUFFER_SIZE as usize];
+        let mut buffer_size = buffer.len() as u32;
+
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetClass(
+                type_id,
+                buffer.as_mut_ptr() as *mut i8,
+                &mut buffer_size,
+            ))?;
+        }
+
+        slice_to_string(&buffer)
+    }
+
+    pub fn vgpu_type_license(&self, type_id: nvmlVgpuTypeId_t) -> Result<String> {
+        let mut buffer: Vec<u8> = vec![0; NVML_GRID_LICENSE_BUFFER_SIZE as usize];
+
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetLicense(
+                type_id,
+                buffer.as_mut_ptr() as *mut i8,
+                buffer.len() as u32,
+            ))?;
+        }
+
+        slice_to_string(&buffer)
+    }
+
+    pub fn vgpu_type_name(&self, type_id: nvmlVgpuTypeId_t) -> Result<String> {
+        let mut buffer: Vec<u8> = vec![0; NVML_DEVICE_NAME_BUFFER_SIZE as usize];
+        let mut buffer_size = buffer.len() as u32;
+
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetName(
+                type_id,
+                buffer.as_mut_ptr() as *mut i8,
+                &mut buffer_size,
+            ))?;
+        }
+
+        slice_to_string(&buffer)
+    }
+
+    pub fn vgpu_type_max_instances(
+        &self,
+        dev: nvmlDevice_t,
+        type_id: nvmlVgpuTypeId_t,
+    ) -> Result<u32> {
+        let mut count: c_uint = 0;
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetMaxInstances(dev, type_id, &mut count))?;
+        }
+
+        Ok(count)
+    }
+
+    pub fn vgpu_type_max_instances_per_vm(&self, type_id: nvmlVgpuTypeId_t) -> Result<u32> {
+        let mut count: c_uint = 0;
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetMaxInstancesPerVm(type_id, &mut count))?;
+        }
+
+        Ok(count)
+    }
+
+    pub fn vgpu_type_framebuffer_size(&self, type_id: nvmlVgpuTypeId_t) -> Result<u64> {
+        let mut size: c_ulonglong = 0;
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetFramebufferSize(type_id, &mut size))?;
+        }
+
+        Ok(size)
+    }
+
+    pub fn vgpu_type_num_display_heads(&self, type_id: nvmlVgpuTypeId_t) -> Result<u32> {
+        let mut num: c_uint = 0;
+        unsafe {
+            self.to_err(self.0.nvmlVgpuTypeGetNumDisplayHeads(type_id, &mut num))?;
+        }
+
+        Ok(num)
+    }
+
+    pub fn vgpu_type_max_resolution(
+        &self,
+        type_id: nvmlVgpuTypeId_t,
+        head: u32,
+    ) -> Result<(u32, u32)> {
+        let (mut x, mut y): (c_uint, c_uint) = (0, 0);
+        unsafe {
+            self.to_err(
+                self.0
+                    .nvmlVgpuTypeGetResolution(type_id, head, &mut x, &mut y),
+            )?;
+        }
+
+        Ok((x, y))
+    }
+
+    pub fn vgpu_type_frame_rate_limit(&self, type_id: nvmlVgpuTypeId_t) -> Result<Option<u32>> {
+        let mut limit: c_uint = 0;
+        let result = unsafe { self.0.nvmlVgpuTypeGetFrameRateLimit(type_id, &mut limit) };
+
+        if Self::err_is_unsupported(result) {
+            Ok(None)
+        } else {
+            self.to_err(result)?;
+            Ok(Some(limit))
+        }
+    }
+
+    fn to_err(&self, result: nvmlReturn_t) -> Result<()> {
+        if result == nvmlReturn_enum_NVML_SUCCESS {
+            Ok(())
+        } else {
+            bail!("{}", self.error_str(result))
+        }
+    }
+
+    fn err_is_unsupported(result: nvmlReturn_t) -> bool {
+        result == nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED
+    }
+
+    fn error_str(&self, err_code: nvmlReturn_t) -> Cow<'_, str> {
+        let cstr = unsafe {
+            let raw = self.0.nvmlErrorString(err_code);
+            CStr::from_ptr(raw)
+        };
+
+        cstr.to_string_lossy()
+    }
+}
+
+impl Drop for Nvml {
+    fn drop(&mut self) {
+        if let Ok(sym) = self.0.nvmlShutdown.as_ref() {
+            // Although nvmlShutdown() provides a return code (or error) indicating
+            // whether the operation was successful, at this point there isn't
+            // really anything we can do if it returns an error.
+            unsafe { sym() };
+        }
+    }
+}
+
+fn slice_to_string(s: &[u8]) -> Result<String> {
+    Ok(CStr::from_bytes_until_nul(s)?.to_str()?.into())
+}
-- 
2.52.0
