This is an automated email from the ASF dual-hosted git repository.
sureshanaparti pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudstack.git
The following commit(s) were added to refs/heads/main by this push:
new 2c493d19336 Add support for nvidia vGPU support with vendor specific
framework (#11432)
2c493d19336 is described below
commit 2c493d193364dd448d93912b7961bdc6a499bebd
Author: Vishesh <[email protected]>
AuthorDate: Fri Aug 15 15:54:11 2025 +0530
Add support for nvidia vGPU support with vendor specific framework (#11432)
---
.../hypervisor/kvm/resource/LibvirtGpuDef.java | 11 +-
scripts/vm/hypervisor/kvm/gpudiscovery.sh | 162 ++++++++++++++++++++-
2 files changed, 163 insertions(+), 10 deletions(-)
diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java
index 06457b1d071..08086859fb7 100644
--- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java
@@ -47,7 +47,7 @@ public class LibvirtGpuDef {
private void generateMdevXml(StringBuilder gpuBuilder) {
String mdevUuid = vgpuType.getBusAddress(); // For MDEV devices,
busAddress contains the UUID
- String displayAttribute = vgpuType.isDisplay() ? "on" : "off";
+ String displayAttribute = vgpuType.isDisplay() ? "on' ramfb='on" :
"off";
gpuBuilder.append("<hostdev mode='subsystem' type='mdev'
model='vfio-pci' display='").append(displayAttribute).append("'>\n");
gpuBuilder.append(" <source>\n");
@@ -65,11 +65,16 @@ public class LibvirtGpuDef {
// - UEFI/OVMF firmware environments
// - ARM64 hosts (cache coherency issues with traditional VGA)
// - Multi-monitor VDI setups (primary display)
+ String managed = "yes";
+ // To support passthrough NVIDIA GPUs with SR-IOV & vendor specific
GPU integration
+ if (vgpuType.getVendorId().equals("10de") &&
!vgpuType.getModelName().equals("passthrough")) {
+ managed = "no";
+ }
if (vgpuType.isDisplay()) {
- gpuBuilder.append("<hostdev mode='subsystem' type='pci'
managed='yes' display='on' ramfb='on'>\n");
+ gpuBuilder.append("<hostdev mode='subsystem' type='pci'
managed='").append(managed).append("' display='on' ramfb='on'>\n");
} else {
// Compute-only workloads don't need display or ramfb
- gpuBuilder.append("<hostdev mode='subsystem' type='pci'
managed='yes' display='off'>\n");
+ gpuBuilder.append("<hostdev mode='subsystem' type='pci'
managed='").append(managed).append("' display='off'>\n");
}
gpuBuilder.append(" <driver name='vfio'/>\n");
gpuBuilder.append(" <source>\n");
diff --git a/scripts/vm/hypervisor/kvm/gpudiscovery.sh b/scripts/vm/hypervisor/kvm/gpudiscovery.sh
index 662abaed2cb..d27f6daf8c5 100755
--- a/scripts/vm/hypervisor/kvm/gpudiscovery.sh
+++ b/scripts/vm/hypervisor/kvm/gpudiscovery.sh
@@ -349,6 +349,130 @@ json_escape() {
# Cache for nodedev XML data to avoid repeated virsh calls
declare -A nodedev_cache
+# Cache for nvidia-smi vgpu profile data
+declare -A nvidia_vgpu_profiles
+
+# Parse nvidia-smi vgpu -s -v output and populate profile cache
+parse_nvidia_vgpu_profiles() {
+ local gpu_address=""
+ local profile_id=""
+ local profile_name=""
+ local max_instances=""
+ local fb_memory=""
+ local max_heads=""
+ local max_x_res=""
+ local max_y_res=""
+
+ # Function to store current profile data
+ store_profile_data() {
+ if [[ -n "$gpu_address" && -n "$profile_id" && -n
"$profile_name" ]]; then
+ local key="${gpu_address}:${profile_id}"
+
nvidia_vgpu_profiles["$key"]="$profile_name|${max_instances:-0}|${fb_memory:-0}|${max_heads:-0}|${max_x_res:-0}|${max_y_res:-0}"
+ fi
+ }
+
+ # Skip if nvidia-smi is not available
+ if ! command -v nvidia-smi >/dev/null 2>&1; then
+ return
+ fi
+
+ while IFS= read -r line; do
+ # Match GPU address line
+ if [[ $line =~ ^GPU[[:space:]]+([0-9A-Fa-f:]+\.[0-9A-Fa-f]+)
]]; then
+ # Store previous profile data before starting new GPU
+ store_profile_data
+
+ gpu_address="${BASH_REMATCH[1]}"
+ # Convert from format like 00000000:AF:00.0 to AF:00.0
and normalize to lowercase
+ if [[ $gpu_address =~
[0-9A-Fa-f]+:([0-9A-Fa-f]+:[0-9A-Fa-f]+\.[0-9A-Fa-f]+) ]]; then
+ gpu_address="${BASH_REMATCH[1],,}"
+ else
+ gpu_address="${gpu_address,,}"
+ fi
+ # Reset profile variables for new GPU
+ profile_id=""
+ profile_name=""
+ max_instances=""
+ fb_memory=""
+ max_heads=""
+ max_x_res=""
+ max_y_res=""
+ elif [[ $line =~
^[[:space:]]*vGPU[[:space:]]+Type[[:space:]]+ID[[:space:]]*:[[:space:]]*0x([0-9A-Fa-f]+)
]]; then
+ # Store previous profile data before starting new
profile
+ store_profile_data
+
+ # Normalize to lowercase hex without 0x prefix
+ profile_id="${BASH_REMATCH[1],,}"
+ # Reset profile-specific variables
+ profile_name=""
+ max_instances=""
+ fb_memory=""
+ max_heads=""
+ max_x_res=""
+ max_y_res=""
+ elif [[ $line =~
^[[:space:]]*Name[[:space:]]*:[[:space:]]*(.+)$ ]]; then
+ profile_name="${BASH_REMATCH[1]}"
+ elif [[ $line =~
^[[:space:]]*Max[[:space:]]+Instances[[:space:]]*:[[:space:]]*([0-9]+) ]]; then
+ max_instances="${BASH_REMATCH[1]}"
+ elif [[ $line =~
^[[:space:]]*FB[[:space:]]+Memory[[:space:]]*:[[:space:]]*([0-9]+)[[:space:]]*MiB
]]; then
+ fb_memory="${BASH_REMATCH[1]}"
+ elif [[ $line =~
^[[:space:]]*Display[[:space:]]+Heads[[:space:]]*:[[:space:]]*([0-9]+) ]]; then
+ max_heads="${BASH_REMATCH[1]}"
+ elif [[ $line =~
^[[:space:]]*Maximum[[:space:]]+X[[:space:]]+Resolution[[:space:]]*:[[:space:]]*([0-9]+)
]]; then
+ max_x_res="${BASH_REMATCH[1]}"
+ elif [[ $line =~
^[[:space:]]*Maximum[[:space:]]+Y[[:space:]]+Resolution[[:space:]]*:[[:space:]]*([0-9]+)
]]; then
+ max_y_res="${BASH_REMATCH[1]}"
+ fi
+ done < <(nvidia-smi vgpu -s -v 2>/dev/null || true)
+
+ # Store the last profile data after processing all lines
+ store_profile_data
+}
+
+# Get current vGPU type ID for a VF from sysfs
+get_current_vgpu_type() {
+ local vf_path="$1"
+ local current_type_file="$vf_path/nvidia/current_vgpu_type"
+
+ if [[ -f "$current_type_file" ]]; then
+ local type_id
+ type_id=$(<"$current_type_file")
+
+ # Remove any whitespace
+ type_id="${type_id// /}"
+
+ # Handle different input formats and normalize to lowercase hex
without 0x
+ if [[ $type_id =~ ^0x([0-9A-Fa-f]+)$ ]]; then
+ # Input is hex with 0x prefix (e.g., "0x252")
+ echo "${BASH_REMATCH[1],,}"
+ elif [[ $type_id =~ ^[0-9]+$ ]]; then
+ # Input is decimal (e.g., "594")
+ printf "%x" "$type_id"
+ elif [[ $type_id =~ ^[0-9A-Fa-f]+$ ]]; then
+ # Input is hex without 0x prefix (e.g., "252")
+ echo "${type_id,,}"
+ else
+ # Fallback for unknown format
+ echo "0"
+ fi
+ else
+ echo "0"
+ fi
+}
+
+# Get profile information from nvidia-smi cache
+get_nvidia_profile_info() {
+ local gpu_address="$1"
+ local profile_id="$2"
+ local key="${gpu_address}:${profile_id}"
+
+ if [[ -n "${nvidia_vgpu_profiles[$key]:-}" ]]; then
+ echo "${nvidia_vgpu_profiles[$key]}"
+ else
+ echo "|0|0|0|0|0" # Default empty values
+ fi
+}
+
# Get nodedev name for a PCI address (e.g. "00:02.0" -> "pci_0000_00_02_0")
get_nodedev_name() {
local addr="$1"
@@ -567,6 +691,9 @@ process_mdev_instances() {
# === GPU Discovery ===
+# Parse nvidia-smi vgpu profiles once at the beginning
+parse_nvidia_vgpu_profiles
+
mapfile -t LINES < <(lspci -nnm)
echo '{ "gpus": ['
@@ -575,7 +702,7 @@ first_gpu=true
for LINE in "${LINES[@]}"; do
# Parse lspci -nnm fields: SLOT "CLASS [CODE]" "VENDOR [VID]" "DEVICE
[DID]" ...
if [[ $LINE =~
^([^[:space:]]+)[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"
]]; then
- PCI_ADDR="${BASH_REMATCH[1]}"
+ PCI_ADDR="${BASH_REMATCH[1],,}" # Normalize to lowercase
PCI_CLASS="${BASH_REMATCH[2]}"
VENDOR_FIELD="${BASH_REMATCH[3]}"
DEVICE_FIELD="${BASH_REMATCH[4]}"
@@ -662,12 +789,33 @@ for LINE in "${LINES[@]}"; do
SLOT="0x${VF_BDF:3:2}"
FUNC="0x${VF_BDF:6:1}"
- # Determine vf_profile
+ # Determine vf_profile using nvidia-smi information
VF_PROFILE=""
- if VF_LINE=$(lspci -nnm -s "$VF_BDF" 2>/dev/null); then
- if [[ $VF_LINE =~
\"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"
]]; then
- VF_DEVICE_FIELD="${BASH_REMATCH[4]}"
- VF_PROFILE=$(sed -E 's/
\[[0-9A-Fa-f]{4}\]$//' <<<"$VF_DEVICE_FIELD")
+ VF_PROFILE_NAME=""
+ VF_MAX_INSTANCES="null"
+ VF_VIDEO_RAM="null"
+ VF_MAX_HEADS="null"
+ VF_MAX_RESOLUTION_X="null"
+ VF_MAX_RESOLUTION_Y="null"
+
+ if [[ "$VENDOR_ID" == "10de" ]]; then
+ # For NVIDIA GPUs, check current vGPU type
+ current_vgpu_type=$(get_current_vgpu_type
"$VF_PATH")
+ if [[ "$current_vgpu_type" != "0" ]]; then
+ # Get profile info from nvidia-smi cache
+ profile_info=$(get_nvidia_profile_info
"$PCI_ADDR" "$current_vgpu_type")
+ IFS='|' read -r VF_PROFILE_NAME
VF_MAX_INSTANCES VF_VIDEO_RAM VF_MAX_HEADS VF_MAX_RESOLUTION_X
VF_MAX_RESOLUTION_Y <<< "$profile_info"
+ VF_PROFILE="$VF_PROFILE_NAME"
+ fi
+ fi
+
+ # Fallback to lspci parsing if no nvidia-smi profile
found
+ if [[ -z "$VF_PROFILE" ]]; then
+ if VF_LINE=$(lspci -nnm -s "$VF_BDF"
2>/dev/null); then
+ if [[ $VF_LINE =~
\"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"
]]; then
+
VF_DEVICE_FIELD="${BASH_REMATCH[4]}"
+ VF_PROFILE=$(sed -E 's/
\[[0-9A-Fa-f]{4}\]$//' <<<"$VF_DEVICE_FIELD")
+ fi
fi
fi
VF_PROFILE_JSON=$(json_escape "$VF_PROFILE")
@@ -677,7 +825,7 @@ for LINE in "${LINES[@]}"; do
USED_JSON=$(to_json_vm "$raw")
flist+=(
-
"{\"vf_pci_address\":\"$VF_BDF\",\"vf_profile\":$VF_PROFILE_JSON,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}")
+
"{\"vf_pci_address\":\"$VF_BDF\",\"vf_profile\":$VF_PROFILE_JSON,\"max_instances\":$VF_MAX_INSTANCES,\"video_ram\":$VF_VIDEO_RAM,\"max_heads\":$VF_MAX_HEADS,\"max_resolution_x\":$VF_MAX_RESOLUTION_X,\"max_resolution_y\":$VF_MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}")
done
if [ ${#flist[@]} -gt 0 ]; then
VF_ARRAY="[$(