Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package google-guest-configs for openSUSE:Factory checked in at 2025-06-26 11:39:15 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/google-guest-configs (Old) and /work/SRC/openSUSE:Factory/.google-guest-configs.new.7067 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "google-guest-configs" Thu Jun 26 11:39:15 2025 rev:32 rq:1288529 version:20250605.00 Changes: -------- --- /work/SRC/openSUSE:Factory/google-guest-configs/google-guest-configs.changes 2025-06-04 20:29:37.491646436 +0200 +++ /work/SRC/openSUSE:Factory/.google-guest-configs.new.7067/google-guest-configs.changes 2025-06-26 11:40:30.680740600 +0200 @@ -1,0 +2,26 @@ +Wed Jun 25 11:46:32 UTC 2025 - John Paul Adrian Glaubitz <adrian.glaub...@suse.com> + +- Update to version 20250605.00 + * Merge pull request (#112) from bk202:liujoh_416067717 + * Added comment to the bitmap conversion functions + * Remove IRQ affinity overwrite to XPS affinity + * Update XPS affinity to assign the remaining unassigned CPUs + to the last queue when populating the last queue + * Fix set_xps_affinity to correctly parse cpus array + * Update XPS CPU assignment logic + * Update CPU assignment algorithm in XPS affinity + * Remove commented code + * Update XPS affinity vCPU distribution algorithm s.t. the vCPUs assigned + to a queue are on the same core - fixed IRQ affinity on NUMA1 not using + the correct bind_cores_index + * Fixed NUMA comparison error in set_xps_affinity + * Update XPS affinity setup to be NUMA aware and support 64 bit CPU mask + calculation +- from version 20250604.00 + * Merge pull request (#114) from bk202:liujoh_irq_affinity_bug_fix + * Bug fix: bind_cores_begin -> bind_cores_index + * Name smart NICs in lexicographic order +- Run %postun to modify %{_sysconfdir}/sysconfig/network/ifcfg-eth0 + during uninstall only to avoid removal of POST_UP_SCRIPT on upgrade + +------------------------------------------------------------------- Old: ---- google-guest-configs-20250516.00.tar.gz New: ---- google-guest-configs-20250605.00.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ google-guest-configs.spec ++++++ --- /var/tmp/diff_new_pack.OyeS9m/_old 2025-06-26 11:40:32.332809133 +0200 +++ /var/tmp/diff_new_pack.OyeS9m/_new 2025-06-26 11:40:32.348809797 +0200 @@ -23,7 +23,7 @@ %define _udevdir %(pkg-config --variable udev_dir udev) %endif Name: google-guest-configs -Version: 20250516.00 +Version: 20250605.00 Release: 0 Summary: Google Cloud Guest Configs License: Apache-2.0 @@ -74,7 +74,7 @@ fi %postun -if [ -f %{_sysconfdir}/sysconfig/network/ifcfg-eth0 ] ; then +if [ -f %{_sysconfdir}/sysconfig/network/ifcfg-eth0 ] && [ $1 -eq 0 ]; then sed -i '/POST_UP_SCRIPT="compat:suse:google_up.sh"/d' %{_sysconfdir}/sysconfig/network/ifcfg-eth0 fi ++++++ google-guest-configs-20250516.00.tar.gz -> google-guest-configs-20250605.00.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/guest-configs-20250516.00/src/usr/bin/gce-nic-naming new/guest-configs-20250605.00/src/usr/bin/gce-nic-naming --- old/guest-configs-20250516.00/src/usr/bin/gce-nic-naming 2025-05-16 22:09:53.000000000 +0200 +++ new/guest-configs-20250605.00/src/usr/bin/gce-nic-naming 2025-06-05 22:31:29.000000000 +0200 @@ -26,7 +26,8 @@ declare SYS_PREPEND_PATH='/sys' # 0x15b3:0x101e is the vendor and device ID for Mellanox CX7 # 0x8086:0x145c is the vendor and device ID for Intel IDPF VF -readonly ETHERNET_DEVICES_VENDORS=('15b3:101e' '8086:145c') +# 0x8086:0x1452 is the vendor and device ID for Intel NIC +readonly ETHERNET_DEVICES_VENDORS=('15b3:101e' '8086:145c' '8086:1452') # 0x10de is the vendor ID for Nvidia readonly GPU_DEVICES_VENDORS=('10de' '10de') # PCI BUS ID path is in the format of 0000:00:04.0 @@ -132,7 +133,7 @@ # accelerator_devices: Array of processor devices # ethernet_to_accelerator_ratio: Ratio of Processor to Ethernet devices # Arguments: -# $1: Name refernece to the array of ethernet devices +# $1: Name reference to the array of ethernet devices # $2: Name reference to the array of processor devices # $@: Paths to search for devices ############################### @@ -455,14 +456,26 @@ fi local eth_device_vendor="${ethernet_devices[${int_id}]}" - local name_builder="" - if [[ ${accelerator_devices[*]} == "" ]] ; then + # Diorite NIC + if [[ ${eth_device_vendor} == "8086:1452" ]]; then + local old_name=$(basename ${device_path}) + local new_name="eth${eth_index}" + name_builder="$new_name" + + # Temporarily rename to avoid naming collisions: udev will overwrite the + # temporary name with the correct, final name + if [[ "$new_name" != "$old_name" ]]; then + notice "Renaming ${old_name} to ${new_name}" + /sbin/ip link set $new_name down + /sbin/ip link set $new_name name "${new_name}tmp" + /sbin/ip link set "${new_name}tmp" up + fi + elif [[ ${accelerator_devices[*]} == "" ]] ; then if [[ " ${ETHERNET_DEVICES_VENDORS[*]} " =~ \ [[:space:]]${eth_device_vendor}[[:space:]] ]]; then if [[ "${SUBSYSTEM}" == "net" ]] && [[ -d "${SYS_PREPEND_PATH}${DEVPATH}/device/${RDMA_TEST_FOLDER}" ]]; then - name_builder="rdma${eth_index}" elif [[ "${SUBSYSTEM}" == "net" ]] && [[ -d "${SYS_PREPEND_PATH}${DEVPATH}/device" ]]; then # If this is a VF device and not an RDMA we do not want this device @@ -473,7 +486,6 @@ else # If device path is empty it indicates other changes happening so this script will skip error_and_exit "DEVPATH provided is empty, skipping naming. Path:${SYS_PREPEND_PATH}${DEVPATH}" - fi else error_and_exit "Device is not for intent based name: "\ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/guest-configs-20250516.00/src/usr/bin/google_set_multiqueue new/guest-configs-20250605.00/src/usr/bin/google_set_multiqueue --- old/guest-configs-20250516.00/src/usr/bin/google_set_multiqueue 2025-05-16 22:09:53.000000000 +0200 +++ new/guest-configs-20250605.00/src/usr/bin/google_set_multiqueue 2025-06-05 22:31:29.000000000 +0200 @@ -86,10 +86,6 @@ # number of queues. tx_queue=/sys/class/net/"$nic"/queues/tx-"$irq" if ls $tx_queue 1> /dev/null 2>&1; then - # XPS (Transmit Packet Steering) allows a core to decide which queue to - # select if its mask is found in one of the queue's xps_cpus - cp /proc/irq/"$tx_irq"/smp_affinity $tx_queue/xps_cpus - echo -en "$nic:q-$irq: \ttx: irq $tx_irq bind to $core \trx: irq $rx_irq bind to $core" >&2 echo -e " \txps_cpus bind to $(cat $tx_queue/xps_cpus)" >&2 else @@ -171,10 +167,6 @@ numa0_irq_range1="${numa0_irq_range[0]#*,}" numa0_irq_range0_start=$(echo "$numa0_irq_range0" | cut -d '-' -f 1) - # Avoid setting binding IRQ on vCPU 0 as it is a busy vCPU being heavily - # used by the system. - numa0_irq_range0_start=$((numa0_irq_range0_start + 1)) - numa0_irq_range0_end=$(echo "$numa0_irq_range0" | cut -d '-' -f 2) numa0_irq_range1_start=$(echo "$numa0_irq_range1" | cut -d '-' -f 1) numa0_irq_range1_end=$(echo "$numa0_irq_range1" | cut -d '-' -f 2) @@ -200,7 +192,7 @@ "$numa1_irq_range1_end") } -function unpack_irq_ranges() { +function unpack_cpu_ranges() { local input_ranges=($1) local -n irq_ranges="$2" for ((i=0; i<${#input_ranges[@]}; i+=2)); do @@ -213,6 +205,132 @@ done } +# Converts a hexadecimal bitmap to rangelist +# ex. bitmap=00000000,00000000,00fff000,000003ff CPUs=0-9,44-55 +function bitmapbitmap_to_rangelist() { # bitmap + local bitmap="${1:-}" # must be non empty, only hex digits and commas + + [[ "${bitmap}" =~ ^[0-9a-fA-F,]+$ ]] || return 1 + bitmap="${bitmap//,/}" # remove commas + + local comma='' ret='' + local bit=0 l=-1 h=0 # current bit and range boundaries + local i j # process one character at a time starting from right (low index) + for ((i = ${#bitmap} - 1; i >= 0; i--)); do + local cur="0x${bitmap:${i}:1}" + for ((j = 0; j < 4; j++, bit++, cur >>= 1)); do + (( cur & 1 )) || continue # bit is 0 + (( l < 0 )) && (( l = bit, h = bit, 1 )) && continue # first bit + (( bit == h + 1 )) && (( h = bit, 1 )) && continue # extend range + ret+="${comma}${l}" ; (( l != h )) && ret+="-${h}" # add range + (( l = bit, h = bit )) # start new interval + comma="," + done + done + (( l < 0 )) && return + ret+="${comma}${l}"; (( l != h )) && ret+="-${h}" # add final entry + echo $ret +} + +# Converts a list of CPUs to a hexadecimal bitmap +# ex. CPUs=[0,1,2,3,4,5,6,7,8,9,44,45,46,47,48,49,50,51,52,53,54,55] +# bitmap=00000000,00000000,00fff000,000003ff +function rangelist_to_bitmap() { # list highest_cpu + local ranges="${1:-}" # can be empty, only digits, commas and dash + local highest=${2:-1} # highest CPU + [[ "${ranges}" =~ ^[0-9,-]*$ ]] || return 1 + ranges="${ranges//,/ }" # replace comma with space + + local digits=() + local range i l h + for range in $ranges; do + read l h <<< $(echo ${range/-/ }) + [[ -z "$h" ]] && h=l + for ((i = l; i <= h; i++)) { (( digits[i / 4] |= 1 << (i & 3) )); } + (( highest = h > highest ? h : highest)) + done + + # Print in reverse order with commas + local ret="" hex="0123456789abcdef" + (( h = (highest + 31) / 32 * 8 )) # make a multiple of 32 CPUs + for (( i = h - 1; i >= 0; i--)) ; do + ret+="${hex:${digits[$i]}:1}" + (( i & 7 || i == 0)) || ret+="," + done + echo $ret +} + +# Returns all the network interface names excluding "lo" +get_network_interfaces() { + local network_interfaces=() + + for nic_dir in /sys/class/net/*; do + local nic_name=$(basename "${nic_dir}") + + if [[ "${nic_name}" == "lo" || ! -e "${nic_dir}/device" ]]; then + continue + fi + + network_interfaces+=("${nic_name}") + done + + echo "${network_interfaces[@]}" +} + +# For XPS affinity configuration, we'd do the following assignment: +# 1. For each interface, divide the queues into two halves +# 2. Evenly distribute the vCPUs on NUMA0 to the first half of the queues +# 3. Evenly distribute the vCPUs on NUMA1 to the second half of the queues +# This function will have to be called once for each NUMA. +function set_xps_affinity() { + local numa="$1" + local cpus=("${@:2}") + + total_vcpus=${#cpus[@]} + num_numa_nodes=2 + nics_string=$(get_network_interfaces) + + IFS=' ' read -r -a nics <<< "$nics_string" + for nic in "${nics[@]}"; do + tx_queue_count=$(ls -1 /sys/class/net/"$nic"/queues/ | grep tx | wc -l) + + # the number of queues to assign CPUs for this NUMA node. + queues_per_numa=$(( tx_queue_count / num_numa_nodes )) + + # the number of CPUs to assign per queue + cpus_per_queue=$(( total_vcpus / queues_per_numa)) + + echo "nic=$nic tx_queue_count=$tx_queue_count queues_per_numa=$queues_per_numa cpus_per_queue=$cpus_per_queue" + + cpu_index=0 + queue_offset=$(( queues_per_numa*numa )) + for (( queue=queue_offset; queue<queue_offset+queues_per_numa; queue+=1 )); do + xps_path=/sys/class/net/$nic/queues/tx-$queue/xps_cpus + xps_cpus="" + + # Assign all the remaining CPUs to the last queue + if [[ queue -eq $(( queue_offset + queues_per_numa - 1 )) ]]; then + cpus_per_queue=$(( total_vcpus - cpu_index )) + fi + + for (( i=0; i<cpus_per_queue; i+=1 )); do + xps_cpus+="${cpus[cpu_index]}," + cpu_index=$(( cpu_index + 1 )) + done + + # remove the last "," + xps_cpus="${xps_cpus%,}" + cpu_mask=$(rangelist_to_bitmap $xps_cpus $(nproc)) + echo ${cpu_mask} > $xps_path + printf "Queue %d XPS_PATH=%s assigned CPUs=%s cpu_mask=%s\n" \ + "$queue" \ + "$xps_path" \ + "$(bitmap_to_rangelist "$cpu_mask")" \ + "$cpu_mask" + done + done +} + echo "Running $(basename $0)." VIRTIO_NET_DEVS=/sys/bus/virtio/drivers/virtio_net/virtio* is_multinic_accelerator_platform @@ -298,52 +416,27 @@ fi done -XPS=/sys/class/net/e*/queues/tx*/xps_cpus -num_cpus=$(nproc) -[[ $num_cpus -gt 63 ]] && num_cpus=63 - -num_queues=0 -for q in $XPS; do - interface=$(echo "$q" | grep -oE 'net/([^/]+)' | cut -d'/' -f2) - if [[ $IS_MULTINIC_ACCELERATOR_PLATFORM == 0 ]] && ! $(is_gvnic "$interface"); then - continue - fi - num_queues=$((num_queues + 1)) -done - -# If we have more CPUs than queues, then stripe CPUs across tx affinity -# as CPUNumber % queue_count. -for q in $XPS; do - interface=$(echo "$q" | grep -oE 'net/([^/]+)' | cut -d'/' -f2) - if [[ $IS_MULTINIC_ACCELERATOR_PLATFORM == 0 ]] && ! $(is_gvnic "$interface"); then - continue - fi - - queue_re=".*tx-([0-9]+).*$" - if [[ "$q" =~ ${queue_re} ]]; then - queue_num=${BASH_REMATCH[1]} - fi - - xps=0 - for cpu in `seq $queue_num $num_queues $((num_cpus - 1))`; do - xps=$((xps | (1 << cpu))) - done - - # Linux xps_cpus requires a hex number with commas every 32 bits. It ignores - # all bits above # cpus, so write a list of comma separated 32 bit hex values - # with a comma between dwords. - xps_dwords=() - for i in $(seq 0 $(((num_cpus - 1) / 32))) - do - xps_dwords=(`printf "%08x" $((xps & 0xffffffff))` "${xps_dwords[@]}") - xps=$((xps >> 32)) - done - xps_string=$(IFS=, ; echo "${xps_dwords[*]}") +vcpu_ranges=() +get_vcpu_ranges_on_accelerator_platform vcpu_ranges - - echo ${xps_string} > $q - printf "Queue %d XPS=%s for %s\n" $queue_num `cat $q` $q -done | sort -n -k2 +packed_numa0_vcpu_ranges=( + "${vcpu_ranges[0]} ${vcpu_ranges[1]} ${vcpu_ranges[2]} ${vcpu_ranges[3]}" +) +packed_numa1_vcpu_ranges=( + "${vcpu_ranges[4]} ${vcpu_ranges[5]} ${vcpu_ranges[6]} ${vcpu_ranges[7]}" +) +declare -a numa0_vcpu_ranges +unpack_cpu_ranges "${packed_numa0_vcpu_ranges[0]}" numa0_vcpu_ranges +declare -a numa1_vcpu_ranges +unpack_cpu_ranges "${packed_numa1_vcpu_ranges[0]}" numa1_vcpu_ranges + +echo -e "\nConfiguring XPS affinity for devices on NUMA 0" +echo -e "vCPUs on NUMA0 [${vcpu_ranges[0]}-${vcpu_ranges[1]}], [${vcpu_ranges[2]}-${vcpu_ranges[3]}]" +set_xps_affinity 0 "${numa0_vcpu_ranges[@]}" + +echo -e "\nConfiguring XPS affinity for devices on NUMA 1" +echo -e "vCPUs on NUMA1 [${vcpu_ranges[4]}-${vcpu_ranges[5]}], [${vcpu_ranges[6]}-${vcpu_ranges[7]}]" +set_xps_affinity 1 "${numa1_vcpu_ranges[@]}" if [[ ! $IS_MULTINIC_ACCELERATOR_PLATFORM == 0 ]]; then exit @@ -368,23 +461,20 @@ # enp134s0 # enp140s0 -irq_ranges=() -get_vcpu_ranges_on_accelerator_platform irq_ranges - +# Avoid setting binding IRQ on vCPU 0 as it is a busy vCPU being heavily +# used by the system. packed_numa0_irq_ranges=( - "${irq_ranges[0]} ${irq_ranges[1]} ${irq_ranges[2]} ${irq_ranges[3]}" + "$((vcpu_ranges[0] + 1)) ${vcpu_ranges[1]} ${vcpu_ranges[2]} ${vcpu_ranges[3]}" ) packed_numa1_irq_ranges=( - "${irq_ranges[4]} ${irq_ranges[5]} ${irq_ranges[6]} ${irq_ranges[7]}" + "${vcpu_ranges[4]} ${vcpu_ranges[5]} ${vcpu_ranges[6]} ${vcpu_ranges[7]}" ) declare -a numa0_irq_ranges -unpack_irq_ranges "${packed_numa0_irq_ranges[0]}" numa0_irq_ranges +unpack_cpu_ranges "${packed_numa0_irq_ranges[0]}" numa0_irq_ranges declare -a numa1_irq_ranges -unpack_irq_ranges "${packed_numa1_irq_ranges[0]}" numa1_irq_ranges - -echo -e "Binding vCPUs on NUMA0 [${numa0_irq_ranges[@]}]\n" -echo -e "Binding vCPUs on NUMA1 [${numa1_irq_ranges[@]}]\n" +unpack_cpu_ranges "${packed_numa1_irq_ranges[0]}" numa1_irq_ranges +echo -e "\nSetting IRQ affinity with vCPUs on NUMA0 [${numa0_irq_ranges[@]}]" bind_cores_index=0 find /sys/class/net -type l | xargs -L 1 realpath | grep '/sys/devices/pci' | sort | xargs -L 1 basename | while read nic_name; do # For non-gvnic devices (e.g. mlx5), the IRQ bindings will be handled by the device's driver. @@ -393,7 +483,7 @@ continue fi - echo "$nic_name is Gvnic device, continuing set IRQ on $nic_name ." + echo "$nic_name is Gvnic device, continuing set IRQ on $nic_name." nic_numa_node=$(cat /sys/class/net/"$nic_name"/device/numa_node) if [[ $nic_numa_node -ne 0 ]]; then @@ -403,6 +493,7 @@ bind_cores_index=$(set_irq_range "$nic_name" "$bind_cores_index" "${numa0_irq_ranges[@]}") done +echo -e "\nSetting IRQ affinity with vCPUs on NUMA1 [${numa1_irq_ranges[@]}]" bind_cores_index=0 find /sys/class/net -type l | xargs -L 1 realpath | grep '/sys/devices/pci' | sort | xargs -L 1 basename | while read nic_name; do # For non-gvnic devices (e.g. mlx5), the IRQ bindings will be handled by the device's driver. @@ -411,13 +502,13 @@ continue fi - echo "$nic_name is Gvnic device, continuing set IRQ on $nic_name ." + echo "$nic_name is Gvnic device, continuing set IRQ on $nic_name." nic_numa_node=$(cat /sys/class/net/"$nic_name"/device/numa_node) if [[ $nic_numa_node -ne 1 ]]; then continue fi - bind_cores_index=$(set_irq_range "$nic_name" "$bind_cores_begin" "${numa1_irq_ranges[@]}") + bind_cores_index=$(set_irq_range "$nic_name" "$bind_cores_index" "${numa1_irq_ranges[@]}") done