Re: [PATCH v4 1/2] qom: new object to associate device to numa node

2023-11-30 Thread Markus Armbruster
 writes:

> From: Ankit Agrawal 
>
> NVIDIA GPUs support the MIG (Multi-Instance GPU) feature [1], which allows
> partitioning of the GPU device resources (including device memory) into
> several (up to 8) isolated instances. Each memory partition needs
> a dedicated NUMA node to operate. The partitions are not fixed and they
> can be created/deleted at runtime.
>
> Unfortunately, Linux does not provide a means to dynamically create/destroy
> NUMA nodes, and implementing such a feature is not expected to be trivial. The
> nodes that the OS discovers at boot time while parsing the SRAT remain fixed. So
> we utilize the Generic Initiator Affinity structures, which allow association
> between nodes and devices. Multiple GI structures per BDF are possible,
> allowing creation of multiple nodes by exposing a unique PXM in each of these
> structures.
>
> Introduce a new acpi-generic-initiator object to allow the host admin to
> provide the device and the corresponding NUMA nodes. QEMU maintains this
> association and uses this object to build the requisite GI Affinity Structure.
>
> An admin can provide the range of nodes through a uint16 array host-nodes
> and link it to a device by providing its id. Currently, only PCI devices are
> supported and an error is returned for ACPI devices. The following sample
> creates 8 nodes and links them to the PCI device dev0:
>
> -numa node,nodeid=2 \
> -numa node,nodeid=3 \
> -numa node,nodeid=4 \
> -numa node,nodeid=5 \
> -numa node,nodeid=6 \
> -numa node,nodeid=7 \
> -numa node,nodeid=8 \
> -numa node,nodeid=9 \
> -device vfio-pci-nohotplug,host=0009:01:00.0,bus=pcie.0,addr=04.0,rombar=0,id=dev0 \
> -object acpi-generic-initiator,id=gi0,pci-dev=dev0,host-nodes=2-9 \
>
> [1] https://www.nvidia.com/en-in/technologies/multi-instance-gpu
>
> Signed-off-by: Ankit Agrawal 
> ---
>  hw/acpi/acpi-generic-initiator.c | 84 
>  hw/acpi/meson.build  |  1 +
>  include/hw/acpi/acpi-generic-initiator.h | 30 +
>  qapi/qom.json| 18 +
>  4 files changed, 133 insertions(+)
>  create mode 100644 hw/acpi/acpi-generic-initiator.c
>  create mode 100644 include/hw/acpi/acpi-generic-initiator.h
>
> diff --git a/hw/acpi/acpi-generic-initiator.c 
> b/hw/acpi/acpi-generic-initiator.c
> new file mode 100644
> index 00..5ea51cb81e
> --- /dev/null
> +++ b/hw/acpi/acpi-generic-initiator.c
> @@ -0,0 +1,84 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/qdev-properties.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-builtin-visit.h"
> +#include "qapi/visitor.h"
> +#include "qom/object_interfaces.h"
> +#include "qom/object.h"
> +#include "hw/qdev-core.h"
> +#include "hw/vfio/vfio-common.h"
> +#include "hw/vfio/pci.h"
> +#include "hw/pci/pci_device.h"
> +#include "sysemu/numa.h"
> +#include "hw/acpi/acpi-generic-initiator.h"

Several #include directives are superfluous.  This compiles for me:

  #include "qemu/osdep.h"
  #include "hw/acpi/acpi-generic-initiator.h"
  #include "hw/pci/pci_device.h"
  #include "qapi/error.h"
  #include "qapi/qapi-builtin-visit.h"
  #include "qapi/visitor.h"
  #include "qemu/error-report.h"
  #include "sysemu/numa.h"

Yes, the alphabetical order is intentional.

> +
> +OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, 
> acpi_generic_initiator,
> +   ACPI_GENERIC_INITIATOR, OBJECT,
> +   { TYPE_USER_CREATABLE },
> +   { NULL })
> +
> +OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericInitiator, ACPI_GENERIC_INITIATOR)
> +
> +static void acpi_generic_initiator_init(Object *obj)
> +{
> +AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +gi->device = NULL;
> +gi->nodelist = NULL;
> +}
> +
> +static void acpi_generic_initiator_finalize(Object *obj)
> +{
> +AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +
> +g_free(gi->device);
> +qapi_free_uint16List(gi->nodelist);
> +}
> +
> +static void acpi_generic_initiator_set_pci_device(Object *obj, const char 
> *val,
> +  Error **errp)
> +{
> +AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +
> +gi->device = g_strdup(val);

The property is named "pci-dev", but the C member is called @device.
Making developers remember this mapping is not nice.  Suggest to rename
to @pci_dev.

> +}
> +
> +static void acpi_generic_initiator_set_acpi_device(Object *obj, const char 
> *val,
> +   Error **errp)
> +{
> +error_setg(errp, "Generic Initiator ACPI device not supported");
> +}

Let's add the property when it actually works.  More below at [*].

> +
> +static void
> +acpi_generic_initiator_set_host_nodes(Object *obj, Visitor *v, const char 
> *name,
> +  void *opaque, Error **errp)
> +{
> +AcpiG

Re: [PATCH v4 1/2] qom: new object to associate device to numa node

2023-11-27 Thread Alex Williamson
On Sun, 19 Nov 2023 18:31:10 +0530
 wrote:

> From: Ankit Agrawal 
> 
> NVIDIA GPUs support the MIG (Multi-Instance GPU) feature [1], which allows
> partitioning of the GPU device resources (including device memory) into
> several (up to 8) isolated instances. Each memory partition needs
> a dedicated NUMA node to operate. The partitions are not fixed and they
> can be created/deleted at runtime.
> 
> Unfortunately, Linux does not provide a means to dynamically create/destroy
> NUMA nodes, and implementing such a feature is not expected to be trivial. The
> nodes that the OS discovers at boot time while parsing the SRAT remain fixed. So
> we utilize the Generic Initiator Affinity structures, which allow association
> between nodes and devices. Multiple GI structures per BDF are possible,
> allowing creation of multiple nodes by exposing a unique PXM in each of these
> structures.
> 
> Introduce a new acpi-generic-initiator object to allow the host admin to
> provide the device and the corresponding NUMA nodes. QEMU maintains this
> association and uses this object to build the requisite GI Affinity Structure.
> 
> An admin can provide the range of nodes through a uint16 array host-nodes
> and link it to a device by providing its id. Currently, only PCI devices are
> supported and an error is returned for ACPI devices. The following sample
> creates 8 nodes and links them to the PCI device dev0:
> 
> -numa node,nodeid=2 \
> -numa node,nodeid=3 \
> -numa node,nodeid=4 \
> -numa node,nodeid=5 \
> -numa node,nodeid=6 \
> -numa node,nodeid=7 \
> -numa node,nodeid=8 \
> -numa node,nodeid=9 \
> -device vfio-pci-nohotplug,host=0009:01:00.0,bus=pcie.0,addr=04.0,rombar=0,id=dev0 \
> -object acpi-generic-initiator,id=gi0,pci-dev=dev0,host-nodes=2-9 \
> 
> [1] https://www.nvidia.com/en-in/technologies/multi-instance-gpu
> 
> Signed-off-by: Ankit Agrawal 
> ---
>  hw/acpi/acpi-generic-initiator.c | 84 
>  hw/acpi/meson.build  |  1 +
>  include/hw/acpi/acpi-generic-initiator.h | 30 +
>  qapi/qom.json| 18 +
>  4 files changed, 133 insertions(+)
>  create mode 100644 hw/acpi/acpi-generic-initiator.c
>  create mode 100644 include/hw/acpi/acpi-generic-initiator.h
> 
> diff --git a/hw/acpi/acpi-generic-initiator.c 
> b/hw/acpi/acpi-generic-initiator.c
> new file mode 100644
> index 00..5ea51cb81e
> --- /dev/null
> +++ b/hw/acpi/acpi-generic-initiator.c
> @@ -0,0 +1,84 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/qdev-properties.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-builtin-visit.h"
> +#include "qapi/visitor.h"
> +#include "qom/object_interfaces.h"
> +#include "qom/object.h"
> +#include "hw/qdev-core.h"
> +#include "hw/vfio/vfio-common.h"
> +#include "hw/vfio/pci.h"

There's nothing related to vfio here except for the example use case,
surely you don't need the above two headers.

> +#include "hw/pci/pci_device.h"
> +#include "sysemu/numa.h"
> +#include "hw/acpi/acpi-generic-initiator.h"
> +
> +OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, 
> acpi_generic_initiator,
> +   ACPI_GENERIC_INITIATOR, OBJECT,
> +   { TYPE_USER_CREATABLE },
> +   { NULL })
> +
> +OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericInitiator, ACPI_GENERIC_INITIATOR)
> +
> +static void acpi_generic_initiator_init(Object *obj)
> +{
> +AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +gi->device = NULL;
> +gi->nodelist = NULL;
> +}
> +
> +static void acpi_generic_initiator_finalize(Object *obj)
> +{
> +AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +
> +g_free(gi->device);
> +qapi_free_uint16List(gi->nodelist);
> +}
> +
> +static void acpi_generic_initiator_set_pci_device(Object *obj, const char 
> *val,
> +  Error **errp)
> +{
> +AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +
> +gi->device = g_strdup(val);
> +}
> +
> +static void acpi_generic_initiator_set_acpi_device(Object *obj, const char 
> *val,
> +   Error **errp)
> +{
> +error_setg(errp, "Generic Initiator ACPI device not supported");
> +}
> +
> +static void
> +acpi_generic_initiator_set_host_nodes(Object *obj, Visitor *v, const char 
> *name,
> +  void *opaque, Error **errp)
> +{
> +AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +uint16List *l;
> +
> +visit_type_uint16List(v, name, &(gi->nodelist), errp);
> +
> +for (l = gi->nodelist; l; l = l->next) {
> +if (l->value >= MAX_NODES) {
> +error_setg(errp, "Invalid host-nodes value: %d", l->value);
> +qapi_free_uint16List(gi->nodelist);
> +return;
> +}
> +}
> +}
> +
> +static void acp

[PATCH v4 1/2] qom: new object to associate device to numa node

2023-11-19 Thread ankita
From: Ankit Agrawal 

NVIDIA GPUs support the MIG (Multi-Instance GPU) feature [1], which allows
partitioning of the GPU device resources (including device memory) into
several (up to 8) isolated instances. Each memory partition needs
a dedicated NUMA node to operate. The partitions are not fixed and they
can be created/deleted at runtime.

Unfortunately, Linux does not provide a means to dynamically create/destroy
NUMA nodes, and implementing such a feature is not expected to be trivial. The
nodes that the OS discovers at boot time while parsing the SRAT remain fixed. So
we utilize the Generic Initiator Affinity structures, which allow association
between nodes and devices. Multiple GI structures per BDF are possible,
allowing creation of multiple nodes by exposing a unique PXM in each of these
structures.

Introduce a new acpi-generic-initiator object to allow the host admin to
provide the device and the corresponding NUMA nodes. QEMU maintains this
association and uses this object to build the requisite GI Affinity Structure.

An admin can provide the range of nodes through a uint16 array host-nodes
and link it to a device by providing its id. Currently, only PCI devices are
supported and an error is returned for ACPI devices. The following sample
creates 8 nodes and links them to the PCI device dev0:

-numa node,nodeid=2 \
-numa node,nodeid=3 \
-numa node,nodeid=4 \
-numa node,nodeid=5 \
-numa node,nodeid=6 \
-numa node,nodeid=7 \
-numa node,nodeid=8 \
-numa node,nodeid=9 \
-device vfio-pci-nohotplug,host=0009:01:00.0,bus=pcie.0,addr=04.0,rombar=0,id=dev0 \
-object acpi-generic-initiator,id=gi0,pci-dev=dev0,host-nodes=2-9 \

[1] https://www.nvidia.com/en-in/technologies/multi-instance-gpu

Signed-off-by: Ankit Agrawal 
---
 hw/acpi/acpi-generic-initiator.c | 84 
 hw/acpi/meson.build  |  1 +
 include/hw/acpi/acpi-generic-initiator.h | 30 +
 qapi/qom.json| 18 +
 4 files changed, 133 insertions(+)
 create mode 100644 hw/acpi/acpi-generic-initiator.c
 create mode 100644 include/hw/acpi/acpi-generic-initiator.h

diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
new file mode 100644
index 00..5ea51cb81e
--- /dev/null
+++ b/hw/acpi/acpi-generic-initiator.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qapi/qapi-builtin-visit.h"
+#include "qapi/visitor.h"
+#include "qom/object_interfaces.h"
+#include "qom/object.h"
+#include "hw/qdev-core.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/vfio/pci.h"
+#include "hw/pci/pci_device.h"
+#include "sysemu/numa.h"
+#include "hw/acpi/acpi-generic-initiator.h"
+
+OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, 
acpi_generic_initiator,
+   ACPI_GENERIC_INITIATOR, OBJECT,
+   { TYPE_USER_CREATABLE },
+   { NULL })
+
+OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericInitiator, ACPI_GENERIC_INITIATOR)
+
+static void acpi_generic_initiator_init(Object *obj)
+{
+AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+gi->device = NULL;
+gi->nodelist = NULL;
+}
+
+static void acpi_generic_initiator_finalize(Object *obj)
+{
+AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+
+g_free(gi->device);
+qapi_free_uint16List(gi->nodelist);
+}
+
+static void acpi_generic_initiator_set_pci_device(Object *obj, const char *val,
+  Error **errp)
+{
+AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+
+gi->device = g_strdup(val);
+}
+
+static void acpi_generic_initiator_set_acpi_device(Object *obj, const char 
*val,
+   Error **errp)
+{
+error_setg(errp, "Generic Initiator ACPI device not supported");
+}
+
+static void
+acpi_generic_initiator_set_host_nodes(Object *obj, Visitor *v, const char 
*name,
+  void *opaque, Error **errp)
+{
+AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+uint16List *l;
+
+visit_type_uint16List(v, name, &(gi->nodelist), errp);
+
+for (l = gi->nodelist; l; l = l->next) {
+if (l->value >= MAX_NODES) {
+error_setg(errp, "Invalid host-nodes value: %d", l->value);
+qapi_free_uint16List(gi->nodelist);
+return;
+}
+}
+}
+
+static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
+{
+object_class_property_add_str(oc, ACPI_GENERIC_INITIATOR_PCI_DEVICE_PROP,
+  NULL, acpi_generic_initiator_set_pci_device);
+object_class_property_add_str(oc, ACPI_GENERIC_INITIATOR_ACPI_DEVICE_PROP,
+  NULL, 
acpi_generic_initiator_set_acpi_device);
+object_class_property_add(oc, ACPI_GENER