Re: [PATCH v1 3/4] virtio: increase virtqueue sizes in new machine types

2019-11-05 Thread Michael S. Tsirkin
On Tue, Nov 05, 2019 at 07:11:04PM +0300, Denis Plotnikov wrote:
> Linux guests submit IO requests no longer than PAGE_SIZE * max_seg,
> the field reported by the SCSI controller. Thus a typical sequential read
> of 1 MB results in the following IO pattern from the guest:
>   8,16   115754 2.766095122  2071  D   R 2095104 + 1008 [dd]
>   8,16   115755 2.766108785  2071  D   R 2096112 + 1008 [dd]
>   8,16   115756 2.766113486  2071  D   R 2097120 + 32 [dd]
>   8,16   115757 2.767668961 0  C   R 2095104 + 1008 [0]
>   8,16   115758 2.768534315 0  C   R 2096112 + 1008 [0]
>   8,16   115759 2.768539782 0  C   R 2097120 + 32 [0]
> The IO was generated by
>   dd if=/dev/sda of=/dev/null bs=1024 iflag=direct
> 
> This effectively means that on rotational disks we will observe 3 IOPS
> for each 2 MBs processed. This definitely negatively affects both
> guest and host IO performance.
> 
> The cure is relatively simple - we should report lengthy scatter-gather
> ability of the SCSI controller. Fortunately the situation here is very
> good. VirtIO transport layer can accommodate 1024 items in one request
> while we are using only 128. This situation has been present almost
> since the very beginning. 2 items are dedicated for request metadata,
> thus we should publish VIRTQUEUE_MAX_SIZE - 2 as max_seg.
> 
> The following pattern is observed after the patch:
>   8,16   1 9921 2.662721340  2063  D   R 2095104 + 1024 [dd]
>   8,16   1 9922 2.662737585  2063  D   R 2096128 + 1024 [dd]
>   8,16   1 9923 2.665188167 0  C   R 2095104 + 1024 [0]
>   8,16   1 9924 2.665198777 0  C   R 2096128 + 1024 [0]
> which is much better.
> 
> To fix this particular case, the patch adds new machine types with
> virtqueue sizes extended to 256, which also implicitly increases
> max_seg to 254.
> 
> Suggested-by: Denis V. Lunev 
> Signed-off-by: Denis Plotnikov 
> ---



the way we normally do this is change the defaults to 256.
what is wrong with doing that?


>  hw/core/machine.c   | 14 ++++++++++++++
>  hw/i386/pc_piix.c   | 16 +++++++++++++---
>  hw/i386/pc_q35.c    | 14 ++++++++++++--
>  include/hw/boards.h |  6 ++++++
>  4 files changed, 45 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 55b08f1466..28013a0e3f 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -24,6 +24,13 @@
>  #include "hw/pci/pci.h"
>  #include "hw/mem/nvdimm.h"
>  
> +GlobalProperty hw_compat_4_0_1[] = {
> +{ "virtio-blk-device", "queue-size", "128" },
> +{ "virtio-scsi-device", "virtqueue_size", "128" },
> +{ "vhost-scsi-device", "virtqueue_size", "128" },
> +};
> +const size_t hw_compat_4_0_1_len = G_N_ELEMENTS(hw_compat_4_0_1);
> +
>  GlobalProperty hw_compat_4_0[] = {
>  { "virtio-balloon-device", "qemu-4-0-config-size", "true" },
>  };
> @@ -157,6 +164,13 @@ GlobalProperty hw_compat_2_1[] = {
>  };
>  const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1);
>  
> +GlobalProperty hw_compat[] = {
> +{ "virtio-blk-device", "queue-size", "256" },
> +{ "virtio-scsi-device", "virtqueue_size", "256" },
> +{ "vhost-scsi-device", "virtqueue_size", "256" },
> +};
> +const size_t hw_compat_len = G_N_ELEMENTS(hw_compat);
> +
>  static char *machine_get_accel(Object *obj, Error **errp)
>  {
>  MachineState *ms = MACHINE(obj);
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 8ad8e885c6..2260a61b1b 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -426,15 +426,27 @@ static void pc_i440fx_machine_options(MachineClass *m)
>  m->default_machine_opts = "firmware=bios-256k.bin";
>  m->default_display = "std";
>  machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
> +compat_props_add(m->compat_props, hw_compat, hw_compat_len);
>  }
>  
> -static void pc_i440fx_4_0_machine_options(MachineClass *m)
> +static void pc_i440fx_4_0_2_machine_options(MachineClass *m)
>  {
>  pc_i440fx_machine_options(m);
>  m->alias = "pc";
>  m->is_default = 1;
>  }
>  
> +DEFINE_I440FX_MACHINE(v4_0_2, "pc-i440fx-4.0.2", NULL,
> +  pc_i440fx_4_0_2_machine_options);
> +
> +static void pc_i440fx_4_0_machine_options(MachineClass *m)
> +{
> +pc_i440fx_4_0_2_machine_options(m);
> +m->alias = NULL;
> +m->is_default = 0;
> +compat_props_add(m->compat_props, hw_compat_4_0_1, hw_compat_4_0_1_len);
> +}
> +
>  DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL,
>pc_i440fx_4_0_machine_options);
>  
> @@ -443,9 +455,7 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m)
>  PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>  
>  pc_i440fx_4_0_machine_options(m);
> -m->is_default = 0;
>  m->smbus_no_migration_support = true;
> -m->alias = NULL;
>  pcmc->pvh_enabled = false;
>  compat_props_add(m->compat_props, hw_compat_3_1, hw_compat_3_1_len);
>  

[PATCH v1 3/4] virtio: increase virtqueue sizes in new machine types

2019-11-05 Thread Denis Plotnikov
Linux guests submit IO requests no longer than PAGE_SIZE * max_seg,
the field reported by the SCSI controller. Thus a typical sequential read
of 1 MB results in the following IO pattern from the guest:
  8,16   115754 2.766095122  2071  D   R 2095104 + 1008 [dd]
  8,16   115755 2.766108785  2071  D   R 2096112 + 1008 [dd]
  8,16   115756 2.766113486  2071  D   R 2097120 + 32 [dd]
  8,16   115757 2.767668961 0  C   R 2095104 + 1008 [0]
  8,16   115758 2.768534315 0  C   R 2096112 + 1008 [0]
  8,16   115759 2.768539782 0  C   R 2097120 + 32 [0]
The IO was generated by
  dd if=/dev/sda of=/dev/null bs=1024 iflag=direct

This effectively means that on rotational disks we will observe 3 IOPS
for each 2 MBs processed. This definitely negatively affects both
guest and host IO performance.

The cure is relatively simple - we should report lengthy scatter-gather
ability of the SCSI controller. Fortunately the situation here is very
good. VirtIO transport layer can accommodate 1024 items in one request
while we are using only 128. This situation has been present almost
since the very beginning. 2 items are dedicated for request metadata,
thus we should publish VIRTQUEUE_MAX_SIZE - 2 as max_seg.

The following pattern is observed after the patch:
  8,16   1 9921 2.662721340  2063  D   R 2095104 + 1024 [dd]
  8,16   1 9922 2.662737585  2063  D   R 2096128 + 1024 [dd]
  8,16   1 9923 2.665188167 0  C   R 2095104 + 1024 [0]
  8,16   1 9924 2.665198777 0  C   R 2096128 + 1024 [0]
which is much better.

To fix this particular case, the patch adds new machine types with
virtqueue sizes extended to 256, which also implicitly increases
max_seg to 254.

Suggested-by: Denis V. Lunev 
Signed-off-by: Denis Plotnikov 
---
 hw/core/machine.c   | 14 ++++++++++++++
 hw/i386/pc_piix.c   | 16 +++++++++++++---
 hw/i386/pc_q35.c    | 14 ++++++++++++--
 include/hw/boards.h |  6 ++++++
 4 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 55b08f1466..28013a0e3f 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -24,6 +24,13 @@
 #include "hw/pci/pci.h"
 #include "hw/mem/nvdimm.h"
 
+GlobalProperty hw_compat_4_0_1[] = {
+{ "virtio-blk-device", "queue-size", "128" },
+{ "virtio-scsi-device", "virtqueue_size", "128" },
+{ "vhost-scsi-device", "virtqueue_size", "128" },
+};
+const size_t hw_compat_4_0_1_len = G_N_ELEMENTS(hw_compat_4_0_1);
+
 GlobalProperty hw_compat_4_0[] = {
 { "virtio-balloon-device", "qemu-4-0-config-size", "true" },
 };
@@ -157,6 +164,13 @@ GlobalProperty hw_compat_2_1[] = {
 };
 const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1);
 
+GlobalProperty hw_compat[] = {
+{ "virtio-blk-device", "queue-size", "256" },
+{ "virtio-scsi-device", "virtqueue_size", "256" },
+{ "vhost-scsi-device", "virtqueue_size", "256" },
+};
+const size_t hw_compat_len = G_N_ELEMENTS(hw_compat);
+
 static char *machine_get_accel(Object *obj, Error **errp)
 {
 MachineState *ms = MACHINE(obj);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 8ad8e885c6..2260a61b1b 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -426,15 +426,27 @@ static void pc_i440fx_machine_options(MachineClass *m)
 m->default_machine_opts = "firmware=bios-256k.bin";
 m->default_display = "std";
 machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
+compat_props_add(m->compat_props, hw_compat, hw_compat_len);
 }
 
-static void pc_i440fx_4_0_machine_options(MachineClass *m)
+static void pc_i440fx_4_0_2_machine_options(MachineClass *m)
 {
 pc_i440fx_machine_options(m);
 m->alias = "pc";
 m->is_default = 1;
 }
 
+DEFINE_I440FX_MACHINE(v4_0_2, "pc-i440fx-4.0.2", NULL,
+  pc_i440fx_4_0_2_machine_options);
+
+static void pc_i440fx_4_0_machine_options(MachineClass *m)
+{
+pc_i440fx_4_0_2_machine_options(m);
+m->alias = NULL;
+m->is_default = 0;
+compat_props_add(m->compat_props, hw_compat_4_0_1, hw_compat_4_0_1_len);
+}
+
 DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL,
   pc_i440fx_4_0_machine_options);
 
@@ -443,9 +455,7 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m)
 PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
 
 pc_i440fx_4_0_machine_options(m);
-m->is_default = 0;
 m->smbus_no_migration_support = true;
-m->alias = NULL;
 pcmc->pvh_enabled = false;
 compat_props_add(m->compat_props, hw_compat_3_1, hw_compat_3_1_len);
 compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 45cc29d1ad..50ccd9ebcf 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -363,14 +363,25 @@ static void pc_q35_machine_options(MachineClass *m)
 machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
 machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);