Re: [PATCH 04/22] machine: move SMP initialization from vl.c

2020-10-22 Thread Philippe Mathieu-Daudé

On 10/21/20 10:56 PM, Paolo Bonzini wrote:

Initialize the object's values from the class when the object is
created, no need to have vl.c do it for us.

Signed-off-by: Paolo Bonzini 
---
  hw/core/machine.c | 7 +++
  softmmu/vl.c  | 7 ---
  2 files changed, 7 insertions(+), 7 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 12/22] vl: move bios_name out of softmmu/vl.c

2020-10-22 Thread Philippe Mathieu-Daudé

On 10/21/20 10:57 PM, Paolo Bonzini wrote:

bios_name is a legacy variable used by machine code.  Hide it
from softmmu/vl.c.

Signed-off-by: Paolo Bonzini 
---
  hw/core/machine.c | 5 +
  softmmu/vl.c  | 2 --
  2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 3c674bb05e..e4dac350d4 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -212,6 +212,8 @@ GlobalProperty hw_compat_2_1[] = {
  };
  const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1);
  
+const char *bios_name = NULL;


I expect checkpatch.pl to complain "do not zero-initialize global".


+
  static char *machine_get_kernel(Object *obj, Error **errp)
  {
  MachineState *ms = MACHINE(obj);
@@ -396,6 +398,9 @@ static void machine_set_firmware(Object *obj, const char 
*value, Error **errp)
  
  g_free(ms->firmware);

  ms->firmware = g_strdup(value);
+
+/* HACK */


With a slightly better rationale explanation:
Reviewed-by: Philippe Mathieu-Daudé 


+bios_name = ms->firmware;
  }
  
  static void machine_set_suppress_vmdesc(Object *obj, bool value, Error **errp)

diff --git a/softmmu/vl.c b/softmmu/vl.c
index 52e7d317d7..e32e209a82 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -123,7 +123,6 @@ static int data_dir_idx;
  static const char *mem_path;
  static const char *boot_order;
  static const char *boot_once;
-const char *bios_name = NULL;
  enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
  int display_opengl;
  const char* keyboard_layout = NULL;
@@ -4314,7 +4313,6 @@ void qemu_init(int argc, char **argv, char **envp)
  kernel_filename = qemu_opt_get(machine_opts, "kernel");
  initrd_filename = qemu_opt_get(machine_opts, "initrd");
  kernel_cmdline = qemu_opt_get(machine_opts, "append");
-bios_name = qemu_opt_get(machine_opts, "firmware");
  
  opts = qemu_opts_find(qemu_find_opts("boot-opts"), NULL);

  if (opts) {






Re: [PATCH 17/22] vl: load plugins as late as possible

2020-10-22 Thread Philippe Mathieu-Daudé

On 10/21/20 10:57 PM, Paolo Bonzini wrote:

There is no need to load plugins in the middle of default device processing,
move -plugin handling just before preconfig is entered.

Signed-off-by: Paolo Bonzini 
---
  softmmu/vl.c | 15 ---
  1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/softmmu/vl.c b/softmmu/vl.c
index 8577667b8f..75e57133ad 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -125,6 +125,7 @@ static const char *boot_order;
  static const char *boot_once;
  static const char *incoming;
  static const char *loadvm;
+static QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list);
  enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
  int mem_prealloc; /* force preallocation of physical target memory */
  int display_opengl;
@@ -3064,12 +3065,18 @@ void qemu_finish_machine_init(void)
  MachineClass *machine_class = MACHINE_GET_CLASS(current_machine);
  DisplayState *ds;
  
+/* from here on runstate is RUN_STATE_PRELAUNCH */

+


Please move this comment in your patch #11:
"vl: move prelaunch part of qemu_init to a new function"

Then:
Reviewed-by: Philippe Mathieu-Daudé 


  if (machine_class->default_ram_id && current_machine->ram_size &&
  numa_uses_legacy_mem() && !current_machine->ram_memdev_id) {
  create_default_memdev(current_machine, mem_path);
  }
  
-/* from here on runstate is RUN_STATE_PRELAUNCH */

+/* process plugin before CPUs are created, but once -smp has been parsed */
+if (qemu_plugin_load_list(&plugin_list)) {
+exit(1);
+}
+
  machine_run_board_init(current_machine);
  
  /*

@@ -3196,7 +3203,6 @@ void qemu_init(int argc, char **argv, char **envp)
  Error *err = NULL;
  bool have_custom_ram_size;
  BlockdevOptionsQueue bdo_queue = QSIMPLEQ_HEAD_INITIALIZER(bdo_queue);
-QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list);
  
  qemu_add_opts(&qemu_drive_opts);

  qemu_add_drive_opts(&qemu_legacy_drive_opts);
@@ -4164,11 +4170,6 @@ void qemu_init(int argc, char **argv, char **envp)
 machine_class->default_machine_opts, 0);
  }
  
-/* process plugin before CPUs are created, but once -smp has been parsed */

-if (qemu_plugin_load_list(&plugin_list)) {
-exit(1);
-}
-
  qemu_opts_foreach(qemu_find_opts("device"),
default_driver_check, NULL, NULL);
  qemu_opts_foreach(qemu_find_opts("global"),






Re: [PATCH 20/22] vl: move CHECKPOINT_INIT after preconfig

2020-10-22 Thread Philippe Mathieu-Daudé

On 10/21/20 10:57 PM, Paolo Bonzini wrote:

Move CHECKPOINT_INIT right before the machine initialization is
completed.  Everything before is essentially an extension of
command line parsing.

Signed-off-by: Paolo Bonzini 


Reviewed-by: Philippe Mathieu-Daudé 


---
  softmmu/vl.c | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/softmmu/vl.c b/softmmu/vl.c
index 0a6f47e7d6..e9391929f6 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -3194,6 +3194,11 @@ void qemu_finish_machine_init(void)
  exit(1);
  }
  
+/* This checkpoint is required by replay to separate prior clock

+   reading from the other reads, because timer polling functions query
+   clock values from the log. */
+replay_checkpoint(CHECKPOINT_INIT);
+
  machine_run_board_init(current_machine);
  
  /*

@@ -4424,11 +4429,6 @@ void qemu_init(int argc, char **argv, char **envp)
  if (foreach_device_config(DEV_DEBUGCON, debugcon_parse) < 0)
  exit(1);
  
-/* This checkpoint is required by replay to separate prior clock

-   reading from the other reads, because timer polling functions query
-   clock values from the log. */
-replay_checkpoint(CHECKPOINT_INIT);
-
  current_machine->boot_order = boot_order;
  
  /* parse features once if machine provides default cpu_type */







Re: [PATCH v3 2/2] hw/block/nvme: add the dataset management command

2020-10-22 Thread Klaus Jensen
On Oct 21 17:59, Keith Busch wrote:
> On Thu, Oct 22, 2020 at 12:17:36AM +0200, Klaus Jensen wrote:
> > +static void nvme_aio_discard_cb(void *opaque, int ret)
> > +{
> > +NvmeRequest *req = opaque;
> > +int *discards = req->opaque;
> > +
> > +trace_pci_nvme_aio_discard_cb(nvme_cid(req));
> > +
> > +if (ret) {
> > +req->status = NVME_INTERNAL_DEV_ERROR;
> > +trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret),
> > +   req->status);
> > +}
> > +
> > +if (discards && --(*discards) > 0) {
> > +return;
> > +}
> > +
> > +g_free(req->opaque);
> > +req->opaque = NULL;
> > +
> > +nvme_enqueue_req_completion(nvme_cq(req), req);
> > +}
> > +
> > +static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
> > +{
> > +NvmeNamespace *ns = req->ns;
> > +NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd;
> > +NvmeDsmRange *range = NULL;
> > +int *discards = NULL;
> > +
> > +uint32_t attr = le32_to_cpu(dsm->attributes);
> > +uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1;
> > +
> > +uint16_t status = NVME_SUCCESS;
> > +
> > +trace_pci_nvme_dsm(nvme_cid(req), nvme_nsid(ns), nr, attr);
> > +
> > +if (attr & NVME_DSMGMT_AD) {
> > +int64_t offset;
> > +size_t len;
> > +
> > +range = g_new(NvmeDsmRange, nr);
> > +
> > +status = nvme_dma(n, (uint8_t *)range, nr * sizeof(NvmeDsmRange),
> > +  DMA_DIRECTION_TO_DEVICE, req);
> > +if (status) {
> > +goto out;
> > +}
> > +
> > +discards = g_new0(int, 1);
> > +req->opaque = discards;
> 
> I think you need to initialize discards to 1 so that this function is
> holding a reference on it while the asynchronous part is running.
> Otherwise, the callback may see 'discards' at 0 and free it while we're
> trying to discard the next range.
> 

Yes - you are right. The callback might actually get called immediately
in some cases.

> > +
> > +for (int i = 0; i < nr; i++) {
> > +uint64_t slba = le64_to_cpu(range[i].slba);
> > +uint32_t nlb = le32_to_cpu(range[i].nlb);
> > +
> > +if (nvme_check_bounds(n, ns, slba, nlb)) {
> > +trace_pci_nvme_err_invalid_lba_range(slba, nlb,
> > + ns->id_ns.nsze);
> > +continue;
> > +}
> > +
> > +trace_pci_nvme_dsm_deallocate(nvme_cid(req), nvme_nsid(ns), 
> > slba,
> > +  nlb);
> > +
> > +offset = nvme_l2b(ns, slba);
> > +len = nvme_l2b(ns, nlb);
> > +
> > +while (len) {
> > +size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len);
> > +
> > +blk_aio_pdiscard(ns->blkconf.blk, offset, bytes,
> > + nvme_aio_discard_cb, req);
> > +
> > +(*discards)++;
> 
> The increment ought to be before the aio call so that the _cb can't see
> the value before it's accounted for. 
> 

Right. Same issue as above.

> > +
> > +offset += bytes;
> > +len -= bytes;
> > +}
> > +}
> > +
> > +if (*discards) {

And then we decrement here before checking for 0.

Thanks, looks better now.

> > +status = NVME_NO_COMPLETE;
> > +} else {
> > +g_free(discards);
> > +req->opaque = NULL;
> > +}
> > +}
> > +
> > +out:
> > +g_free(range);
> > +
> > +return status;
> > +}
> > +
> >  static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
> >  {
> >  block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
> > @@ -1088,6 +1183,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest 
> > *req)
> >  case NVME_CMD_WRITE:
> >  case NVME_CMD_READ:
> >  return nvme_rw(n, req);
> > +case NVME_CMD_DSM:
> > +return nvme_dsm(n, req);
> >  default:
> >  trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
> >  return NVME_INVALID_OPCODE | NVME_DNR;
> > @@ -2810,7 +2907,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> > *pci_dev)
> >  id->cqes = (0x4 << 4) | 0x4;
> >  id->nn = cpu_to_le32(n->num_namespaces);
> >  id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
> > -   NVME_ONCS_FEATURES);
> > +   NVME_ONCS_FEATURES | NVME_ONCS_DSM);
> 
> I think we should set ID_NS.NPDG and NPDA since there really is a
> preferred granularity and alignment.
> 

Yeah. I had some issues with doing this reliably. But I think I nailed
it, see my v4.

> >  
> >  id->vwc = 0x1;
> >  id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
> > -- 
> > 2.28.0
> > 

-- 
One of us - No more doubt, silence or taboo about mental illness.


signature.asc
Description: PGP signature


[PATCH v4 0/2] hw/block/nvme: dulbe and dsm support

2020-10-22 Thread Klaus Jensen
From: Klaus Jensen 

This adds support for the Deallocated or Unwritten Logical Block error
recovery feature as well as the Dataset Management command.

I wanted to add support for the NPDG and NPDA fields such that the host
could get a hint on how many blocks to request deallocation of for the
deallocation to actually happen, but I cannot find a reliable way to
get the actual block size of the underlying device. If it is an image on
a file system we could typically use the host page size, but if it is a
raw device, we might have 512 byte sectors that we can issue discards
on. And QEMU doesn't seem to provide this without root privileges at
least.

See the two patches for some gotchas.

I also integrated this into my zoned proposal. I'll spare you the v4, nobody
cares anyway. But I put it in my repo[1] for posterity.

  [1]: https://irrelevant.dk/g/pci-nvme.git/tag/?h=zoned-v4.

v4:
  - Removed mixed declaration and code (Keith)
  - Set NPDG and NPDA and account for the blockdev cluster size.

Klaus Jensen (2):
  hw/block/nvme: add dulbe support
  hw/block/nvme: add the dataset management command

 hw/block/nvme-ns.h|   4 +
 hw/block/nvme.h   |   2 +
 include/block/nvme.h  |  12 ++-
 hw/block/nvme-ns.c|  40 +++--
 hw/block/nvme.c   | 183 +-
 hw/block/trace-events |   4 +
 6 files changed, 236 insertions(+), 9 deletions(-)

-- 
2.28.0




[PATCH v4 1/2] hw/block/nvme: add dulbe support

2020-10-22 Thread Klaus Jensen
From: Klaus Jensen 

Add support for reporting the Deallocated or Unwritten Logical Block
Error (DULBE).

Rely on the block status flags reported by the block layer and consider
any block with the BDRV_BLOCK_ZERO flag to be deallocated.

Multiple factors affect when a Write Zeroes command results in
deallocation of blocks.

  * the underlying file system block size
  * the blockdev format
  * the 'discard' and 'logical_block_size' parameters

 format | discard | wz (512b)  wz (4kb)  wz (64kb)
--------------------------------------------------
  qcow2   ignore    n          n         y
  qcow2   unmap     n          n         y
  raw     ignore    n          y         y
  raw     unmap     n          y         y

So, this works best with an image in raw format and 4kb LBAs, since
holes can then be punched on a per-block basis (this assumes a file
system with a 4kb block size, YMMV). A qcow2 image uses a cluster size
of 64kb by default and blocks will only be marked deallocated if a full
cluster is zeroed or discarded. However, this *is* consistent with the
spec since Write Zeroes "should" deallocate the block if the Deallocate
attribute is set and "may" deallocate if the Deallocate attribute is not
set. Thus, we always try to deallocate (the BDRV_REQ_MAY_UNMAP flag is
always set).

Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
---
 hw/block/nvme-ns.h|  4 +++
 include/block/nvme.h  |  5 +++
 hw/block/nvme-ns.c|  8 +++--
 hw/block/nvme.c   | 83 +--
 hw/block/trace-events |  4 +++
 5 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 83734f4606e1..44bf6271b744 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -31,6 +31,10 @@ typedef struct NvmeNamespace {
 NvmeIdNs id_ns;
 
 NvmeNamespaceParams params;
+
+struct {
+uint32_t err_rec;
+} features;
 } NvmeNamespace;
 
 static inline uint32_t nvme_nsid(NvmeNamespace *ns)
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 8a46d9cf015f..966c3bb304bd 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -687,6 +687,7 @@ enum NvmeStatusCodes {
 NVME_E2E_REF_ERROR  = 0x0284,
 NVME_CMP_FAILURE= 0x0285,
 NVME_ACCESS_DENIED  = 0x0286,
+NVME_DULB   = 0x0287,
 NVME_MORE   = 0x2000,
 NVME_DNR= 0x4000,
 NVME_NO_COMPLETE= 0x,
@@ -903,6 +904,9 @@ enum NvmeIdCtrlLpa {
 #define NVME_AEC_NS_ATTR(aec)   ((aec >> 8) & 0x1)
 #define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)
 
+#define NVME_ERR_REC_TLER(err_rec)  (err_rec & 0x)
+#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x1)
+
 enum NvmeFeatureIds {
 NVME_ARBITRATION= 0x1,
 NVME_POWER_MANAGEMENT   = 0x2,
@@ -1023,6 +1027,7 @@ enum NvmeNsIdentifierType {
 
 
 #define NVME_ID_NS_NSFEAT_THIN(nsfeat)  ((nsfeat & 0x1))
+#define NVME_ID_NS_NSFEAT_DULBE(nsfeat) ((nsfeat >> 2) & 0x1)
 #define NVME_ID_NS_FLBAS_EXTENDED(flbas)((flbas >> 4) & 0x1)
 #define NVME_ID_NS_FLBAS_INDEX(flbas)   ((flbas & 0xf))
 #define NVME_ID_NS_MC_SEPARATE(mc)  ((mc >> 1) & 0x1)
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 31c80cdf5b5f..f1cc734c60f5 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -33,9 +33,7 @@ static void nvme_ns_init(NvmeNamespace *ns)
 NvmeIdNs *id_ns = &ns->id_ns;
 int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
 
-if (blk_get_flags(ns->blkconf.blk) & BDRV_O_UNMAP) {
-ns->id_ns.dlfeat = 0x9;
-}
+ns->id_ns.dlfeat = 0x9;
 
 id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);
 
@@ -44,6 +42,9 @@ static void nvme_ns_init(NvmeNamespace *ns)
 /* no thin provisioning */
 id_ns->ncap = id_ns->nsze;
 id_ns->nuse = id_ns->ncap;
+
+/* support DULBE */
+id_ns->nsfeat |= 0x4;
 }
 
 static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
@@ -92,6 +93,7 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error 
**errp)
 }
 
 nvme_ns_init(ns);
+
 if (nvme_register_namespace(n, ns, errp)) {
 return -1;
 }
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 2896bb49b9c0..68fa5aa0b1c0 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -105,6 +105,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
 
 static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
 [NVME_TEMPERATURE_THRESHOLD]= NVME_FEAT_CAP_CHANGE,
+[NVME_ERROR_RECOVERY]   = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
 [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
 [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
 [NVME_ASYNCHRONOUS_EVENT_CONF]  = NVME_FEAT_CAP_CHANGE,
@@ -878,6 +879,41 @@ static inline uint16_t nvme_check_bounds(NvmeCtrl *n, 
NvmeNamespace *ns,
 return NVME_SUCCESS;
 }
 
+static u

[PATCH v5 2/6] [RISCV_PM] Support CSRs required for RISC-V PM extension except for ones in hypervisor mode

2020-10-22 Thread Alexey Baturo
Signed-off-by: Alexey Baturo 
---
 target/riscv/cpu.c  |   3 +
 target/riscv/cpu.h  |  12 ++
 target/riscv/cpu_bits.h |  66 ++
 target/riscv/csr.c  | 271 
 4 files changed, 352 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 4e305249b3..db72f5cf59 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -438,6 +438,9 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 if (cpu->cfg.ext_h) {
 target_misa |= RVH;
 }
+if (cpu->cfg.ext_j) {
+env->mmte |= PM_EXT_INITIAL;
+}
 if (cpu->cfg.ext_v) {
 target_misa |= RVV;
 if (!is_power_of_2(cpu->cfg.vlen)) {
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index eca611a367..c236f01fff 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -226,6 +226,18 @@ struct CPURISCVState {
 
 /* True if in debugger mode.  */
 bool debugger;
+
+/*
+ * CSRs for PM
+ * TODO: move these csr to appropriate groups
+ */
+target_ulong mmte;
+target_ulong mpmmask;
+target_ulong mpmbase;
+target_ulong spmmask;
+target_ulong spmbase;
+target_ulong upmmask;
+target_ulong upmbase;
 #endif
 
 float_status fp_status;
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index bd36062877..84c93c77ae 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -354,6 +354,21 @@
 #define CSR_MHPMCOUNTER30H  0xb9e
 #define CSR_MHPMCOUNTER31H  0xb9f
 
+/* Custom user register */
+#define CSR_UMTE0x8c0
+#define CSR_UPMMASK 0x8c1
+#define CSR_UPMBASE 0x8c2
+
+/* Custom machine register */
+#define CSR_MMTE0x7c0
+#define CSR_MPMMASK 0x7c1
+#define CSR_MPMBASE 0x7c2
+
+/* Custom supervisor register */
+#define CSR_SMTE0x9c0
+#define CSR_SPMMASK 0x9c1
+#define CSR_SPMBASE 0x9c2
+
 /* Legacy Machine Protection and Translation (priv v1.9.1) */
 #define CSR_MBASE   0x380
 #define CSR_MBOUND  0x381
@@ -604,4 +619,55 @@
 #define MIE_UTIE   (1 << IRQ_U_TIMER)
 #define MIE_SSIE   (1 << IRQ_S_SOFT)
 #define MIE_USIE   (1 << IRQ_U_SOFT)
+
+/* general mte CSR bits*/
+#define PM_ENABLE   0x0001ULL
+#define PM_CURRENT  0x0002ULL
+#define PM_XS_MASK  0x0003ULL
+
+/* PM XS bits values */
+#define PM_EXT_DISABLE  0xULL
+#define PM_EXT_INITIAL  0x0001ULL
+#define PM_EXT_CLEAN0x0002ULL
+#define PM_EXT_DIRTY0x0003ULL
+
+/* offsets for every pair of control bits per each priv level */
+#define XS_OFFSET0ULL
+#define U_OFFSET 2ULL
+#define S_OFFSET 4ULL
+#define M_OFFSET 6ULL
+
+#define PM_XS_BITS   (PM_XS_MASK << XS_OFFSET)
+#define U_PM_ENABLE  (PM_ENABLE  << U_OFFSET)
+#define U_PM_CURRENT (PM_CURRENT << U_OFFSET)
+#define S_PM_ENABLE  (PM_ENABLE  << S_OFFSET)
+#define S_PM_CURRENT (PM_CURRENT << S_OFFSET)
+#define M_PM_ENABLE  (PM_ENABLE  << M_OFFSET)
+
+/* mmte CSR bits */
+#define MMTE_PM_XS_BITS PM_XS_BITS
+#define MMTE_U_PM_ENABLEU_PM_ENABLE
+#define MMTE_U_PM_CURRENT   U_PM_CURRENT
+#define MMTE_S_PM_ENABLES_PM_ENABLE
+#define MMTE_S_PM_CURRENT   S_PM_CURRENT
+#define MMTE_M_PM_ENABLEM_PM_ENABLE
+#define MMTE_MASK   (MMTE_U_PM_ENABLE | MMTE_U_PM_CURRENT | \
+ MMTE_S_PM_ENABLE | MMTE_S_PM_CURRENT | \
+ MMTE_M_PM_ENABLE | MMTE_PM_XS_BITS)
+
+/* smte CSR bits */
+#define SMTE_PM_XS_BITS PM_XS_BITS
+#define SMTE_U_PM_ENABLEU_PM_ENABLE
+#define SMTE_U_PM_CURRENT   U_PM_CURRENT
+#define SMTE_S_PM_ENABLES_PM_ENABLE
+#define SMTE_S_PM_CURRENT   S_PM_CURRENT
+#define SMTE_MASK   (SMTE_U_PM_ENABLE | SMTE_U_PM_CURRENT | \
+ SMTE_S_PM_ENABLE | SMTE_S_PM_CURRENT | \
+ SMTE_PM_XS_BITS)
+
+/* umte CSR bits */
+#define UMTE_U_PM_ENABLEU_PM_ENABLE
+#define UMTE_U_PM_CURRENT   U_PM_CURRENT
+#define UMTE_MASK   (UMTE_U_PM_ENABLE | MMTE_U_PM_CURRENT)
+
 #endif
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index aaef6c6f20..e4839c8fc9 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -140,6 +140,11 @@ static int any(CPURISCVState *env, int csrno)
 return 0;
 }
 
+static int umode(CPURISCVState *env, int csrno)
+{
+return -!riscv_has_ext(env, RVU);
+}
+
 static int smode(CPURISCVState *env, int csrno)
 {
 return -!riscv_has_ext(env, RVS);
@@ -1250,6 +1255,257 @@ static int write_pmpaddr(CPURISCVState *env, int csrno, 
target_ulong val)
 return 0;
 }
 
+/*
+ * Functions to access Pointer Masking feature registers
+ * We have to check if current priv lvl could modify
+ * csr in given mode
+ */
+static int check_pm_current_disabled(CPURISCVState *env, int csrno)
+{
+int csr_priv = get_field(csrno, 0xC00);
+/*
+ * If priv l

[PATCH v5 4/6] [RISCV_PM] Support pointer masking for RISC-V for i/c/f/d/a types of instructions

2020-10-22 Thread Alexey Baturo
Signed-off-by: Alexey Baturo 
---
 target/riscv/insn_trans/trans_rva.c.inc |  3 +++
 target/riscv/insn_trans/trans_rvd.c.inc |  2 ++
 target/riscv/insn_trans/trans_rvf.c.inc |  2 ++
 target/riscv/insn_trans/trans_rvi.c.inc |  2 ++
 target/riscv/translate.c| 14 ++
 5 files changed, 23 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rva.c.inc 
b/target/riscv/insn_trans/trans_rva.c.inc
index be8a9f06dd..5559e347ba 100644
--- a/target/riscv/insn_trans/trans_rva.c.inc
+++ b/target/riscv/insn_trans/trans_rva.c.inc
@@ -26,6 +26,7 @@ static inline bool gen_lr(DisasContext *ctx, arg_atomic *a, 
MemOp mop)
 if (a->rl) {
 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
 }
+gen_pm_adjust_address(ctx, src1, src1);
 tcg_gen_qemu_ld_tl(load_val, src1, ctx->mem_idx, mop);
 if (a->aq) {
 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
@@ -46,6 +47,7 @@ static inline bool gen_sc(DisasContext *ctx, arg_atomic *a, 
MemOp mop)
 TCGLabel *l2 = gen_new_label();
 
 gen_get_gpr(src1, a->rs1);
+gen_pm_adjust_address(ctx, src1, src1);
 tcg_gen_brcond_tl(TCG_COND_NE, load_res, src1, l1);
 
 gen_get_gpr(src2, a->rs2);
@@ -91,6 +93,7 @@ static bool gen_amo(DisasContext *ctx, arg_atomic *a,
 gen_get_gpr(src1, a->rs1);
 gen_get_gpr(src2, a->rs2);
 
+gen_pm_adjust_address(ctx, src1, src1);
 (*func)(src2, src1, src2, ctx->mem_idx, mop);
 
 gen_set_gpr(a->rd, src2);
diff --git a/target/riscv/insn_trans/trans_rvd.c.inc 
b/target/riscv/insn_trans/trans_rvd.c.inc
index 4f832637fa..935342f66d 100644
--- a/target/riscv/insn_trans/trans_rvd.c.inc
+++ b/target/riscv/insn_trans/trans_rvd.c.inc
@@ -25,6 +25,7 @@ static bool trans_fld(DisasContext *ctx, arg_fld *a)
 TCGv t0 = tcg_temp_new();
 gen_get_gpr(t0, a->rs1);
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 
 tcg_gen_qemu_ld_i64(cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEQ);
 
@@ -40,6 +41,7 @@ static bool trans_fsd(DisasContext *ctx, arg_fsd *a)
 TCGv t0 = tcg_temp_new();
 gen_get_gpr(t0, a->rs1);
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 
 tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], t0, ctx->mem_idx, MO_TEQ);
 
diff --git a/target/riscv/insn_trans/trans_rvf.c.inc 
b/target/riscv/insn_trans/trans_rvf.c.inc
index 3dfec8211d..04b3c3eb3d 100644
--- a/target/riscv/insn_trans/trans_rvf.c.inc
+++ b/target/riscv/insn_trans/trans_rvf.c.inc
@@ -30,6 +30,7 @@ static bool trans_flw(DisasContext *ctx, arg_flw *a)
 TCGv t0 = tcg_temp_new();
 gen_get_gpr(t0, a->rs1);
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 
 tcg_gen_qemu_ld_i64(cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEUL);
 gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
@@ -47,6 +48,7 @@ static bool trans_fsw(DisasContext *ctx, arg_fsw *a)
 gen_get_gpr(t0, a->rs1);
 
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 
 tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], t0, ctx->mem_idx, MO_TEUL);
 
diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
b/target/riscv/insn_trans/trans_rvi.c.inc
index d04ca0394c..bee7f6be46 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -141,6 +141,7 @@ static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp 
memop)
 TCGv t1 = tcg_temp_new();
 gen_get_gpr(t0, a->rs1);
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 
 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, memop);
 gen_set_gpr(a->rd, t1);
@@ -180,6 +181,7 @@ static bool gen_store(DisasContext *ctx, arg_sb *a, MemOp 
memop)
 TCGv dat = tcg_temp_new();
 gen_get_gpr(t0, a->rs1);
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 gen_get_gpr(dat, a->rs2);
 
 tcg_gen_qemu_st_tl(dat, t0, ctx->mem_idx, memop);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 79dca2291b..a7cbf909f3 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -101,6 +101,16 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
 tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
 }
 
+/*
+ * Temp stub: generates address adjustment for PointerMasking
+ */
+static void gen_pm_adjust_address(DisasContext *s,
+  TCGv_i64  dst,
+  TCGv_i64  src)
+{
+tcg_gen_mov_i64(dst, src);
+}
+
 /*
  * A narrow n-bit operation, where n < FLEN, checks that input operands
  * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.
@@ -380,6 +390,7 @@ static void gen_load_c(DisasContext *ctx, uint32_t opc, int 
rd, int rs1,
 TCGv t1 = tcg_temp_new();
 gen_get_gpr(t0, rs1);
 tcg_gen_addi_tl(t0, t0, imm);
+gen_pm_adjust_address(ctx, t0, t0);
 int memop = tcg_memop_lookup[(opc >> 12) & 0x7];
 
 if (memop < 0) {
@@ -400,6 +411,7 @@ static void gen_store_c(DisasContext *ctx, uint32_t opc, 

[PATCH v5 1/6] [RISCV_PM] Add J-extension into RISC-V

2020-10-22 Thread Alexey Baturo
---
 target/riscv/cpu.c | 1 +
 target/riscv/cpu.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 0bbfd7f457..4e305249b3 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -516,6 +516,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("u", RISCVCPU, cfg.ext_u, true),
 /* This is experimental so mark with 'x-' */
 DEFINE_PROP_BOOL("x-h", RISCVCPU, cfg.ext_h, false),
+DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
 DEFINE_PROP_BOOL("x-v", RISCVCPU, cfg.ext_v, false),
 DEFINE_PROP_BOOL("Counters", RISCVCPU, cfg.ext_counters, true),
 DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index de275782e6..eca611a367 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -66,6 +66,7 @@
 #define RVS RV('S')
 #define RVU RV('U')
 #define RVH RV('H')
+#define RVJ RV('J')
 
 /* S extension denotes that Supervisor mode exists, however it is possible
to have a core that support S mode but does not have an MMU and there
@@ -277,6 +278,7 @@ struct RISCVCPU {
 bool ext_s;
 bool ext_u;
 bool ext_h;
+bool ext_j;
 bool ext_v;
 bool ext_counters;
 bool ext_ifencei;
-- 
2.20.1




[PATCH v5 6/6] [RISCV_PM] Allow experimental J-ext to be turned on

2020-10-22 Thread Alexey Baturo
Signed-off-by: Alexey Baturo 
---
 target/riscv/cpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 1c00d9ea26..56633c14eb 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -465,6 +465,7 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 }
 if (cpu->cfg.ext_j) {
 env->mmte |= PM_EXT_INITIAL;
+target_misa |= RVJ;
 }
 if (cpu->cfg.ext_v) {
 target_misa |= RVV;
-- 
2.20.1




[PATCH v5 5/6] [RISCV_PM] Implement address masking functions required for RISC-V Pointer Masking extension

2020-10-22 Thread Alexey Baturo
From: Anatoly Parshintsev 

Signed-off-by: Anatoly Parshintsev 
---
 target/riscv/cpu.h   | 19 +++
 target/riscv/translate.c | 34 --
 2 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index c236f01fff..13accaa232 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -386,6 +386,7 @@ FIELD(TB_FLAGS, VL_EQ_VLMAX, 2, 1)
 FIELD(TB_FLAGS, LMUL, 3, 2)
 FIELD(TB_FLAGS, SEW, 5, 3)
 FIELD(TB_FLAGS, VILL, 8, 1)
+FIELD(TB_FLAGS, PM_ENABLED, 9, 1)
 
 /*
  * A simplification for VLMAX
@@ -432,6 +433,24 @@ static inline void cpu_get_tb_cpu_state(CPURISCVState 
*env, target_ulong *pc,
 if (riscv_cpu_fp_enabled(env)) {
 flags |= env->mstatus & MSTATUS_FS;
 }
+if (riscv_has_ext(env, RVJ)) {
+int priv = cpu_mmu_index(env, false);
+bool pm_enabled = false;
+switch (priv) {
+case PRV_U:
+pm_enabled = env->mmte & U_PM_ENABLE;
+break;
+case PRV_S:
+pm_enabled = env->mmte & S_PM_ENABLE;
+break;
+case PRV_M:
+pm_enabled = env->mmte & M_PM_ENABLE;
+break;
+default:
+g_assert_not_reached();
+}
+flags = FIELD_DP32(flags, TB_FLAGS, PM_ENABLED, pm_enabled);
+}
 #endif
 *pflags = flags;
 }
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index a7cbf909f3..b3e7b93bc9 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -36,6 +36,9 @@ static TCGv cpu_gpr[32], cpu_pc, cpu_vl;
 static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
 static TCGv load_res;
 static TCGv load_val;
+/* globals for PM CSRs */
+static TCGv pm_mask[4];
+static TCGv pm_base[4];
 
 #include "exec/gen-icount.h"
 
@@ -63,6 +66,10 @@ typedef struct DisasContext {
 uint16_t vlen;
 uint16_t mlen;
 bool vl_eq_vlmax;
+/* PointerMasking extension */
+bool pm_enabled;
+TCGv pm_mask;
+TCGv pm_base;
 } DisasContext;
 
 #ifdef TARGET_RISCV64
@@ -102,13 +109,19 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
 }
 
 /*
- * Temp stub: generates address adjustment for PointerMasking
+ * Generates address adjustment for PointerMasking
  */
 static void gen_pm_adjust_address(DisasContext *s,
   TCGv_i64  dst,
   TCGv_i64  src)
 {
-tcg_gen_mov_i64(dst, src);
+if (!s->pm_enabled) {
+/* Load unmodified address */
+tcg_gen_mov_i64(dst, src);
+} else {
+tcg_gen_andc_i64(dst, src, s->pm_mask);
+tcg_gen_or_i64(dst, dst, s->pm_base);
+}
 }
 
 /*
@@ -826,6 +839,10 @@ static void riscv_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL);
 ctx->mlen = 1 << (ctx->sew  + 3 - ctx->lmul);
 ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
+ctx->pm_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_ENABLED);
+int priv = cpu_mmu_index(env, false);
+ctx->pm_mask = pm_mask[priv];
+ctx->pm_base = pm_base[priv];
 }
 
 static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu)
@@ -945,4 +962,17 @@ void riscv_translate_init(void)
  "load_res");
 load_val = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_val),
  "load_val");
+/* Assign PM CSRs to tcg globals */
+pm_mask[PRV_U] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, upmmask), "upmmask");
+pm_base[PRV_U] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, upmbase), "upmbase");
+pm_mask[PRV_S] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, spmmask), "spmmask");
+pm_base[PRV_S] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, spmbase), "spmbase");
+pm_mask[PRV_M] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, mpmmask), "mpmmask");
+pm_base[PRV_M] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, mpmbase), "mpmbase");
 }
-- 
2.20.1




[PATCH v5 3/6] [RISCV_PM] Print new PM CSRs in QEMU logs

2020-10-22 Thread Alexey Baturo
Signed-off-by: Alexey Baturo 
---
 target/riscv/cpu.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index db72f5cf59..1c00d9ea26 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -255,6 +255,31 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, 
int flags)
 qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "htval ", env->htval);
 qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mtval2 ", env->mtval2);
 }
+if (riscv_has_ext(env, RVJ)) {
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mmte", env->mmte);
+switch (env->priv) {
+case PRV_U:
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "upmbase ",
+ env->upmbase);
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "upmmask ",
+ env->upmmask);
+break;
+case PRV_S:
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "spmbase ",
+ env->spmbase);
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "spmmask ",
+ env->spmmask);
+break;
+case PRV_M:
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mpmbase ",
+ env->mpmbase);
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mpmmask ",
+ env->mpmmask);
+break;
+default:
+assert(0 && "Unreachable");
+}
+}
 #endif
 
 for (i = 0; i < 32; i++) {
-- 
2.20.1




[PATCH v4 2/2] hw/block/nvme: add the dataset management command

2020-10-22 Thread Klaus Jensen
From: Klaus Jensen 

Add support for the Dataset Management command and the Deallocate
attribute. Deallocation results in discards being sent to the underlying
block device. Whether or not the blocks are actually deallocated is
affected by the same factors as Write Zeroes (see previous commit).

 format | discard | dsm (512b)  dsm (4kb)  dsm (64kb)
 -----------------------------------------------------
  qcow2   ignore    n           n          n
  qcow2   unmap     n           n          y
  raw     ignore    n           n          n
  raw     unmap     n           y          y

Again, a raw format and 4kb LBAs are preferable.

In order to set the Namespace Preferred Deallocate Granularity and
Alignment fields (NPDG and NPDA), choose a sane minimum discard
granularity of 4kb. If we are using a passthru device supporting discard
at a 512b granularity, user should set the discard_granularity property
explicitly. NPDG and NPDA will also account for the cluster_size of the
block driver if required (i.e. for QCOW2).

See NVM Express 1.3d, Section 6.7 ("Dataset Management command").

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.h  |   2 +
 include/block/nvme.h |   7 ++-
 hw/block/nvme-ns.c   |  36 ++--
 hw/block/nvme.c  | 100 ++-
 4 files changed, 139 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index e080a2318a50..574333caa3f9 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -28,6 +28,7 @@ typedef struct NvmeRequest {
 struct NvmeNamespace*ns;
 BlockAIOCB  *aiocb;
 uint16_tstatus;
+void*opaque;
 NvmeCqe cqe;
 NvmeCmd cmd;
 BlockAcctCookie acct;
@@ -60,6 +61,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
 case NVME_CMD_WRITE:return "NVME_NVM_CMD_WRITE";
 case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
 case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
+case NVME_CMD_DSM:  return "NVME_NVM_CMD_DSM";
 default:return "NVME_NVM_CMD_UNKNOWN";
 }
 }
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 966c3bb304bd..e95ff6ca9b37 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -990,7 +990,12 @@ typedef struct QEMU_PACKED NvmeIdNs {
 uint16_tnabspf;
 uint16_tnoiob;
 uint8_t nvmcap[16];
-uint8_t rsvd64[40];
+uint16_tnpwg;
+uint16_tnpwa;
+uint16_tnpdg;
+uint16_tnpda;
+uint16_tnows;
+uint8_t rsvd74[30];
 uint8_t nguid[16];
 uint64_teui64;
 NvmeLBAFlbaf[16];
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index f1cc734c60f5..840651db7256 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -28,10 +28,14 @@
 #include "nvme.h"
 #include "nvme-ns.h"
 
-static void nvme_ns_init(NvmeNamespace *ns)
+#define MIN_DISCARD_GRANULARITY (4 * KiB)
+
+static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 {
+BlockDriverInfo bdi;
 NvmeIdNs *id_ns = &ns->id_ns;
 int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
+int npdg, ret;
 
 ns->id_ns.dlfeat = 0x9;
 
@@ -43,8 +47,25 @@ static void nvme_ns_init(NvmeNamespace *ns)
 id_ns->ncap = id_ns->nsze;
 id_ns->nuse = id_ns->ncap;
 
-/* support DULBE */
-id_ns->nsfeat |= 0x4;
+/* support DULBE and I/O optimization fields */
+id_ns->nsfeat |= (0x4 | 0x10);
+
+npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size;
+
+ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "could not get block driver info");
+return ret;
+}
+
+if (bdi.cluster_size &&
+bdi.cluster_size > ns->blkconf.discard_granularity) {
+npdg = bdi.cluster_size / ns->blkconf.logical_block_size;
+}
+
+id_ns->npda = id_ns->npdg = npdg - 1;
+
+return 0;
 }
 
 static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
@@ -59,6 +80,11 @@ static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, 
Error **errp)
 return -1;
 }
 
+if (ns->blkconf.discard_granularity == -1) {
+ns->blkconf.discard_granularity =
+MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
+}
+
 ns->size = blk_getlength(ns->blkconf.blk);
 if (ns->size < 0) {
 error_setg_errno(errp, -ns->size, "could not get blockdev size");
@@ -92,7 +118,9 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error 
**errp)
 return -1;
 }
 
-nvme_ns_init(ns);
+if (nvme_ns_init(ns, errp)) {
+return -1;
+}
 
 if (nvme_register_namespace(n, ns, errp)) {
 return -1;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 68fa5aa0b1c0..8b3f4d24968b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -95

Re: [PATCH v26 05/17] vfio: Add VM state change handler to know state of VM

2020-10-22 Thread Cornelia Huck
On Wed, 21 Oct 2020 11:03:23 +0530
Kirti Wankhede  wrote:

> On 10/20/2020 4:21 PM, Cornelia Huck wrote:
> > On Sun, 18 Oct 2020 01:54:56 +0530
> > Kirti Wankhede  wrote:
> >   
> >> On 9/29/2020 4:33 PM, Dr. David Alan Gilbert wrote:  
> >>> * Cornelia Huck (coh...@redhat.com) wrote:  
>  On Wed, 23 Sep 2020 04:54:07 +0530
>  Kirti Wankhede  wrote:

> > +static void vfio_vmstate_change(void *opaque, int running, RunState 
> > state)
> > +{
> > +VFIODevice *vbasedev = opaque;
> > +
> > +if ((vbasedev->vm_running != running)) {
> > +int ret;
> > +uint32_t value = 0, mask = 0;
> > +
> > +if (running) {
> > +value = VFIO_DEVICE_STATE_RUNNING;
> > +if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) {
> > +mask = ~VFIO_DEVICE_STATE_RESUMING;  
> 
>  I've been staring at this for some time and I think that the desired
>  result is
>  - set _RUNNING
>  - if _RESUMING was set, clear it, but leave the other bits intact  
> >>
> >> Up to here, you're correct.
> >>  
>  - if _RESUMING was not set, clear everything previously set
>  This would really benefit from a comment (or am I the only one
>  struggling here?)
>  
> >>
> >> Here mask should be ~0. Correcting it.  
> > 
> > Hm, now I'm confused. With value == _RUNNING, ~_RUNNING and ~0 as mask
> > should be equivalent, shouldn't they?
> >   
> 
> I too got confused after reading your comment.
> Let's walk through the device states and the transitions that can happen here:
> 
> if running
>   - device state could be either _SAVING or _RESUMING or _STOP. Both 
> _SAVING and _RESUMING can't be set at the same time; that is the error state. 
> _STOP means 0.
>   - Transition from _SAVING to _RUNNING can happen if there is migration 
> failure, in that case we have to clear _SAVING
> - Transition from _RESUMING to _RUNNING can happen on resuming and we 
> have to clear _RESUMING.
> - In both the above cases, we have to set _RUNNING and clear rest 2 bits.
> Then:
> mask = ~VFIO_DEVICE_STATE_MASK;
> value = VFIO_DEVICE_STATE_RUNNING;

ok

> 
> if !running
> - device state could be either _RUNNING or _SAVING|_RUNNING. Here we 
> have to reset running bit.
> Then:
> mask = ~VFIO_DEVICE_STATE_RUNNING;
> value = 0;

ok

> 
> I'll add comment in the code above.

That will help.

I'm a bit worried though that all that reasoning which flags are set or
cleared when is quite complex, and it's easy to make mistakes.

Can we model this as a FSM, where an event (running state changes)
transitions the device state from one state to another? I (personally)
find FSMs easier to comprehend, but I'm not sure whether that change
would be too invasive. If others can parse the state changes with that
mask/value interface, I won't object to it.

> 
> 
> >>
> >>  
> > +}
> > +} else {
> > +mask = ~VFIO_DEVICE_STATE_RUNNING;
> > +}




[PATCH v5 0/6] RISC-V Pointer Masking implementation

2020-10-22 Thread Alexey Baturo
Hi,

Addressing Alistair's comment: the J-ext enabling patch is now the last one in the 
series.

Thanks

Alexey Baturo (5):
  [RISCV_PM] Add J-extension into RISC-V
  [RISCV_PM] Support CSRs required for RISC-V PM extension except for
ones in hypervisor mode
  [RISCV_PM] Print new PM CSRs in QEMU logs
  [RISCV_PM] Support pointer masking for RISC-V for i/c/f/d/a types of
instructions
  [RISCV_PM] Allow experimental J-ext to be turned on

Anatoly Parshintsev (1):
  [RISCV_PM] Implement address masking functions required for RISC-V
Pointer Masking extension

 target/riscv/cpu.c  |  30 +++
 target/riscv/cpu.h  |  33 +++
 target/riscv/cpu_bits.h |  66 ++
 target/riscv/csr.c  | 271 
 target/riscv/insn_trans/trans_rva.c.inc |   3 +
 target/riscv/insn_trans/trans_rvd.c.inc |   2 +
 target/riscv/insn_trans/trans_rvf.c.inc |   2 +
 target/riscv/insn_trans/trans_rvi.c.inc |   2 +
 target/riscv/translate.c|  44 
 9 files changed, 453 insertions(+)

-- 
2.20.1




Re: [PATCH] migration/block-dirty-bitmap: fix larger granularity bitmaps

2020-10-22 Thread Stefan Reiter

On 10/21/20 5:17 PM, Vladimir Sementsov-Ogievskiy wrote:

21.10.2020 17:44, Stefan Reiter wrote:

sectors_per_chunk is a 64 bit integer, but the calculation is done in 32
bits, leading to an overflow for coarse bitmap granularities.

If that results in the value 0, it leads to a hang where no progress is
made but send_bitmap_bits is constantly called with nr_sectors being 0.

Signed-off-by: Stefan Reiter 
---
  migration/block-dirty-bitmap.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/migration/block-dirty-bitmap.c 
b/migration/block-dirty-bitmap.c

index 5bef793ac0..5398869e2b 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -562,8 +562,9 @@ static int add_bitmaps_to_list(DBMSaveState *s, 
BlockDriverState *bs,

  dbms->bitmap_alias = g_strdup(bitmap_alias);
  dbms->bitmap = bitmap;
  dbms->total_sectors = bdrv_nb_sectors(bs);
-    dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
+    dbms->sectors_per_chunk = CHUNK_SIZE * 8lu *


I'd prefer 8llu for absolute safety.


  bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+    assert(dbms->sectors_per_chunk != 0);


I doubt that we need this assertion. Bug fixed, and it's obviously 
impossible.
And if we really want to assert that there is no overflow (assuming 
future changes),

it should look like this:

   assert(bdrv_dirty_bitmap_granularity(bitmap) < (1ull << 63) / 
CHUNK_SIZE / 8 >> BDRV_SECTOR_BITS);


to cover not only the corner case but any overflow. And of course we should 
modify the original expression

to do ">> BDRV_SECTOR_BITS" earlier than all multiplies, like

   dbms->sectors_per_chunk = CHUNK_SIZE * 8llu * 
(bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS);



But I think that the s/8/8ull/ change alone is enough.



I agree, and I wouldn't mind removing the assert, but just to clarify it 
was mostly meant to prevent the case where the migration gets stuck 
entirely. Even if the calculation is wrong, it would at least do 
_something_ instead of endlessly looping.


Maybe an

assert(nr_sectors != 0);

in send_bitmap_bits instead for that?


  if (bdrv_dirty_bitmap_enabled(bitmap)) {
  dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED;
  }




With 8llu and with or without assertion:
Reviewed-by: Vladimir Sementsov-Ogievskiy 






Re: [PATCH v11 09/13] copy-on-read: skip non-guest reads if no copy needed

2020-10-22 Thread Andrey Shinkevich



On 21.10.2020 23:43, Andrey Shinkevich wrote:

On 14.10.2020 18:22, Vladimir Sementsov-Ogievskiy wrote:

14.10.2020 15:51, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

If the flag BDRV_REQ_PREFETCH was set, pass it further to the
COR-driver to skip unneeded reading. It can be taken into account for
the COR-algorithms optimization. That check is being made during the
block stream job at the moment.

Signed-off-by: Andrey Shinkevich 
---


[...]


diff --git a/block/io.c b/block/io.c
index 11df188..bff1808 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1512,7 +1512,8 @@ static int coroutine_fn 
bdrv_aligned_preadv(BdrvChild *child,

  max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
  if (bytes <= max_bytes && bytes <= max_transfer) {
-    ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 
qiov_offset, 0);

+    ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset,
+ flags & bs->supported_read_flags);



When BDRV_REQ_PREFETCH is passed, qiov may be (and generally should 
be) NULL. This means that we can't just drop the flag when calling a 
driver that doesn't support it.


Actually, if driver doesn't support the PREFETCH flag we should do 
nothing.





Ah, OK.  I see.  I expected this to be a separate patch.  I still wonder
why it isn’t.




Could it be part of patch 07? I mean introduce new field 
supported_read_flags and handle it in generic code in one patch, prior 
to implementing support for it in COR driver.





We have to add the supported flags for the COR driver in the same patch. 
Or before handling the supported_read_flags at the generic layer 
(handling zero does not make sense). Otherwise, test #216 (where 
the COR-filter is applied) will not pass.


Andrey


I have found a workaround and am going to send all the related patches 
as a separate series.


Andrey



Re: [PATCH v5 0/6] RISC-V Pointer Masking implementation

2020-10-22 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20201022074309.3210-1-space.monkey.deliv...@gmail.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20201022074309.3210-1-space.monkey.deliv...@gmail.com
Subject: [PATCH v5 0/6] RISC-V Pointer Masking implementation

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 - [tag update]  patchew/20201021163136.27324-1-alex.ben...@linaro.org -> 
patchew/20201021163136.27324-1-alex.ben...@linaro.org
 - [tag update]  patchew/20201021212721.440373-1-pet...@redhat.com -> 
patchew/20201021212721.440373-1-pet...@redhat.com
 * [new tag] 
patchew/20201022074309.3210-1-space.monkey.deliv...@gmail.com -> 
patchew/20201022074309.3210-1-space.monkey.deliv...@gmail.com
Switched to a new branch 'test'
047d80b Allow experimental J-ext to be turned on
2f7a895 Implement address masking functions required for RISC-V Pointer Masking 
extension
68d35c2 Support pointer masking for RISC-V for i/c/f/d/a types of instructions
19b617f Print new PM CSRs in QEMU logs
97bfef1 Support CSRs required for RISC-V PM extension except for ones in 
hypervisor mode
a02813c Add J-extension into RISC-V

=== OUTPUT BEGIN ===
1/6 Checking commit a02813c05dac (Add J-extension into RISC-V)
ERROR: Missing Signed-off-by: line(s)

total: 1 errors, 0 warnings, 21 lines checked

Patch 1/6 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

2/6 Checking commit 97bfef184299 (Support CSRs required for RISC-V PM extension 
except for ones in hypervisor mode)
3/6 Checking commit 19b617fc2d40 (Print new PM CSRs in QEMU logs)
4/6 Checking commit 68d35c24e0d4 (Support pointer masking for RISC-V for 
i/c/f/d/a types of instructions)
5/6 Checking commit 2f7a895949b8 (Implement address masking functions required 
for RISC-V Pointer Masking extension)
6/6 Checking commit 047d80ba64a4 (Allow experimental J-ext to be turned on)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20201022074309.3210-1-space.monkey.deliv...@gmail.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PATCH v6 2/6] [RISCV_PM] Support CSRs required for RISC-V PM extension except for ones in hypervisor mode

2020-10-22 Thread Alexey Baturo
Signed-off-by: Alexey Baturo 
---
 target/riscv/cpu.c  |   3 +
 target/riscv/cpu.h  |  12 ++
 target/riscv/cpu_bits.h |  66 ++
 target/riscv/csr.c  | 271 
 4 files changed, 352 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 4e305249b3..db72f5cf59 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -438,6 +438,9 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 if (cpu->cfg.ext_h) {
 target_misa |= RVH;
 }
+if (cpu->cfg.ext_j) {
+env->mmte |= PM_EXT_INITIAL;
+}
 if (cpu->cfg.ext_v) {
 target_misa |= RVV;
 if (!is_power_of_2(cpu->cfg.vlen)) {
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index eca611a367..c236f01fff 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -226,6 +226,18 @@ struct CPURISCVState {
 
 /* True if in debugger mode.  */
 bool debugger;
+
+/*
+ * CSRs for PM
+ * TODO: move these csr to appropriate groups
+ */
+target_ulong mmte;
+target_ulong mpmmask;
+target_ulong mpmbase;
+target_ulong spmmask;
+target_ulong spmbase;
+target_ulong upmmask;
+target_ulong upmbase;
 #endif
 
 float_status fp_status;
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index bd36062877..84c93c77ae 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -354,6 +354,21 @@
 #define CSR_MHPMCOUNTER30H  0xb9e
 #define CSR_MHPMCOUNTER31H  0xb9f
 
+/* Custom user register */
+#define CSR_UMTE0x8c0
+#define CSR_UPMMASK 0x8c1
+#define CSR_UPMBASE 0x8c2
+
+/* Custom machine register */
+#define CSR_MMTE0x7c0
+#define CSR_MPMMASK 0x7c1
+#define CSR_MPMBASE 0x7c2
+
+/* Custom supervisor register */
+#define CSR_SMTE0x9c0
+#define CSR_SPMMASK 0x9c1
+#define CSR_SPMBASE 0x9c2
+
 /* Legacy Machine Protection and Translation (priv v1.9.1) */
 #define CSR_MBASE   0x380
 #define CSR_MBOUND  0x381
@@ -604,4 +619,55 @@
 #define MIE_UTIE   (1 << IRQ_U_TIMER)
 #define MIE_SSIE   (1 << IRQ_S_SOFT)
 #define MIE_USIE   (1 << IRQ_U_SOFT)
+
+/* general mte CSR bits*/
+#define PM_ENABLE   0x0001ULL
+#define PM_CURRENT  0x0002ULL
+#define PM_XS_MASK  0x0003ULL
+
+/* PM XS bits values */
+#define PM_EXT_DISABLE  0xULL
+#define PM_EXT_INITIAL  0x0001ULL
+#define PM_EXT_CLEAN0x0002ULL
+#define PM_EXT_DIRTY0x0003ULL
+
+/* offsets for every pair of control bits per each priv level */
+#define XS_OFFSET0ULL
+#define U_OFFSET 2ULL
+#define S_OFFSET 4ULL
+#define M_OFFSET 6ULL
+
+#define PM_XS_BITS   (PM_XS_MASK << XS_OFFSET)
+#define U_PM_ENABLE  (PM_ENABLE  << U_OFFSET)
+#define U_PM_CURRENT (PM_CURRENT << U_OFFSET)
+#define S_PM_ENABLE  (PM_ENABLE  << S_OFFSET)
+#define S_PM_CURRENT (PM_CURRENT << S_OFFSET)
+#define M_PM_ENABLE  (PM_ENABLE  << M_OFFSET)
+
+/* mmte CSR bits */
+#define MMTE_PM_XS_BITS PM_XS_BITS
+#define MMTE_U_PM_ENABLEU_PM_ENABLE
+#define MMTE_U_PM_CURRENT   U_PM_CURRENT
+#define MMTE_S_PM_ENABLES_PM_ENABLE
+#define MMTE_S_PM_CURRENT   S_PM_CURRENT
+#define MMTE_M_PM_ENABLEM_PM_ENABLE
+#define MMTE_MASK   (MMTE_U_PM_ENABLE | MMTE_U_PM_CURRENT | \
+ MMTE_S_PM_ENABLE | MMTE_S_PM_CURRENT | \
+ MMTE_M_PM_ENABLE | MMTE_PM_XS_BITS)
+
+/* smte CSR bits */
+#define SMTE_PM_XS_BITS PM_XS_BITS
+#define SMTE_U_PM_ENABLEU_PM_ENABLE
+#define SMTE_U_PM_CURRENT   U_PM_CURRENT
+#define SMTE_S_PM_ENABLES_PM_ENABLE
+#define SMTE_S_PM_CURRENT   S_PM_CURRENT
+#define SMTE_MASK   (SMTE_U_PM_ENABLE | SMTE_U_PM_CURRENT | \
+ SMTE_S_PM_ENABLE | SMTE_S_PM_CURRENT | \
+ SMTE_PM_XS_BITS)
+
+/* umte CSR bits */
+#define UMTE_U_PM_ENABLEU_PM_ENABLE
+#define UMTE_U_PM_CURRENT   U_PM_CURRENT
+#define UMTE_MASK   (UMTE_U_PM_ENABLE | MMTE_U_PM_CURRENT)
+
 #endif
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index aaef6c6f20..e4839c8fc9 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -140,6 +140,11 @@ static int any(CPURISCVState *env, int csrno)
 return 0;
 }
 
+static int umode(CPURISCVState *env, int csrno)
+{
+return -!riscv_has_ext(env, RVU);
+}
+
 static int smode(CPURISCVState *env, int csrno)
 {
 return -!riscv_has_ext(env, RVS);
@@ -1250,6 +1255,257 @@ static int write_pmpaddr(CPURISCVState *env, int csrno, 
target_ulong val)
 return 0;
 }
 
+/*
+ * Functions to access Pointer Masking feature registers
+ * We have to check if current priv lvl could modify
+ * csr in given mode
+ */
+static int check_pm_current_disabled(CPURISCVState *env, int csrno)
+{
+int csr_priv = get_field(csrno, 0xC00);
+/*
+ * If priv l

[PATCH v6 1/6] [RISCV_PM] Add J-extension into RISC-V

2020-10-22 Thread Alexey Baturo
Signed-off-by: Alexey Baturo 
---
 target/riscv/cpu.c | 1 +
 target/riscv/cpu.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 0bbfd7f457..4e305249b3 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -516,6 +516,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("u", RISCVCPU, cfg.ext_u, true),
 /* This is experimental so mark with 'x-' */
 DEFINE_PROP_BOOL("x-h", RISCVCPU, cfg.ext_h, false),
+DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
 DEFINE_PROP_BOOL("x-v", RISCVCPU, cfg.ext_v, false),
 DEFINE_PROP_BOOL("Counters", RISCVCPU, cfg.ext_counters, true),
 DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index de275782e6..eca611a367 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -66,6 +66,7 @@
 #define RVS RV('S')
 #define RVU RV('U')
 #define RVH RV('H')
+#define RVJ RV('J')
 
 /* S extension denotes that Supervisor mode exists, however it is possible
to have a core that support S mode but does not have an MMU and there
@@ -277,6 +278,7 @@ struct RISCVCPU {
 bool ext_s;
 bool ext_u;
 bool ext_h;
+bool ext_j;
 bool ext_v;
 bool ext_counters;
 bool ext_ifencei;
-- 
2.20.1




[PATCH v6 0/6] RISC-V Pointer Masking implementation

2020-10-22 Thread Alexey Baturo
Hi,

Added missing sign-off on the first patch.

Thanks

Alexey Baturo (5):
  [RISCV_PM] Add J-extension into RISC-V
  [RISCV_PM] Support CSRs required for RISC-V PM extension except for
ones in hypervisor mode
  [RISCV_PM] Print new PM CSRs in QEMU logs
  [RISCV_PM] Support pointer masking for RISC-V for i/c/f/d/a types of
instructions
  [RISCV_PM] Allow experimental J-ext to be turned on

Anatoly Parshintsev (1):
  [RISCV_PM] Implement address masking functions required for RISC-V
Pointer Masking extension

 target/riscv/cpu.c  |  30 +++
 target/riscv/cpu.h  |  33 +++
 target/riscv/cpu_bits.h |  66 ++
 target/riscv/csr.c  | 271 
 target/riscv/insn_trans/trans_rva.c.inc |   3 +
 target/riscv/insn_trans/trans_rvd.c.inc |   2 +
 target/riscv/insn_trans/trans_rvf.c.inc |   2 +
 target/riscv/insn_trans/trans_rvi.c.inc |   2 +
 target/riscv/translate.c|  44 
 9 files changed, 453 insertions(+)

-- 
2.20.1




[PATCH v6 3/6] [RISCV_PM] Print new PM CSRs in QEMU logs

2020-10-22 Thread Alexey Baturo
Signed-off-by: Alexey Baturo 
---
 target/riscv/cpu.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index db72f5cf59..1c00d9ea26 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -255,6 +255,31 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, 
int flags)
 qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "htval ", env->htval);
 qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mtval2 ", env->mtval2);
 }
+if (riscv_has_ext(env, RVJ)) {
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mmte", env->mmte);
+switch (env->priv) {
+case PRV_U:
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "upmbase ",
+ env->upmbase);
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "upmmask ",
+ env->upmmask);
+break;
+case PRV_S:
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "spmbase ",
+ env->spmbase);
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "spmmask ",
+ env->spmmask);
+break;
+case PRV_M:
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mpmbase ",
+ env->mpmbase);
+qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mpmmask ",
+ env->mpmmask);
+break;
+default:
+assert(0 && "Unreachable");
+}
+}
 #endif
 
 for (i = 0; i < 32; i++) {
-- 
2.20.1




[PATCH] i386/cpu: Expose the PTWRITE to the guest

2020-10-22 Thread Luwei Kang
PTWRITE provides a mechanism by which software can instrument the
Intel PT trace. The current implementation masks off this
feature even when PTWRITE is supported on the host, because the
Intel PT CPUID is a constant value (ICX CPUID) in QEMU. This patch
exposes the PTWRITE feature to the guest.

Signed-off-by: Luwei Kang 
---
 target/i386/cpu.c | 24 
 target/i386/cpu.h |  4 
 2 files changed, 28 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index aeabdd5bd4..242ba8a870 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -672,6 +672,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t 
vendor1,
 #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1)
   /* missing:
   CPUID_XSAVE_XSAVEC, CPUID_XSAVE_XSAVES */
+#define TCG_14_0_EBX_FEATURES 0
 #define TCG_14_0_ECX_FEATURES 0
 
 typedef enum FeatureWordType {
@@ -1302,6 +1303,26 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] 
= {
 }
 },
 
+[FEAT_14_0_EBX] = {
+.type = CPUID_FEATURE_WORD,
+.feat_names = {
+NULL, NULL, NULL, NULL,
+"ptwrite", NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+},
+.cpuid = {
+.eax = 0x14,
+.needs_ecx = true, .ecx = 0,
+.reg = R_EBX,
+},
+.tcg_features = TCG_14_0_EBX_FEATURES,
+},
+
 [FEAT_14_0_ECX] = {
 .type = CPUID_FEATURE_WORD,
 .feat_names = {
@@ -5764,6 +5785,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *eax = INTEL_PT_MAX_SUBLEAF;
 *ebx = INTEL_PT_MINIMAL_EBX;
 *ecx = INTEL_PT_MINIMAL_ECX;
+if (env->features[FEAT_14_0_EBX] & CPUID_14_0_EBX_PTWRITE) {
+*ebx |= CPUID_14_0_EBX_PTWRITE;
+}
 if (env->features[FEAT_14_0_ECX] & CPUID_14_0_ECX_LIP) {
 *ecx |= CPUID_14_0_ECX_LIP;
 }
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 1fcd93e39a..9fffe6eb6f 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -541,6 +541,7 @@ typedef enum FeatureWord {
 FEAT_VMX_EPT_VPID_CAPS,
 FEAT_VMX_BASIC,
 FEAT_VMX_VMFUNC,
+FEAT_14_0_EBX,
 FEAT_14_0_ECX,
 FEATURE_WORDS,
 } FeatureWord;
@@ -798,6 +799,9 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 /* AVX512 BFloat16 Instruction */
 #define CPUID_7_1_EAX_AVX512_BF16   (1U << 5)
 
+/* Intel PT support PTWRITE */
+#define CPUID_14_0_EBX_PTWRITE  (1U << 4)
+
 /* Packets which contain IP payload have LIP values */
 #define CPUID_14_0_ECX_LIP  (1U << 31)
 
-- 
2.18.4




[PATCH v6 4/6] [RISCV_PM] Support pointer masking for RISC-V for i/c/f/d/a types of instructions

2020-10-22 Thread Alexey Baturo
Signed-off-by: Alexey Baturo 
---
 target/riscv/insn_trans/trans_rva.c.inc |  3 +++
 target/riscv/insn_trans/trans_rvd.c.inc |  2 ++
 target/riscv/insn_trans/trans_rvf.c.inc |  2 ++
 target/riscv/insn_trans/trans_rvi.c.inc |  2 ++
 target/riscv/translate.c| 14 ++
 5 files changed, 23 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rva.c.inc 
b/target/riscv/insn_trans/trans_rva.c.inc
index be8a9f06dd..5559e347ba 100644
--- a/target/riscv/insn_trans/trans_rva.c.inc
+++ b/target/riscv/insn_trans/trans_rva.c.inc
@@ -26,6 +26,7 @@ static inline bool gen_lr(DisasContext *ctx, arg_atomic *a, 
MemOp mop)
 if (a->rl) {
 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
 }
+gen_pm_adjust_address(ctx, src1, src1);
 tcg_gen_qemu_ld_tl(load_val, src1, ctx->mem_idx, mop);
 if (a->aq) {
 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
@@ -46,6 +47,7 @@ static inline bool gen_sc(DisasContext *ctx, arg_atomic *a, 
MemOp mop)
 TCGLabel *l2 = gen_new_label();
 
 gen_get_gpr(src1, a->rs1);
+gen_pm_adjust_address(ctx, src1, src1);
 tcg_gen_brcond_tl(TCG_COND_NE, load_res, src1, l1);
 
 gen_get_gpr(src2, a->rs2);
@@ -91,6 +93,7 @@ static bool gen_amo(DisasContext *ctx, arg_atomic *a,
 gen_get_gpr(src1, a->rs1);
 gen_get_gpr(src2, a->rs2);
 
+gen_pm_adjust_address(ctx, src1, src1);
 (*func)(src2, src1, src2, ctx->mem_idx, mop);
 
 gen_set_gpr(a->rd, src2);
diff --git a/target/riscv/insn_trans/trans_rvd.c.inc 
b/target/riscv/insn_trans/trans_rvd.c.inc
index 4f832637fa..935342f66d 100644
--- a/target/riscv/insn_trans/trans_rvd.c.inc
+++ b/target/riscv/insn_trans/trans_rvd.c.inc
@@ -25,6 +25,7 @@ static bool trans_fld(DisasContext *ctx, arg_fld *a)
 TCGv t0 = tcg_temp_new();
 gen_get_gpr(t0, a->rs1);
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 
 tcg_gen_qemu_ld_i64(cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEQ);
 
@@ -40,6 +41,7 @@ static bool trans_fsd(DisasContext *ctx, arg_fsd *a)
 TCGv t0 = tcg_temp_new();
 gen_get_gpr(t0, a->rs1);
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 
 tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], t0, ctx->mem_idx, MO_TEQ);
 
diff --git a/target/riscv/insn_trans/trans_rvf.c.inc 
b/target/riscv/insn_trans/trans_rvf.c.inc
index 3dfec8211d..04b3c3eb3d 100644
--- a/target/riscv/insn_trans/trans_rvf.c.inc
+++ b/target/riscv/insn_trans/trans_rvf.c.inc
@@ -30,6 +30,7 @@ static bool trans_flw(DisasContext *ctx, arg_flw *a)
 TCGv t0 = tcg_temp_new();
 gen_get_gpr(t0, a->rs1);
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 
 tcg_gen_qemu_ld_i64(cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEUL);
 gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
@@ -47,6 +48,7 @@ static bool trans_fsw(DisasContext *ctx, arg_fsw *a)
 gen_get_gpr(t0, a->rs1);
 
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 
 tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], t0, ctx->mem_idx, MO_TEUL);
 
diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
b/target/riscv/insn_trans/trans_rvi.c.inc
index d04ca0394c..bee7f6be46 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -141,6 +141,7 @@ static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp 
memop)
 TCGv t1 = tcg_temp_new();
 gen_get_gpr(t0, a->rs1);
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 
 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, memop);
 gen_set_gpr(a->rd, t1);
@@ -180,6 +181,7 @@ static bool gen_store(DisasContext *ctx, arg_sb *a, MemOp 
memop)
 TCGv dat = tcg_temp_new();
 gen_get_gpr(t0, a->rs1);
 tcg_gen_addi_tl(t0, t0, a->imm);
+gen_pm_adjust_address(ctx, t0, t0);
 gen_get_gpr(dat, a->rs2);
 
 tcg_gen_qemu_st_tl(dat, t0, ctx->mem_idx, memop);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 79dca2291b..a7cbf909f3 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -101,6 +101,16 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
 tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
 }
 
+/*
+ * Temp stub: generates address adjustment for PointerMasking
+ */
+static void gen_pm_adjust_address(DisasContext *s,
+  TCGv_i64  dst,
+  TCGv_i64  src)
+{
+tcg_gen_mov_i64(dst, src);
+}
+
 /*
  * A narrow n-bit operation, where n < FLEN, checks that input operands
  * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.
@@ -380,6 +390,7 @@ static void gen_load_c(DisasContext *ctx, uint32_t opc, int 
rd, int rs1,
 TCGv t1 = tcg_temp_new();
 gen_get_gpr(t0, rs1);
 tcg_gen_addi_tl(t0, t0, imm);
+gen_pm_adjust_address(ctx, t0, t0);
 int memop = tcg_memop_lookup[(opc >> 12) & 0x7];
 
 if (memop < 0) {
@@ -400,6 +411,7 @@ static void gen_store_c(DisasContext *ctx, uint32_t opc, 

[PATCH v6 5/6] [RISCV_PM] Implement address masking functions required for RISC-V Pointer Masking extension

2020-10-22 Thread Alexey Baturo
From: Anatoly Parshintsev 

Signed-off-by: Anatoly Parshintsev 
---
 target/riscv/cpu.h   | 19 +++
 target/riscv/translate.c | 34 --
 2 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index c236f01fff..13accaa232 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -386,6 +386,7 @@ FIELD(TB_FLAGS, VL_EQ_VLMAX, 2, 1)
 FIELD(TB_FLAGS, LMUL, 3, 2)
 FIELD(TB_FLAGS, SEW, 5, 3)
 FIELD(TB_FLAGS, VILL, 8, 1)
+FIELD(TB_FLAGS, PM_ENABLED, 9, 1)
 
 /*
  * A simplification for VLMAX
@@ -432,6 +433,24 @@ static inline void cpu_get_tb_cpu_state(CPURISCVState 
*env, target_ulong *pc,
 if (riscv_cpu_fp_enabled(env)) {
 flags |= env->mstatus & MSTATUS_FS;
 }
+if (riscv_has_ext(env, RVJ)) {
+int priv = cpu_mmu_index(env, false);
+bool pm_enabled = false;
+switch (priv) {
+case PRV_U:
+pm_enabled = env->mmte & U_PM_ENABLE;
+break;
+case PRV_S:
+pm_enabled = env->mmte & S_PM_ENABLE;
+break;
+case PRV_M:
+pm_enabled = env->mmte & M_PM_ENABLE;
+break;
+default:
+g_assert_not_reached();
+}
+flags = FIELD_DP32(flags, TB_FLAGS, PM_ENABLED, pm_enabled);
+}
 #endif
 *pflags = flags;
 }
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index a7cbf909f3..b3e7b93bc9 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -36,6 +36,9 @@ static TCGv cpu_gpr[32], cpu_pc, cpu_vl;
 static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
 static TCGv load_res;
 static TCGv load_val;
+/* globals for PM CSRs */
+static TCGv pm_mask[4];
+static TCGv pm_base[4];
 
 #include "exec/gen-icount.h"
 
@@ -63,6 +66,10 @@ typedef struct DisasContext {
 uint16_t vlen;
 uint16_t mlen;
 bool vl_eq_vlmax;
+/* PointerMasking extension */
+bool pm_enabled;
+TCGv pm_mask;
+TCGv pm_base;
 } DisasContext;
 
 #ifdef TARGET_RISCV64
@@ -102,13 +109,19 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
 }
 
 /*
- * Temp stub: generates address adjustment for PointerMasking
+ * Generates address adjustment for PointerMasking
  */
 static void gen_pm_adjust_address(DisasContext *s,
   TCGv_i64  dst,
   TCGv_i64  src)
 {
-tcg_gen_mov_i64(dst, src);
+if (!s->pm_enabled) {
+/* Load unmodified address */
+tcg_gen_mov_i64(dst, src);
+} else {
+tcg_gen_andc_i64(dst, src, s->pm_mask);
+tcg_gen_or_i64(dst, dst, s->pm_base);
+}
 }
 
 /*
@@ -826,6 +839,10 @@ static void riscv_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL);
 ctx->mlen = 1 << (ctx->sew  + 3 - ctx->lmul);
 ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
+ctx->pm_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_ENABLED);
+int priv = cpu_mmu_index(env, false);
+ctx->pm_mask = pm_mask[priv];
+ctx->pm_base = pm_base[priv];
 }
 
 static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu)
@@ -945,4 +962,17 @@ void riscv_translate_init(void)
  "load_res");
 load_val = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_val),
  "load_val");
+/* Assign PM CSRs to tcg globals */
+pm_mask[PRV_U] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, upmmask), "upmmask");
+pm_base[PRV_U] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, upmbase), "upmbase");
+pm_mask[PRV_S] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, spmmask), "spmmask");
+pm_base[PRV_S] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, spmbase), "spmbase");
+pm_mask[PRV_M] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, mpmmask), "mpmmask");
+pm_base[PRV_M] =
+  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, mpmbase), "mpmbase");
 }
-- 
2.20.1




[PATCH v6 6/6] [RISCV_PM] Allow experimental J-ext to be turned on

2020-10-22 Thread Alexey Baturo
Signed-off-by: Alexey Baturo 
---
 target/riscv/cpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 1c00d9ea26..56633c14eb 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -465,6 +465,7 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 }
 if (cpu->cfg.ext_j) {
 env->mmte |= PM_EXT_INITIAL;
+target_misa |= RVJ;
 }
 if (cpu->cfg.ext_v) {
 target_misa |= RVV;
-- 
2.20.1




Re: [PATCH v6 01/16] memory: Add FlatView foreach function

2020-10-22 Thread Darren Kenny
Hi Alex,

On Wednesday, 2020-10-21 at 17:09:07 -04, Alexander Bulekov wrote:
> Acked-by: Paolo Bonzini 
> Signed-off-by: Alexander Bulekov 
> ---
>  include/exec/memory.h | 5 +
>  softmmu/memory.c  | 9 +
>  2 files changed, 14 insertions(+)
>
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 622207bde1..042918dd16 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -719,6 +719,11 @@ static inline FlatView 
> *address_space_to_flatview(AddressSpace *as)
>  return qatomic_rcu_read(&as->current_map);
>  }
>  
> +typedef int (*flatview_cb)(Int128 start,
> +   Int128 len,
> +   const MemoryRegion*, void*);
> +
> +void flatview_for_each_range(FlatView *fv, flatview_cb cb , void *opaque);
>  
>  /**
>   * struct MemoryRegionSection: describes a fragment of a #MemoryRegion
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index 403ff3abc9..c46b0c6d65 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -656,6 +656,15 @@ static void render_memory_region(FlatView *view,
>  }
>  }
>  
> +void flatview_for_each_range(FlatView *fv, flatview_cb cb , void *opaque)
> +{
> +FlatRange *fr;

Just to be complete, you probably should check that both fv and cb are
not NULL here - if not using an explicit if/return, at least with
assertions just in case a developer does something silly...

> +FOR_EACH_FLAT_RANGE(fr, fv) {
> +if (cb(fr->addr.start, fr->addr.size, fr->mr, opaque))
> +break;
> +}
> +}
> +
>  static MemoryRegion *memory_region_get_flatview_root(MemoryRegion *mr)
>  {
>  while (mr->enabled) {

Otherwise, all looks good, so:

Reviewed-by: Darren Kenny 

Thanks,

Darren.



Re: [PATCH v3 0/6] virtio-mem: block size and address-assignment optimizations

2020-10-22 Thread David Hildenbrand
On 08.10.20 10:30, David Hildenbrand wrote:
> 
> 
> Let's try to detect the actual THP size and use it as default block size
> (unless the page size of the backend indicates that THP don't apply).
> Always allow to set a block size of 1 MiB, but warn if the configured block
> size is smaller than the default. Handle large block sizes better, avoiding
> a virtio-spec violation and optimizing address auto-detection.
> 
> For existing setups (x86-64), the default block size won't change (was, and
> will be 2 MiB on anonymous memory). For existing x86-64 setups, the address
> auto-detection won't change in relevant setups (esp., anonymous memory
> and hugetlbfs with 2 MiB pages and no manual configuration of the block
> size). I don't see the need for compatibility handling (especially, as
> virtio-mem is still not considered production-ready).
> 
> Most of this is a preparation for future architectures, using hugetlbfs
> to full extent, and using manually configured, larger block sizes
> (relevant for vfio in the future).

Ping.

-- 
Thanks,

David / dhildenb




[PATCH] s390x: pv: Fix diag318 PV fencing

2020-10-22 Thread Janosch Frank
Diag318 fencing needs to be determined on the current VM PV state and
not on the state that the VM has when we create the CPU model.

Signed-off-by: Janosch Frank 
Reported-by: Marc Hartmayer 
Reviewed-by: Christian Borntraeger 
Fixes: fabdada935 ("s390: guest support for diagnose 0x318")
---

If you're sure that this is what you want, then I'll send a v2 of the
patch set.

---
 target/s390x/cpu_features.c | 5 +
 target/s390x/cpu_features.h | 4 
 target/s390x/cpu_models.c   | 4 
 target/s390x/kvm.c  | 3 +--
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 31ea8df246..42fe0bf4ca 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -14,6 +14,7 @@
 #include "qemu/osdep.h"
 #include "qemu/module.h"
 #include "cpu_features.h"
+#include "hw/s390x/pv.h"
 
 #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
 [S390_FEAT_##_FEAT] = {\
@@ -105,6 +106,10 @@ void s390_fill_feat_block(const S390FeatBitmap features, 
S390FeatType type,
 }
 feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
 }
+
+if (type == S390_FEAT_TYPE_SCLP_FAC134 && s390_is_pv()) {
+clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data);
+}
 }
 
 void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
index ef52ffce83..87463f064d 100644
--- a/target/s390x/cpu_features.h
+++ b/target/s390x/cpu_features.h
@@ -81,6 +81,10 @@ const S390FeatGroupDef *s390_feat_group_def(S390FeatGroup 
group);
 
 #define BE_BIT_NR(BIT) (BIT ^ (BITS_PER_LONG - 1))
 
+static inline void clear_be_bit(unsigned int bit_nr, uint8_t *array)
+{
+array[bit_nr / 8] &= ~(0x80 >> (bit_nr % 8));
+}
 static inline void set_be_bit(unsigned int bit_nr, uint8_t *array)
 {
 array[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index ca484bfda7..461e0b8f4a 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -29,6 +29,7 @@
 #include "hw/pci/pci.h"
 #endif
 #include "qapi/qapi-commands-machine-target.h"
+#include "hw/s390x/pv.h"
 
 #define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
 {\
@@ -238,6 +239,9 @@ bool s390_has_feat(S390Feat feat)
 }
 return 0;
 }
+if (feat == S390_FEAT_DIAG_318 && s390_is_pv()) {
+return false;
+}
 return test_bit(feat, cpu->model->features);
 }
 
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index f13eff688c..baa070fdf7 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2498,8 +2498,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
Error **errp)
  */
 set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features);
 
-/* DIAGNOSE 0x318 is not supported under protected virtualization */
-if (!s390_is_pv() && kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) 
{
+if (kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) {
 set_bit(S390_FEAT_DIAG_318, model->features);
 }
 
-- 
2.25.1




[PATCH 2/8] hw/misc/pvpanic: Cosmetic renaming

2020-10-22 Thread Mihai Carabas
From: Philippe Mathieu-Daudé 

To ease the MMIO device addition in the next patch, rename:
- ISA_PVPANIC_DEVICE -> PVPANIC_ISA_DEVICE.
- PVPanicState -> PVPanicISAState.
- TYPE_PVPANIC -> TYPE_PVPANIC_ISA.
- MemoryRegion io -> mr.

Reviewed-by: Peter Maydell 
Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Peng Hao 
Signed-off-by: Mihai Carabas 
---
 hw/misc/pvpanic.c | 28 
 include/hw/misc/pvpanic.h |  2 +-
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
index 598d547..2e085f4 100644
--- a/hw/misc/pvpanic.c
+++ b/hw/misc/pvpanic.c
@@ -30,9 +30,9 @@
 #define PVPANIC_PANICKED(1 << PVPANIC_F_PANICKED)
 #define PVPANIC_CRASHLOADED (1 << PVPANIC_F_CRASHLOADED)
 
-typedef struct PVPanicState PVPanicState;
-DECLARE_INSTANCE_CHECKER(PVPanicState, ISA_PVPANIC_DEVICE,
- TYPE_PVPANIC)
+typedef struct PVPanicISAState PVPanicISAState;
+DECLARE_INSTANCE_CHECKER(PVPanicISAState, PVPANIC_ISA_DEVICE,
+ TYPE_PVPANIC_ISA)
 
 static void handle_event(int event)
 {
@@ -56,10 +56,14 @@ static void handle_event(int event)
 
 #include "hw/isa/isa.h"
 
-struct PVPanicState {
+/*
+ * PVPanicISAState for ISA device and
+ * use ioport.
+ */
+struct PVPanicISAState {
 ISADevice parent_obj;
 
-MemoryRegion io;
+MemoryRegion mr;
 uint16_t ioport;
 };
 
@@ -86,15 +90,15 @@ static const MemoryRegionOps pvpanic_ops = {
 
 static void pvpanic_isa_initfn(Object *obj)
 {
-PVPanicState *s = ISA_PVPANIC_DEVICE(obj);
+PVPanicISAState *s = PVPANIC_ISA_DEVICE(obj);
 
-memory_region_init_io(&s->io, OBJECT(s), &pvpanic_ops, s, "pvpanic", 1);
+memory_region_init_io(&s->mr, OBJECT(s), &pvpanic_ops, s, "pvpanic", 1);
 }
 
 static void pvpanic_isa_realizefn(DeviceState *dev, Error **errp)
 {
 ISADevice *d = ISA_DEVICE(dev);
-PVPanicState *s = ISA_PVPANIC_DEVICE(dev);
+PVPanicISAState *s = PVPANIC_ISA_DEVICE(dev);
 FWCfgState *fw_cfg = fw_cfg_find();
 uint16_t *pvpanic_port;
 
@@ -107,11 +111,11 @@ static void pvpanic_isa_realizefn(DeviceState *dev, Error 
**errp)
 fw_cfg_add_file(fw_cfg, "etc/pvpanic-port", pvpanic_port,
 sizeof(*pvpanic_port));
 
-isa_register_ioport(d, &s->io, s->ioport);
+isa_register_ioport(d, &s->mr, s->ioport);
 }
 
 static Property pvpanic_isa_properties[] = {
-DEFINE_PROP_UINT16(PVPANIC_IOPORT_PROP, PVPanicState, ioport, 0x505),
+DEFINE_PROP_UINT16(PVPANIC_IOPORT_PROP, PVPanicISAState, ioport, 0x505),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -125,9 +129,9 @@ static void pvpanic_isa_class_init(ObjectClass *klass, void 
*data)
 }
 
 static TypeInfo pvpanic_isa_info = {
-.name  = TYPE_PVPANIC,
+.name  = TYPE_PVPANIC_ISA,
 .parent= TYPE_ISA_DEVICE,
-.instance_size = sizeof(PVPanicState),
+.instance_size = sizeof(PVPanicISAState),
 .instance_init = pvpanic_isa_initfn,
 .class_init= pvpanic_isa_class_init,
 };
diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h
index ae0c818..30e9f8f 100644
--- a/include/hw/misc/pvpanic.h
+++ b/include/hw/misc/pvpanic.h
@@ -17,7 +17,7 @@
 
 #include "qom/object.h"
 
-#define TYPE_PVPANIC "pvpanic"
+#define TYPE_PVPANIC_ISA "pvpanic"
 
 #define PVPANIC_IOPORT_PROP "ioport"
 
-- 
1.8.3.1




[PATCH 1/8] hw/misc/pvpanic: Build the pvpanic device for any machine

2020-10-22 Thread Mihai Carabas
From: Philippe Mathieu-Daudé 

The 'pvpanic' ISA device can be used by any machine with an ISA bus.

Reviewed-by: Peter Maydell 
Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Peng Hao 
Signed-off-by: Mihai Carabas 
---
 hw/misc/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 793d45b..cb250dd 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -11,6 +11,7 @@ softmmu_ss.add(when: 'CONFIG_TMP105', if_true: 
files('tmp105.c'))
 softmmu_ss.add(when: 'CONFIG_TMP421', if_true: files('tmp421.c'))
 softmmu_ss.add(when: 'CONFIG_UNIMP', if_true: files('unimp.c'))
 softmmu_ss.add(when: 'CONFIG_EMPTY_SLOT', if_true: files('empty_slot.c'))
+softmmu_ss.add(when: 'CONFIG_PVPANIC', if_true: files('pvpanic.c'))
 
 # ARM devices
 softmmu_ss.add(when: 'CONFIG_PL310', if_true: files('arm_l2x0.c'))
@@ -90,7 +91,6 @@ softmmu_ss.add(when: 'CONFIG_IOTKIT_SYSINFO', if_true: 
files('iotkit-sysinfo.c')
 softmmu_ss.add(when: 'CONFIG_ARMSSE_CPUID', if_true: files('armsse-cpuid.c'))
 softmmu_ss.add(when: 'CONFIG_ARMSSE_MHU', if_true: files('armsse-mhu.c'))
 
-softmmu_ss.add(when: 'CONFIG_PVPANIC', if_true: files('pvpanic.c'))
 softmmu_ss.add(when: 'CONFIG_AUX', if_true: files('auxbus.c'))
 softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_scu.c', 
'aspeed_sdmc.c', 'aspeed_xdma.c'))
 softmmu_ss.add(when: 'CONFIG_MSF2', if_true: files('msf2-sysreg.c'))
-- 
1.8.3.1




Re: [PATCH v2 1/8] tests/9pfs: simplify do_mkdir()

2020-10-22 Thread Greg Kurz
On Wed, 21 Oct 2020 14:06:53 +0200
Christian Schoenebeck  wrote:

> Split out walking a directory path to a separate new utility function
> do_walk() and use that function in do_mkdir().
> 
> The code difference saved this way is not much, but we'll use that new
> do_walk() function in the upcoming patches, so it will avoid quite
> some code duplication after all.
> 
> Signed-off-by: Christian Schoenebeck 
> ---

Reviewed-by: Greg Kurz 

>  tests/qtest/virtio-9p-test.c | 27 +++
>  1 file changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> index 2ea555fa04..21807037df 100644
> --- a/tests/qtest/virtio-9p-test.c
> +++ b/tests/qtest/virtio-9p-test.c
> @@ -583,6 +583,23 @@ static void do_version(QVirtio9P *v9p)
>  g_free(server_version);
>  }
>  
> +/* utility function: walk to requested dir and return fid for that dir */
> +static uint32_t do_walk(QVirtio9P *v9p, const char *path)
> +{
> +char **wnames;
> +P9Req *req;
> +const uint32_t fid = genfid();
> +
> +int nwnames = split(path, "/", &wnames);
> +
> +req = v9fs_twalk(v9p, 0, fid, nwnames, wnames, 0);
> +v9fs_req_wait_for_reply(req, NULL);
> +v9fs_rwalk(req, NULL, NULL);
> +
> +split_free(&wnames);
> +return fid;
> +}
> +
>  static void fs_version(void *obj, void *data, QGuestAllocator *t_alloc)
>  {
>  alloc = t_alloc;
> @@ -974,23 +991,17 @@ static void fs_flush_ignored(void *obj, void *data, 
> QGuestAllocator *t_alloc)
>  
>  static void do_mkdir(QVirtio9P *v9p, const char *path, const char *cname)
>  {
> -char **wnames;
>  char *const name = g_strdup(cname);
> +uint32_t fid;
>  P9Req *req;
> -const uint32_t fid = genfid();
>  
> -int nwnames = split(path, "/", &wnames);
> -
> -req = v9fs_twalk(v9p, 0, fid, nwnames, wnames, 0);
> -v9fs_req_wait_for_reply(req, NULL);
> -v9fs_rwalk(req, NULL, NULL);
> +fid = do_walk(v9p, path);
>  
>  req = v9fs_tmkdir(v9p, fid, name, 0750, 0, 0);
>  v9fs_req_wait_for_reply(req, NULL);
>  v9fs_rmkdir(req, NULL);
>  
>  g_free(name);
> -split_free(&wnames);
>  }
>  
>  static void fs_readdir_split_128(void *obj, void *data,




[PATCH 0/8] Add support for pvpanic mmio device

2020-10-22 Thread Mihai Carabas
The patchset was assembled from chunks of some old patches from 2018 [1]
which were left unmerged, plus some additions from me. Surprisingly, their Linux
kernel counterparts were merged (so the pvpanic driver in the kernel supports
mmio).

I have seen the discussions about moving the pvpanic to PCI [1]. Those patches
were sent but nothing happened. They are also not trivial and require major
modifications at the driver level. Given the fact that we already have
mmio driver support for pvpanic in the Linux kernel, I have sent these patches
to ask the maintainers again whether this can be merged.

How to test this:
/usr/bin/qemu-system-aarch64 \
-machine virt,gic-version=3,pvpanic=on

There is a new property for machine virt called pvpanic which is by default
turned off.

[1] 
http://patchwork.ozlabs.org/project/qemu-devel/cover/1543865209-50994-1-git-send-email-peng.h...@zte.com.cn/

Mihai Carabas (1):
  pvpanic: break dependency on ISA_BUS

Peng Hao (5):
  hw/misc/pvpanic: Add the MMIO interface
  hw/arm/virt: Use the pvpanic device
  hw/arm/virt: add pvpanic device in virt acpi table
  hw/arm/virt: add configure interface for pvpanic-mmio
  pvpanic : update pvpanic document

Philippe Mathieu-Daudé (2):
  hw/misc/pvpanic: Build the pvpanic device for any machine
  hw/misc/pvpanic: Cosmetic renaming

 docs/specs/pvpanic.txt| 12 ++-
 hw/arm/Kconfig|  1 +
 hw/arm/virt-acpi-build.c  | 17 ++
 hw/arm/virt.c | 47 +++
 hw/misc/Kconfig   |  2 +-
 hw/misc/meson.build   |  2 +-
 hw/misc/pvpanic.c | 83 ++-
 include/hw/arm/virt.h |  2 ++
 include/hw/misc/pvpanic.h |  5 +--
 9 files changed, 150 insertions(+), 21 deletions(-)

-- 
1.8.3.1




[PATCH 8/8] pvpanic: break dependency on ISA_BUS

2020-10-22 Thread Mihai Carabas
pvpanic is supported on the ARM virt machine as an MMIO device, so there is no
need for an ISA bus.

Signed-off-by: Mihai Carabas 
---
 hw/arm/Kconfig| 1 +
 hw/misc/Kconfig   | 2 +-
 hw/misc/pvpanic.c | 4 
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index f303c6b..0dd570b 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -26,6 +26,7 @@ config ARM_VIRT
 select ACPI_MEMORY_HOTPLUG
 select ACPI_HW_REDUCED
 select ACPI_NVDIMM
+select PVPANIC
 
 config CHEETAH
 bool
diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
index 3185456..5924e70 100644
--- a/hw/misc/Kconfig
+++ b/hw/misc/Kconfig
@@ -117,7 +117,7 @@ config IOTKIT_SYSINFO
 
 config PVPANIC
 bool
-depends on ISA_BUS
+depends on ISA_BUS || ARM_VIRT
 
 config AUX
 bool
diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
index 136f1d6..a70adb8 100644
--- a/hw/misc/pvpanic.c
+++ b/hw/misc/pvpanic.c
@@ -105,6 +105,7 @@ static const MemoryRegionOps pvpanic_ops = {
 },
 };
 
+#ifdef CONFIG_ISA_BUS
 static void pvpanic_isa_initfn(Object *obj)
 {
 PVPanicISAState *s = PVPANIC_ISA_DEVICE(obj);
@@ -152,6 +153,7 @@ static TypeInfo pvpanic_isa_info = {
 .instance_init = pvpanic_isa_initfn,
 .class_init= pvpanic_isa_class_init,
 };
+#endif
 
 static void pvpanic_mmio_initfn(Object *obj)
 {
@@ -180,7 +182,9 @@ static TypeInfo pvpanic_mmio_info = {
 
 static void pvpanic_register_types(void)
 {
+#ifdef CONFIG_ISA_BUS
 type_register_static(&pvpanic_isa_info);
+#endif
 type_register_static(&pvpanic_mmio_info);
 }
 
-- 
1.8.3.1




[PATCH 3/8] hw/misc/pvpanic: Add the MMIO interface

2020-10-22 Thread Mihai Carabas
From: Peng Hao 

Add new pvpanic type: "TYPE_PVPANIC_MMIO".

Reviewed-by: Peter Maydell 
Signed-off-by: Peng Hao 
Signed-off-by: Mihai Carabas 
---
 hw/misc/pvpanic.c | 51 +++
 include/hw/misc/pvpanic.h |  3 ++-
 2 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
index 2e085f4..136f1d6 100644
--- a/hw/misc/pvpanic.c
+++ b/hw/misc/pvpanic.c
@@ -2,10 +2,12 @@
  * QEMU simulated pvpanic device.
  *
  * Copyright Fujitsu, Corp. 2013
+ * Copyright (c) 2018 ZTE Ltd.
  *
  * Authors:
  * Wen Congyang 
  * Hu Tao 
+ * Peng Hao 
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -34,6 +36,10 @@ typedef struct PVPanicISAState PVPanicISAState;
 DECLARE_INSTANCE_CHECKER(PVPanicISAState, PVPANIC_ISA_DEVICE,
  TYPE_PVPANIC_ISA)
 
+typedef struct PVPanicMMIOState PVPanicMMIOState;
+DECLARE_INSTANCE_CHECKER(PVPanicMMIOState, PVPANIC_MMIO_DEVICE,
+ TYPE_PVPANIC_MMIO)
+
 static void handle_event(int event)
 {
 static bool logged;
@@ -67,21 +73,32 @@ struct PVPanicISAState {
 uint16_t ioport;
 };
 
+/* PVPanicMMIOState for sysbus device and
+ * use mmio.
+ */
+typedef struct PVPanicMMIOState {
+SysBusDevice parent_obj;
+/**/
+
+/* public */
+MemoryRegion mr;
+} PVPanicMMIOState;
+
 /* return supported events on read */
-static uint64_t pvpanic_ioport_read(void *opaque, hwaddr addr, unsigned size)
+static uint64_t pvpanic_read(void *opaque, hwaddr addr, unsigned size)
 {
 return PVPANIC_PANICKED;
 }
 
-static void pvpanic_ioport_write(void *opaque, hwaddr addr, uint64_t val,
+static void pvpanic_write(void *opaque, hwaddr addr, uint64_t val,
  unsigned size)
 {
 handle_event(val);
 }
 
 static const MemoryRegionOps pvpanic_ops = {
-.read = pvpanic_ioport_read,
-.write = pvpanic_ioport_write,
+.read = pvpanic_read,
+.write = pvpanic_write,
 .impl = {
 .min_access_size = 1,
 .max_access_size = 1,
@@ -136,9 +153,35 @@ static TypeInfo pvpanic_isa_info = {
 .class_init= pvpanic_isa_class_init,
 };
 
+static void pvpanic_mmio_initfn(Object *obj)
+{
+PVPanicMMIOState *s = PVPANIC_MMIO_DEVICE(obj);
+SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+memory_region_init_io(&s->mr, OBJECT(s), &pvpanic_ops, s,
+  TYPE_PVPANIC_MMIO, 2);
+sysbus_init_mmio(sbd, &s->mr);
+}
+
+static void pvpanic_mmio_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+}
+
+static TypeInfo pvpanic_mmio_info = {
+.name  = TYPE_PVPANIC_MMIO,
+.parent= TYPE_SYS_BUS_DEVICE,
+.instance_size = sizeof(PVPanicMMIOState),
+.instance_init = pvpanic_mmio_initfn,
+.class_init= pvpanic_mmio_class_init,
+};
+
 static void pvpanic_register_types(void)
 {
 type_register_static(&pvpanic_isa_info);
+type_register_static(&pvpanic_mmio_info);
 }
 
 type_init(pvpanic_register_types)
diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h
index 30e9f8f..ee645dc 100644
--- a/include/hw/misc/pvpanic.h
+++ b/include/hw/misc/pvpanic.h
@@ -18,12 +18,13 @@
 #include "qom/object.h"
 
 #define TYPE_PVPANIC_ISA "pvpanic"
+#define TYPE_PVPANIC_MMIO "pvpanic-mmio"
 
 #define PVPANIC_IOPORT_PROP "ioport"
 
 static inline uint16_t pvpanic_port(void)
 {
-Object *o = object_resolve_path_type("", TYPE_PVPANIC, NULL);
+Object *o = object_resolve_path_type("", TYPE_PVPANIC_ISA, NULL);
 if (!o) {
 return 0;
 }
-- 
1.8.3.1




[PATCH 4/8] hw/arm/virt: Use the pvpanic device

2020-10-22 Thread Mihai Carabas
From: Peng Hao 

Add pvpanic device in arm virt machine.

Signed-off-by: Peng Hao 
Reviewed-by: Andrew Jones 
Signed-off-by: Mihai Carabas 
---
 hw/arm/virt.c | 25 +
 include/hw/arm/virt.h |  2 ++
 2 files changed, 27 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index e465a98..63f09a6 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -78,6 +78,7 @@
 #include "hw/virtio/virtio-iommu.h"
 #include "hw/char/pl011.h"
 #include "qemu/guest-random.h"
+#include "hw/misc/pvpanic.h"
 
 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
 static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
@@ -152,6 +153,7 @@ static const MemMapEntry base_memmap[] = {
 [VIRT_ACPI_GED] =   { 0x0908, ACPI_GED_EVT_SEL_LEN },
 [VIRT_NVDIMM_ACPI] ={ 0x0909, NVDIMM_ACPI_IO_LEN},
 [VIRT_PVTIME] = { 0x090a, 0x0001 },
+[VIRT_PVPANIC] ={ 0x090b, 0x0002 },
 [VIRT_MMIO] =   { 0x0a00, 0x0200 },
 /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
 [VIRT_PLATFORM_BUS] =   { 0x0c00, 0x0200 },
@@ -226,6 +228,27 @@ static void create_kaslr_seed(VirtMachineState *vms, const 
char *node)
 qemu_fdt_setprop_u64(vms->fdt, node, "kaslr-seed", seed);
 }
 
+static void create_pvpanic_device(const VirtMachineState *vms)
+{
+char *nodename;
+hwaddr base = vms->memmap[VIRT_PVPANIC].base;
+hwaddr size = vms->memmap[VIRT_PVPANIC].size;
+
+if (!vms->pvpanic) {
+return;
+}
+sysbus_create_simple(TYPE_PVPANIC_MMIO, base, NULL);
+
+nodename = g_strdup_printf("/pvpanic-mmio@%" PRIx64, base);
+qemu_fdt_add_subnode(vms->fdt, nodename);
+qemu_fdt_setprop_string(vms->fdt, nodename,
+"compatible", "qemu,pvpanic-mmio");
+qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
+ 2, base, 2, size);
+
+g_free(nodename);
+}
+
 static void create_fdt(VirtMachineState *vms)
 {
 MachineState *ms = MACHINE(vms);
@@ -1957,6 +1980,8 @@ static void machvirt_init(MachineState *machine)
 
 virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem);
 
+create_pvpanic_device(vms);
+
 create_gic(vms);
 
 virt_cpu_post_init(vms, possible_cpus->len, sysmem);
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index aad6d69..e6410c3 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -69,6 +69,7 @@ enum {
 VIRT_GIC_ITS,
 VIRT_GIC_REDIST,
 VIRT_SMMU,
+VIRT_PVPANIC,
 VIRT_UART,
 VIRT_MMIO,
 VIRT_RTC,
@@ -139,6 +140,7 @@ struct VirtMachineState {
 bool highmem;
 bool highmem_ecam;
 bool its;
+bool pvpanic;
 bool virt;
 bool ras;
 bool mte;
-- 
1.8.3.1




[PATCH 6/8] hw/arm/virt: add configure interface for pvpanic-mmio

2020-10-22 Thread Mihai Carabas
From: Peng Hao 

Add the option to turn on/off the pvpanic-mmio device in virt machine.

Signed-off-by: Peng Hao 
Reviewed-by: Andrew Jones 
Signed-off-by: Mihai Carabas 
---
 hw/arm/virt.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 63f09a6..7548977 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2161,6 +2161,20 @@ static void virt_set_mte(Object *obj, bool value, Error 
**errp)
 vms->mte = value;
 }
 
+static bool virt_get_pvpanic(Object *obj, Error **errp)
+{
+VirtMachineState *vms = VIRT_MACHINE(obj);
+
+return vms->pvpanic;
+}
+
+static void virt_set_pvpanic(Object *obj, bool value, Error **errp)
+{
+VirtMachineState *vms = VIRT_MACHINE(obj);
+
+vms->pvpanic = value;
+}
+
 static char *virt_get_gic_version(Object *obj, Error **errp)
 {
 VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2594,6 +2608,14 @@ static void virt_instance_init(Object *obj)
 "guest CPU which implements the ARM "
 "Memory Tagging Extension");
 
+/* Default disallows pvpanic-mmio instantiation */
+vms->pvpanic = false;
+object_property_add_bool(obj, "pvpanic", virt_get_pvpanic,
+ virt_set_pvpanic);
+object_property_set_description(obj, "pvpanic",
+"Set on/off to enable/disable "
+"PVPANIC MMIO device");
+
 vms->irqmap = a15irqmap;
 
 virt_flash_create(vms);
-- 
1.8.3.1




[PATCH 5/8] hw/arm/virt: add pvpanic device in virt acpi table

2020-10-22 Thread Mihai Carabas
From: Peng Hao 

Add the pvpanic device to the virt ACPI table, so that when the kernel command
line uses acpi=force, the kernel can get the info from the ACPI table.

Reviewed-by: Andrew Jones 
Signed-off-by: Peng Hao 
Signed-off-by: Mihai Carabas 
---
 hw/arm/virt-acpi-build.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 9747a64..f64bf89 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -87,6 +87,20 @@ static void acpi_dsdt_add_uart(Aml *scope, const MemMapEntry 
*uart_memmap,
 aml_append(scope, dev);
 }
 
+static void acpi_dsdt_add_pvpanic(Aml *scope, const MemMapEntry 
*pvpanic_memmap)
+{
+Aml *dev = aml_device("PEVT");
+aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0001")));
+aml_append(dev, aml_name_decl("_UID", aml_int(0)));
+
+Aml *crs = aml_resource_template();
+aml_append(crs, aml_memory32_fixed(pvpanic_memmap->base,
+   pvpanic_memmap->size, AML_READ_WRITE));
+
+aml_append(dev, aml_name_decl("_CRS", crs));
+aml_append(scope, dev);
+}
+
 static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap)
 {
 Aml *dev = aml_device("FWCF");
@@ -605,6 +619,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 if (vmc->acpi_expose_flash) {
 acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]);
 }
+if (vms->pvpanic) {
+acpi_dsdt_add_pvpanic(scope, &memmap[VIRT_PVPANIC]);
+}
 acpi_dsdt_add_fw_cfg(scope, &memmap[VIRT_FW_CFG]);
 acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO],
 (irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS);
-- 
1.8.3.1




[PATCH 7/8] pvpanic : update pvpanic document

2020-10-22 Thread Mihai Carabas
From: Peng Hao 

Add mmio support info in docs/specs/pvpanic.txt.

Reviewed-by: Andrew Jones 
Signed-off-by: Peng Hao 
Signed-off-by: Mihai Carabas 
---
 docs/specs/pvpanic.txt | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/specs/pvpanic.txt b/docs/specs/pvpanic.txt
index a90fbca..b50140b 100644
--- a/docs/specs/pvpanic.txt
+++ b/docs/specs/pvpanic.txt
@@ -1,7 +1,7 @@
 PVPANIC DEVICE
 ==
 
-pvpanic device is a simulated ISA device, through which a guest panic
+pvpanic device is a simulated device, through which a guest panic
 event is sent to qemu, and a QMP event is generated. This allows
 management apps (e.g. libvirt) to be notified and respond to the event.
 
@@ -9,6 +9,9 @@ The management app has the option of waiting for GUEST_PANICKED 
events,
 and/or polling for guest-panicked RunState, to learn when the pvpanic
 device has fired a panic event.
 
+The pvpanic device can be implemented as an ISA device (using IOPORT),
+or, since qemu 4.0, as a SYSBUS device (using MMIO).
+
 ISA Interface
 -
 
@@ -24,6 +27,13 @@ bit 1: a guest panic has happened and will be handled by the 
guest;
the host should record it or report it, but should not affect
the execution of the guest.
 
+SYSBUS Interface
+
+
+The SYSBUS interface is similar to the ISA interface except that it uses
+MMIO. For example, the arm virt machine could put the pvpanic device at
+[0x90b, 0x90b0001], where currently only the first byte is used.
+
 ACPI Interface
 --
 
-- 
1.8.3.1




Re: [PATCH 1/2] s390x: pv: Remove sclp boundary checks

2020-10-22 Thread Thomas Huth
On 21/10/2020 15.43, Janosch Frank wrote:
> The SCLP boundary cross check is done by the Ultravisor for a
> protected guest, hence we don't need to do it. As QEMU doesn't get a
> valid SCCB address in protected mode this is even problematic and can
> lead to QEMU reporting a false boundary cross error.
> 
> Signed-off-by: Janosch Frank 
> Reported-by: Marc Hartmayer 
> Fixes: db13387ca0 ("s390/sclp: rework sclp boundary checks")
> Reviewed-by: Christian Borntraeger 
> Tested-by: Marc Hartmayer 
> ---
>  hw/s390x/sclp.c | 5 -
>  1 file changed, 5 deletions(-)
> 
> diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
> index 00f1e4648d..0cf2290826 100644
> --- a/hw/s390x/sclp.c
> +++ b/hw/s390x/sclp.c
> @@ -285,11 +285,6 @@ int sclp_service_call_protected(CPUS390XState *env, 
> uint64_t sccb,
>  goto out_write;
>  }
>  
> -if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length), code)) 
> {
> -work_sccb->h.response_code = 
> cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
> -goto out_write;
> -}
> -
>  sclp_c->execute(sclp, work_sccb, code);
>  out_write:
>  s390_cpu_pv_mem_write(env_archcpu(env), 0, work_sccb,
> 

Reviewed-by: Thomas Huth 




Re: [PATCH] s390x: pv: Fix diag318 PV fencing

2020-10-22 Thread David Hildenbrand
On 22.10.20 10:23, Janosch Frank wrote:
> Diag318 fencing needs to be determined on the current VM PV state and
> not on the state that the VM has when we create the CPU model.
> 
> Signed-off-by: Janosch Frank 
> Reported-by: Marc Hartmayer 
> Reviewed-by: Christian Borntraeger 
> Fixes: fabdada935 ("s390: guest support for diagnose 0x318")
> ---
> 
> If you're sure that this is what you want, then I'll send a v2 of the
> patch set.

So that's going to be the first CPU feature that gets suppressed in PV
mode - which seems to be what we want.

diag318_needed() will return false, resulting in vmstate_diag318() not
being included in the migration stream (I know, we don't support
migration yet for PV).

I don't see where diag318 would get reset during a reipl - is it
expected to be persistent when switching in/out of PV, or when reipling
etc..?

> 
> ---
>  target/s390x/cpu_features.c | 5 +
>  target/s390x/cpu_features.h | 4 
>  target/s390x/cpu_models.c   | 4 
>  target/s390x/kvm.c  | 3 +--
>  4 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
> index 31ea8df246..42fe0bf4ca 100644
> --- a/target/s390x/cpu_features.c
> +++ b/target/s390x/cpu_features.c
> @@ -14,6 +14,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/module.h"
>  #include "cpu_features.h"
> +#include "hw/s390x/pv.h"
>  
>  #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
>  [S390_FEAT_##_FEAT] = {\
> @@ -105,6 +106,10 @@ void s390_fill_feat_block(const S390FeatBitmap features, 
> S390FeatType type,
>  }
>  feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>  }
> +
> +if (type == S390_FEAT_TYPE_SCLP_FAC134 && s390_is_pv()) {
> +clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data);
> +}
>  }
>  
>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
> diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
> index ef52ffce83..87463f064d 100644
> --- a/target/s390x/cpu_features.h
> +++ b/target/s390x/cpu_features.h
> @@ -81,6 +81,10 @@ const S390FeatGroupDef *s390_feat_group_def(S390FeatGroup 
> group);
>  
>  #define BE_BIT_NR(BIT) (BIT ^ (BITS_PER_LONG - 1))
>  
> +static inline void clear_be_bit(unsigned int bit_nr, uint8_t *array)
> +{
> +array[bit_nr / 8] &= ~(0x80 >> (bit_nr % 8));
> +}
>  static inline void set_be_bit(unsigned int bit_nr, uint8_t *array)
>  {
>  array[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> index ca484bfda7..461e0b8f4a 100644
> --- a/target/s390x/cpu_models.c
> +++ b/target/s390x/cpu_models.c
> @@ -29,6 +29,7 @@
>  #include "hw/pci/pci.h"
>  #endif
>  #include "qapi/qapi-commands-machine-target.h"
> +#include "hw/s390x/pv.h"
>  
>  #define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
>  {\
> @@ -238,6 +239,9 @@ bool s390_has_feat(S390Feat feat)
>  }
>  return 0;
>  }
> +if (feat == S390_FEAT_DIAG_318 && s390_is_pv()) {
> +return false;
> +}
>  return test_bit(feat, cpu->model->features);
>  }
>  
> diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
> index f13eff688c..baa070fdf7 100644
> --- a/target/s390x/kvm.c
> +++ b/target/s390x/kvm.c
> @@ -2498,8 +2498,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
> Error **errp)
>   */
>  set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features);
>  
> -/* DIAGNOSE 0x318 is not supported under protected virtualization */
> -if (!s390_is_pv() && kvm_check_extension(kvm_state, 
> KVM_CAP_S390_DIAG318)) {
> +if (kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) {
>  set_bit(S390_FEAT_DIAG_318, model->features);
>  }
>  
> 


-- 
Thanks,

David / dhildenb




Re: [PATCH v2 2/8] tests/9pfs: add local Tunlinkat directory test

2020-10-22 Thread Greg Kurz
On Wed, 21 Oct 2020 14:17:01 +0200
Christian Schoenebeck  wrote:

> This test case uses a Tunlinkat 9p request with flag AT_REMOVEDIR
> (see 'man 2 unlink') to remove a directory from host's test directory.
> 
> Signed-off-by: Christian Schoenebeck 
> ---

Reviewed-by: Greg Kurz 

>  tests/qtest/virtio-9p-test.c | 71 
>  1 file changed, 71 insertions(+)
> 
> diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> index 21807037df..abd7e44648 100644
> --- a/tests/qtest/virtio-9p-test.c
> +++ b/tests/qtest/virtio-9p-test.c
> @@ -258,6 +258,7 @@ static const char *rmessage_name(uint8_t id)
>  id == P9_RLOPEN ? "RLOPEN" :
>  id == P9_RWRITE ? "RWRITE" :
>  id == P9_RMKDIR ? "RMKDIR" :
> +id == P9_RUNLINKAT ? "RUNLINKAT" :
>  id == P9_RFLUSH ? "RFLUSH" :
>  id == P9_RREADDIR ? "READDIR" :
>  "";
> @@ -693,6 +694,33 @@ static void v9fs_rmkdir(P9Req *req, v9fs_qid *qid)
>  v9fs_req_free(req);
>  }
>  
> +/* size[4] Tunlinkat tag[2] dirfd[4] name[s] flags[4] */
> +static P9Req *v9fs_tunlinkat(QVirtio9P *v9p, uint32_t dirfd, const char 
> *name,
> + uint32_t flags, uint16_t tag)
> +{
> +P9Req *req;
> +
> +uint32_t body_size = 4 + 4;
> +uint16_t string_size = v9fs_string_size(name);
> +
> +g_assert_cmpint(body_size, <=, UINT32_MAX - string_size);
> +body_size += string_size;
> +
> +req = v9fs_req_init(v9p, body_size, P9_TUNLINKAT, tag);
> +v9fs_uint32_write(req, dirfd);
> +v9fs_string_write(req, name);
> +v9fs_uint32_write(req, flags);
> +v9fs_req_send(req);
> +return req;
> +}
> +
> +/* size[4] Runlinkat tag[2] */
> +static void v9fs_runlinkat(P9Req *req)
> +{
> +v9fs_req_recv(req, P9_RUNLINKAT);
> +v9fs_req_free(req);
> +}
> +
>  /* basic readdir test where reply fits into a single response message */
>  static void fs_readdir(void *obj, void *data, QGuestAllocator *t_alloc)
>  {
> @@ -1004,6 +1032,22 @@ static void do_mkdir(QVirtio9P *v9p, const char *path, 
> const char *cname)
>  g_free(name);
>  }
>  
> +static void do_unlinkat(QVirtio9P *v9p, const char *atpath, const char 
> *rpath,
> +uint32_t flags)
> +{
> +char *const name = g_strdup(rpath);
> +uint32_t fid;
> +P9Req *req;
> +
> +fid = do_walk(v9p, atpath);
> +
> +req = v9fs_tunlinkat(v9p, fid, name, flags, 0);
> +v9fs_req_wait_for_reply(req, NULL);
> +v9fs_runlinkat(req);
> +
> +g_free(name);
> +}
> +
>  static void fs_readdir_split_128(void *obj, void *data,
>   QGuestAllocator *t_alloc)
>  {
> @@ -1050,6 +1094,32 @@ static void fs_create_dir(void *obj, void *data, 
> QGuestAllocator *t_alloc)
>  g_free(root_path);
>  }
>  
> +static void fs_unlinkat_dir(void *obj, void *data, QGuestAllocator *t_alloc)
> +{
> +QVirtio9P *v9p = obj;
> +alloc = t_alloc;
> +struct stat st;
> +char *root_path = virtio_9p_test_path("");
> +char *new_dir = virtio_9p_test_path("02");
> +
> +g_assert(root_path != NULL);
> +
> +do_attach(v9p);
> +do_mkdir(v9p, "/", "02");
> +
> +/* check if created directory really exists now ... */
> +g_assert(stat(new_dir, &st) == 0);
> +/* ... and is actually a directory */
> +g_assert((st.st_mode & S_IFMT) == S_IFDIR);
> +
> +do_unlinkat(v9p, "/", "02", AT_REMOVEDIR);
> +/* directory should be gone now */
> +g_assert(stat(new_dir, &st) != 0);
> +
> +g_free(new_dir);
> +g_free(root_path);
> +}
> +
>  static void *assign_9p_local_driver(GString *cmd_line, void *arg)
>  {
>  virtio_9p_assign_local_driver(cmd_line, "security_model=mapped-xattr");
> @@ -1090,6 +1160,7 @@ static void register_virtio_9p_test(void)
>  opts.before = assign_9p_local_driver;
>  qos_add_test("local/config", "virtio-9p", pci_config,  &opts);
>  qos_add_test("local/create_dir", "virtio-9p", fs_create_dir, &opts);
> +qos_add_test("local/unlinkat_dir", "virtio-9p", fs_unlinkat_dir, &opts);
>  }
>  
>  libqos_init(register_virtio_9p_test);




Re: [PATCH] s390x: pv: Fix diag318 PV fencing

2020-10-22 Thread Janosch Frank
On 10/22/20 10:32 AM, David Hildenbrand wrote:
> On 22.10.20 10:23, Janosch Frank wrote:
>> Diag318 fencing needs to be determined on the current VM PV state and
>> not on the state that the VM has when we create the CPU model.
>>
>> Signed-off-by: Janosch Frank 
>> Reported-by: Marc Hartmayer 
>> Reviewed-by: Christian Borntraeger 
>> Fixes: fabdada935 ("s390: guest support for diagnose 0x318")
>> ---
>>
>> If you're sure that this is what you want, then I'll send a v2 of the
>> patch set.
> 
> So that's going to be the first CPU feature that gets suppressed in PC
> mode - which seems to be what we want.
> 
> diag318_needed() will return false, resulting in vmstate_diag318() not
> being included in the migration stream (I know, we don't support
> migration yet for PV).

Well either you have it and need to migrate it or you don't.
As it doesn't persist over IPLs, that should not be a problem, no?

> 
> I don't see where diag318 would get reset during a reipl - is it
> expected to be persistent when switching in/out of PV, or when reipling
> etc..?

That's actually another bug we need to address. Diag318 will need to be
reset on a diag308 reset instead of when doing the cpu resets...

> 
>>
>> ---
>>  target/s390x/cpu_features.c | 5 +
>>  target/s390x/cpu_features.h | 4 
>>  target/s390x/cpu_models.c   | 4 
>>  target/s390x/kvm.c  | 3 +--
>>  4 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
>> index 31ea8df246..42fe0bf4ca 100644
>> --- a/target/s390x/cpu_features.c
>> +++ b/target/s390x/cpu_features.c
>> @@ -14,6 +14,7 @@
>>  #include "qemu/osdep.h"
>>  #include "qemu/module.h"
>>  #include "cpu_features.h"
>> +#include "hw/s390x/pv.h"
>>  
>>  #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
>>  [S390_FEAT_##_FEAT] = {\
>> @@ -105,6 +106,10 @@ void s390_fill_feat_block(const S390FeatBitmap 
>> features, S390FeatType type,
>>  }
>>  feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>>  }
>> +
>> +if (type == S390_FEAT_TYPE_SCLP_FAC134 && s390_is_pv()) {
>> +clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data);
>> +}
>>  }
>>  
>>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
>> diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
>> index ef52ffce83..87463f064d 100644
>> --- a/target/s390x/cpu_features.h
>> +++ b/target/s390x/cpu_features.h
>> @@ -81,6 +81,10 @@ const S390FeatGroupDef *s390_feat_group_def(S390FeatGroup 
>> group);
>>  
>>  #define BE_BIT_NR(BIT) (BIT ^ (BITS_PER_LONG - 1))
>>  
>> +static inline void clear_be_bit(unsigned int bit_nr, uint8_t *array)
>> +{
>> +array[bit_nr / 8] &= ~(0x80 >> (bit_nr % 8));
>> +}
>>  static inline void set_be_bit(unsigned int bit_nr, uint8_t *array)
>>  {
>>  array[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
>> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
>> index ca484bfda7..461e0b8f4a 100644
>> --- a/target/s390x/cpu_models.c
>> +++ b/target/s390x/cpu_models.c
>> @@ -29,6 +29,7 @@
>>  #include "hw/pci/pci.h"
>>  #endif
>>  #include "qapi/qapi-commands-machine-target.h"
>> +#include "hw/s390x/pv.h"
>>  
>>  #define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
>>  {\
>> @@ -238,6 +239,9 @@ bool s390_has_feat(S390Feat feat)
>>  }
>>  return 0;
>>  }
>> +if (feat == S390_FEAT_DIAG_318 && s390_is_pv()) {
>> +return false;
>> +}
>>  return test_bit(feat, cpu->model->features);
>>  }
>>  
>> diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
>> index f13eff688c..baa070fdf7 100644
>> --- a/target/s390x/kvm.c
>> +++ b/target/s390x/kvm.c
>> @@ -2498,8 +2498,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
>> Error **errp)
>>   */
>>  set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features);
>>  
>> -/* DIAGNOSE 0x318 is not supported under protected virtualization */
>> -if (!s390_is_pv() && kvm_check_extension(kvm_state, 
>> KVM_CAP_S390_DIAG318)) {
>> +if (kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) {
>>  set_bit(S390_FEAT_DIAG_318, model->features);
>>  }
>>  
>>
> 
> 




signature.asc
Description: OpenPGP digital signature


Re: [PATCH] migration/block-dirty-bitmap: fix larger granularity bitmaps

2020-10-22 Thread Vladimir Sementsov-Ogievskiy

22.10.2020 10:46, Stefan Reiter wrote:

On 10/21/20 5:17 PM, Vladimir Sementsov-Ogievskiy wrote:

21.10.2020 17:44, Stefan Reiter wrote:

sectors_per_chunk is a 64 bit integer, but the calculation is done in 32
bits, leading to an overflow for coarse bitmap granularities.

If that results in the value 0, it leads to a hang where no progress is
made but send_bitmap_bits is constantly called with nr_sectors being 0.

Signed-off-by: Stefan Reiter 
---
  migration/block-dirty-bitmap.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 5bef793ac0..5398869e2b 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -562,8 +562,9 @@ static int add_bitmaps_to_list(DBMSaveState *s, 
BlockDriverState *bs,
  dbms->bitmap_alias = g_strdup(bitmap_alias);
  dbms->bitmap = bitmap;
  dbms->total_sectors = bdrv_nb_sectors(bs);
-    dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
+    dbms->sectors_per_chunk = CHUNK_SIZE * 8lu *


I'd prefer 8llu for absolute safety.


  bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+    assert(dbms->sectors_per_chunk != 0);


I doubt that we need this assertion. Bug fixed, and it's obviously impossible.
And if we really want to assert that there is no overflow (assuming future 
changes),
it should look like this:

   assert(bdrv_dirty_bitmap_granularity(bitmap) < (1ull << 63) / CHUNK_SIZE / 8 
>> BDRV_SECTOR_BITS);

to cover not only corner case but any overflow.. And of course we should modify 
original expression
to do ">> BDRV_SECTOR_BITS" earlier than all multiplies, like

   dbms->sectors_per_chunk = CHUNK_SIZE * 8llu * 
(bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS);


But I think that only s/8/8ull/ change is enough.



I agree, and I wouldn't mind removing the assert, but just to clarify it was 
mostly meant to prevent the case where the migration gets stuck entirely. Even 
if the calculation is wrong, it would at least do _something_ instead of 
endlessly looping.

Maybe an

     assert(nr_sectors != 0);

in send_bitmap_bits instead for that?


Hmm, just sending 0 sectors should not be a problem by itself. It's a problem 
when we don't make progress in the loop in bulk_phase(). So, I'd prefer your 
original assertion, as sectors_per_chunk=0 is definitely a wrong thing.




  if (bdrv_dirty_bitmap_enabled(bitmap)) {
  dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED;
  }




With 8llu and with or without assertion:
Reviewed-by: Vladimir Sementsov-Ogievskiy 






--
Best regards,
Vladimir



Re: [PATCH] s390x: pv: Fix diag318 PV fencing

2020-10-22 Thread David Hildenbrand
On 22.10.20 10:39, Janosch Frank wrote:
> On 10/22/20 10:32 AM, David Hildenbrand wrote:
>> On 22.10.20 10:23, Janosch Frank wrote:
>>> Diag318 fencing needs to be determined on the current VM PV state and
>>> not on the state that the VM has when we create the CPU model.
>>>
>>> Signed-off-by: Janosch Frank 
>>> Reported-by: Marc Hartmayer 
>>> Reviewed-by: Christian Borntraeger 
>>> Fixes: fabdada935 ("s390: guest support for diagnose 0x318")
>>> ---
>>>
>>> If you're sure that this is what you want, then I'll send a v2 of the
>>> patch set.
>>
>> So that's going to be the first CPU feature that gets suppressed in PC
>> mode - which seems to be what we want.
>>
>> diag318_needed() will return false, resulting in vmstate_diag318() not
>> being included in the migration stream (I know, we don't support
>> migration yet for PV).
> 
> Well either you have it and need to migrate it or you don't.
> As it doesn't persist over IPLs, that should not be a problem, no?

Okay, was confused by not finding a proper reset from our central reset
handler. So this feels like the right thing to do.


-- 
Thanks,

David / dhildenb




Re: [PATCH v2 3/8] tests/9pfs: add local Tlcreate test

2020-10-22 Thread Greg Kurz
On Wed, 21 Oct 2020 14:25:33 +0200
Christian Schoenebeck  wrote:

> This test case uses a Tlcreate 9p request to create a regular file inside
> host's test directory.
> 
> Signed-off-by: Christian Schoenebeck 
> ---

Just one remark, see below.

Reviewed-by: Greg Kurz 

>  tests/qtest/virtio-9p-test.c | 77 
>  1 file changed, 77 insertions(+)
> 
> diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> index abd7e44648..c030bc2a6c 100644
> --- a/tests/qtest/virtio-9p-test.c
> +++ b/tests/qtest/virtio-9p-test.c
> @@ -258,6 +258,7 @@ static const char *rmessage_name(uint8_t id)
>  id == P9_RLOPEN ? "RLOPEN" :
>  id == P9_RWRITE ? "RWRITE" :
>  id == P9_RMKDIR ? "RMKDIR" :
> +id == P9_RLCREATE ? "RLCREATE" :
>  id == P9_RUNLINKAT ? "RUNLINKAT" :
>  id == P9_RFLUSH ? "RFLUSH" :
>  id == P9_RREADDIR ? "READDIR" :
> @@ -694,6 +695,44 @@ static void v9fs_rmkdir(P9Req *req, v9fs_qid *qid)
>  v9fs_req_free(req);
>  }
>  
> +/* size[4] Tlcreate tag[2] fid[4] name[s] flags[4] mode[4] gid[4] */
> +static P9Req *v9fs_tlcreate(QVirtio9P *v9p, uint32_t fid, const char *name,
> +uint32_t flags, uint32_t mode, uint32_t gid,
> +uint16_t tag)
> +{
> +P9Req *req;
> +
> +uint32_t body_size = 4 + 4 + 4 + 4;
> +uint16_t string_size = v9fs_string_size(name);
> +
> +g_assert_cmpint(body_size, <=, UINT32_MAX - string_size);
> +body_size += string_size;
> +
> +req = v9fs_req_init(v9p, body_size, P9_TLCREATE, tag);
> +v9fs_uint32_write(req, fid);
> +v9fs_string_write(req, name);
> +v9fs_uint32_write(req, flags);
> +v9fs_uint32_write(req, mode);
> +v9fs_uint32_write(req, gid);
> +v9fs_req_send(req);
> +return req;
> +}
> +
> +/* size[4] Rlcreate tag[2] qid[13] iounit[4] */
> +static void v9fs_rlcreate(P9Req *req, v9fs_qid *qid, uint32_t *iounit)
> +{
> +v9fs_req_recv(req, P9_RLCREATE);
> +if (qid) {
> +v9fs_memread(req, qid, 13);
> +} else {
> +v9fs_memskip(req, 13);
> +}
> +if (iounit) {
> +v9fs_uint32_read(req, iounit);
> +}
> +v9fs_req_free(req);
> +}
> +
>  /* size[4] Tunlinkat tag[2] dirfd[4] name[s] flags[4] */
>  static P9Req *v9fs_tunlinkat(QVirtio9P *v9p, uint32_t dirfd, const char 
> *name,
>   uint32_t flags, uint16_t tag)
> @@ -1032,6 +1071,24 @@ static void do_mkdir(QVirtio9P *v9p, const char *path, 
> const char *cname)
>  g_free(name);
>  }
>  
> +/* create a regular file with Tlcreate and return file's fid */
> +static uint32_t do_lcreate(QVirtio9P *v9p, const char *path,
> +   const char *cname)
> +{
> +char *const name = g_strdup(cname);
> +uint32_t fid;
> +P9Req *req;
> +
> +fid = do_walk(v9p, path);
> +
> +req = v9fs_tlcreate(v9p, fid, name, 0, 0750, 0, 0);

Maybe it could make sense to make the mode a parameter of
do_lcreate() in case someone wants to write a test case
around that ? Same remark applies to do_mkdir() actually.

No need to change this now anyway.

> +v9fs_req_wait_for_reply(req, NULL);
> +v9fs_rlcreate(req, NULL, NULL);
> +
> +g_free(name);
> +return fid;
> +}
> +
>  static void do_unlinkat(QVirtio9P *v9p, const char *atpath, const char 
> *rpath,
>  uint32_t flags)
>  {
> @@ -1120,6 +1177,25 @@ static void fs_unlinkat_dir(void *obj, void *data, 
> QGuestAllocator *t_alloc)
>  g_free(root_path);
>  }
>  
> +static void fs_create_file(void *obj, void *data, QGuestAllocator *t_alloc)
> +{
> +QVirtio9P *v9p = obj;
> +alloc = t_alloc;
> +struct stat st;
> +char *new_file = virtio_9p_test_path("03/1st_file");
> +
> +do_attach(v9p);
> +do_mkdir(v9p, "/", "03");
> +do_lcreate(v9p, "03", "1st_file");
> +
> +/* check if created file exists now ... */
> +g_assert(stat(new_file, &st) == 0);
> +/* ... and is a regular file */
> +g_assert((st.st_mode & S_IFMT) == S_IFREG);
> +
> +g_free(new_file);
> +}
> +
>  static void *assign_9p_local_driver(GString *cmd_line, void *arg)
>  {
>  virtio_9p_assign_local_driver(cmd_line, "security_model=mapped-xattr");
> @@ -1161,6 +1237,7 @@ static void register_virtio_9p_test(void)
>  qos_add_test("local/config", "virtio-9p", pci_config,  &opts);
>  qos_add_test("local/create_dir", "virtio-9p", fs_create_dir, &opts);
>  qos_add_test("local/unlinkat_dir", "virtio-9p", fs_unlinkat_dir, &opts);
> +qos_add_test("local/create_file", "virtio-9p", fs_create_file, &opts);
>  }
>  
>  libqos_init(register_virtio_9p_test);




Re: [PATCH v2 4/8] tests/9pfs: add local Tunlinkat file test

2020-10-22 Thread Greg Kurz
On Wed, 21 Oct 2020 14:28:37 +0200
Christian Schoenebeck  wrote:

> This test case uses a Tunlinkat request to remove a regular file using
> the 9pfs 'local' fs driver.
> 
> Signed-off-by: Christian Schoenebeck 
> ---

Reviewed-by: Greg Kurz 

>  tests/qtest/virtio-9p-test.c | 24 
>  1 file changed, 24 insertions(+)
> 
> diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> index c030bc2a6c..6b74a1fd7e 100644
> --- a/tests/qtest/virtio-9p-test.c
> +++ b/tests/qtest/virtio-9p-test.c
> @@ -1196,6 +1196,29 @@ static void fs_create_file(void *obj, void *data, 
> QGuestAllocator *t_alloc)
>  g_free(new_file);
>  }
>  
> +static void fs_unlinkat_file(void *obj, void *data, QGuestAllocator *t_alloc)
> +{
> +QVirtio9P *v9p = obj;
> +alloc = t_alloc;
> +struct stat st;
> +char *new_file = virtio_9p_test_path("04/doa_file");
> +
> +do_attach(v9p);
> +do_mkdir(v9p, "/", "04");
> +do_lcreate(v9p, "04", "doa_file");
> +
> +/* check if created file exists now ... */
> +g_assert(stat(new_file, &st) == 0);
> +/* ... and is a regular file */
> +g_assert((st.st_mode & S_IFMT) == S_IFREG);
> +
> +do_unlinkat(v9p, "04", "doa_file", 0);
> +/* file should be gone now */
> +g_assert(stat(new_file, &st) != 0);
> +
> +g_free(new_file);
> +}
> +
>  static void *assign_9p_local_driver(GString *cmd_line, void *arg)
>  {
>  virtio_9p_assign_local_driver(cmd_line, "security_model=mapped-xattr");
> @@ -1238,6 +1261,7 @@ static void register_virtio_9p_test(void)
>  qos_add_test("local/create_dir", "virtio-9p", fs_create_dir, &opts);
>  qos_add_test("local/unlinkat_dir", "virtio-9p", fs_unlinkat_dir, &opts);
>  qos_add_test("local/create_file", "virtio-9p", fs_create_file, &opts);
> +qos_add_test("local/unlinkat_file", "virtio-9p", fs_unlinkat_file, 
> &opts);
>  }
>  
>  libqos_init(register_virtio_9p_test);




Re: [PATCH v11 09/13] copy-on-read: skip non-guest reads if no copy needed

2020-10-22 Thread Vladimir Sementsov-Ogievskiy

22.10.2020 10:50, Andrey Shinkevich wrote:


On 21.10.2020 23:43, Andrey Shinkevich wrote:

On 14.10.2020 18:22, Vladimir Sementsov-Ogievskiy wrote:

14.10.2020 15:51, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

If the flag BDRV_REQ_PREFETCH was set, pass it further to the
COR-driver to skip unneeded reading. It can be taken into account for
the COR-algorithms optimization. That check is being made during the
block stream job by the moment.

Signed-off-by: Andrey Shinkevich 
---


[...]


diff --git a/block/io.c b/block/io.c
index 11df188..bff1808 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1512,7 +1512,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
  max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
  if (bytes <= max_bytes && bytes <= max_transfer) {
-    ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+    ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset,
+ flags & bs->supported_read_flags);



When BDRV_REQ_PREFETCH is passed, qiov may be (and generally should be) NULL. 
This means that we can't just drop the flag when calling a driver that doesn't 
support it.

Actually, if the driver doesn't support the PREFETCH flag, we should do nothing.




Ah, OK.  I see.  I expected this to be a separate patch.  I still wonder
why it isn’t.




Could it be part of patch 07? I mean introduce new field supported_read_flags 
and handle it in generic code in one patch, prior to implementing support for 
it in COR driver.




We have to add the supported flags for the COR driver in the same patch. Or 
before handling the supported_read_flags at the generic layer (handling zero 
does not make sense). Otherwise, the test #216 (where the COR-filter is 
applied) will not pass.

Andrey


I have found a workaround and am going to send all the related patches as a 
separate series.



What is the problem?

If, in a separate patch prior to modifying the COR driver, we add the new field 
supported_read_flags and add support for it in the generic layer, then while no 
driver supports it yet, nothing should change and all tests should pass.



--
Best regards,
Vladimir



Re: [PATCH v2 5/8] tests/9pfs: add local Tsymlink test

2020-10-22 Thread Greg Kurz
On Wed, 21 Oct 2020 14:33:34 +0200
Christian Schoenebeck  wrote:

> This test case uses a Tsymlink 9p request to create a symbolic link using
> the 9pfs 'local' fs driver.
> 
> Signed-off-by: Christian Schoenebeck 
> ---

Reviewed-by: Greg Kurz 

>  tests/qtest/virtio-9p-test.c | 77 
>  1 file changed, 77 insertions(+)
> 
> diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> index 6b74a1fd7e..0c11417236 100644
> --- a/tests/qtest/virtio-9p-test.c
> +++ b/tests/qtest/virtio-9p-test.c
> @@ -259,6 +259,7 @@ static const char *rmessage_name(uint8_t id)
>  id == P9_RWRITE ? "RWRITE" :
>  id == P9_RMKDIR ? "RMKDIR" :
>  id == P9_RLCREATE ? "RLCREATE" :
> +id == P9_RSYMLINK ? "RSYMLINK" :
>  id == P9_RUNLINKAT ? "RUNLINKAT" :
>  id == P9_RFLUSH ? "RFLUSH" :
>  id == P9_RREADDIR ? "READDIR" :
> @@ -733,6 +734,39 @@ static void v9fs_rlcreate(P9Req *req, v9fs_qid *qid, 
> uint32_t *iounit)
>  v9fs_req_free(req);
>  }
>  
> +/* size[4] Tsymlink tag[2] fid[4] name[s] symtgt[s] gid[4] */
> +static P9Req *v9fs_tsymlink(QVirtio9P *v9p, uint32_t fid, const char *name,
> +const char *symtgt, uint32_t gid, uint16_t tag)
> +{
> +P9Req *req;
> +
> +uint32_t body_size = 4 + 4;
> +uint16_t string_size = v9fs_string_size(name) + v9fs_string_size(symtgt);
> +
> +g_assert_cmpint(body_size, <=, UINT32_MAX - string_size);
> +body_size += string_size;
> +
> +req = v9fs_req_init(v9p, body_size, P9_TSYMLINK, tag);
> +v9fs_uint32_write(req, fid);
> +v9fs_string_write(req, name);
> +v9fs_string_write(req, symtgt);
> +v9fs_uint32_write(req, gid);
> +v9fs_req_send(req);
> +return req;
> +}
> +
> +/* size[4] Rsymlink tag[2] qid[13] */
> +static void v9fs_rsymlink(P9Req *req, v9fs_qid *qid)
> +{
> +v9fs_req_recv(req, P9_RSYMLINK);
> +if (qid) {
> +v9fs_memread(req, qid, 13);
> +} else {
> +v9fs_memskip(req, 13);
> +}
> +v9fs_req_free(req);
> +}
> +
>  /* size[4] Tunlinkat tag[2] dirfd[4] name[s] flags[4] */
>  static P9Req *v9fs_tunlinkat(QVirtio9P *v9p, uint32_t dirfd, const char 
> *name,
>   uint32_t flags, uint16_t tag)
> @@ -1089,6 +1123,25 @@ static uint32_t do_lcreate(QVirtio9P *v9p, const char 
> *path,
>  return fid;
>  }
>  
> +/* create symlink named @a clink in directory @a path pointing to @a to */
> +static void do_symlink(QVirtio9P *v9p, const char *path, const char *clink,
> +   const char *to)
> +{
> +char *const name = g_strdup(clink);
> +char *const dst = g_strdup(to);
> +uint32_t fid;
> +P9Req *req;
> +
> +fid = do_walk(v9p, path);
> +
> +req = v9fs_tsymlink(v9p, fid, name, dst, 0, 0);
> +v9fs_req_wait_for_reply(req, NULL);
> +v9fs_rsymlink(req, NULL);
> +
> +g_free(dst);
> +g_free(name);
> +}
> +
>  static void do_unlinkat(QVirtio9P *v9p, const char *atpath, const char 
> *rpath,
>  uint32_t flags)
>  {
> @@ -1219,6 +1272,29 @@ static void fs_unlinkat_file(void *obj, void *data, 
> QGuestAllocator *t_alloc)
>  g_free(new_file);
>  }
>  
> +static void fs_symlink_file(void *obj, void *data, QGuestAllocator *t_alloc)
> +{
> +QVirtio9P *v9p = obj;
> +alloc = t_alloc;
> +struct stat st;
> +char *real_file = virtio_9p_test_path("05/real_file");
> +char *symlink_file = virtio_9p_test_path("05/symlink_file");
> +
> +do_attach(v9p);
> +do_mkdir(v9p, "/", "05");
> +do_lcreate(v9p, "05", "real_file");
> +g_assert(stat(real_file, &st) == 0);
> +g_assert((st.st_mode & S_IFMT) == S_IFREG);
> +
> +do_symlink(v9p, "05", "symlink_file", "real_file");
> +
> +/* check if created link exists now */
> +g_assert(stat(symlink_file, &st) == 0);
> +
> +g_free(symlink_file);
> +g_free(real_file);
> +}
> +
>  static void *assign_9p_local_driver(GString *cmd_line, void *arg)
>  {
>  virtio_9p_assign_local_driver(cmd_line, "security_model=mapped-xattr");
> @@ -1262,6 +1338,7 @@ static void register_virtio_9p_test(void)
>  qos_add_test("local/unlinkat_dir", "virtio-9p", fs_unlinkat_dir, &opts);
>  qos_add_test("local/create_file", "virtio-9p", fs_create_file, &opts);
>  qos_add_test("local/unlinkat_file", "virtio-9p", fs_unlinkat_file, 
> &opts);
> +qos_add_test("local/symlink_file", "virtio-9p", fs_symlink_file, &opts);
>  }
>  
>  libqos_init(register_virtio_9p_test);




Re: [PATCH v2 6/8] tests/9pfs: add local Tunlinkat symlink test

2020-10-22 Thread Greg Kurz
On Wed, 21 Oct 2020 14:36:23 +0200
Christian Schoenebeck  wrote:

> This test case uses a Tunlinkat request to remove a symlink using
> the 9pfs 'local' fs driver.
> 
> Signed-off-by: Christian Schoenebeck 
> ---

Reviewed-by: Greg Kurz 

>  tests/qtest/virtio-9p-test.c | 28 
>  1 file changed, 28 insertions(+)
> 
> diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> index 0c11417236..33cba24b18 100644
> --- a/tests/qtest/virtio-9p-test.c
> +++ b/tests/qtest/virtio-9p-test.c
> @@ -1295,6 +1295,32 @@ static void fs_symlink_file(void *obj, void *data, 
> QGuestAllocator *t_alloc)
>  g_free(real_file);
>  }
>  
> +static void fs_unlinkat_symlink(void *obj, void *data,
> +QGuestAllocator *t_alloc)
> +{
> +QVirtio9P *v9p = obj;
> +alloc = t_alloc;
> +struct stat st;
> +char *real_file = virtio_9p_test_path("06/real_file");
> +char *symlink_file = virtio_9p_test_path("06/symlink_file");
> +
> +do_attach(v9p);
> +do_mkdir(v9p, "/", "06");
> +do_lcreate(v9p, "06", "real_file");
> +g_assert(stat(real_file, &st) == 0);
> +g_assert((st.st_mode & S_IFMT) == S_IFREG);
> +
> +do_symlink(v9p, "06", "symlink_file", "real_file");
> +g_assert(stat(symlink_file, &st) == 0);
> +
> +do_unlinkat(v9p, "06", "symlink_file", 0);
> +/* symlink should be gone now */
> +g_assert(stat(symlink_file, &st) != 0);
> +
> +g_free(symlink_file);
> +g_free(real_file);
> +}
> +
>  static void *assign_9p_local_driver(GString *cmd_line, void *arg)
>  {
>  virtio_9p_assign_local_driver(cmd_line, "security_model=mapped-xattr");
> @@ -1339,6 +1365,8 @@ static void register_virtio_9p_test(void)
>  qos_add_test("local/create_file", "virtio-9p", fs_create_file, &opts);
>  qos_add_test("local/unlinkat_file", "virtio-9p", fs_unlinkat_file, 
> &opts);
>  qos_add_test("local/symlink_file", "virtio-9p", fs_symlink_file, &opts);
> +qos_add_test("local/unlinkat_symlink", "virtio-9p", fs_unlinkat_symlink,
> + &opts);
>  }
>  
>  libqos_init(register_virtio_9p_test);




[PATCH] MAINTAINERS: Update nvme entries

2020-10-22 Thread Klaus Jensen
From: Klaus Jensen 

Make sure that maintainers of both the nvme block driver and the
emulated nvme device are notified about changes to the shared nvme.h.

Signed-off-by: Klaus Jensen 
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6a197bd358d6..f546ac6840f0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1876,6 +1876,7 @@ M: Klaus Jensen 
 L: qemu-bl...@nongnu.org
 S: Supported
 F: hw/block/nvme*
+F: include/block/nvme.h
 F: tests/qtest/nvme-test.c
 T: git git://git.infradead.org/qemu-nvme.git nvme-next
 
@@ -2953,6 +2954,7 @@ R: Fam Zheng 
 L: qemu-bl...@nongnu.org
 S: Supported
 F: block/nvme*
+F: include/block/nvme.h
 T: git https://github.com/stefanha/qemu.git block
 
 Bootdevice
-- 
2.28.0




Re: [PATCH] MAINTAINERS: Update nvme entries

2020-10-22 Thread Klaus Jensen
On Oct 22 11:00, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Make sure that maintainers of both the nvme block driver and the
> emulated nvme device are notified about changes to the shared nvme.h.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  MAINTAINERS | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 6a197bd358d6..f546ac6840f0 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1876,6 +1876,7 @@ M: Klaus Jensen 
>  L: qemu-bl...@nongnu.org
>  S: Supported
>  F: hw/block/nvme*
> +F: include/block/nvme.h
>  F: tests/qtest/nvme-test.c
>  T: git git://git.infradead.org/qemu-nvme.git nvme-next
>  
> @@ -2953,6 +2954,7 @@ R: Fam Zheng 
>  L: qemu-bl...@nongnu.org
>  S: Supported
>  F: block/nvme*
> +F: include/block/nvme.h
>  T: git https://github.com/stefanha/qemu.git block
>  
>  Bootdevice
> -- 
> 2.28.0
> 

Please disregard. Just realized that Philippe already posted this[1].

So consider this a gentle bump for Philippe's patch to be merged ;)


  [1]: 
https://lore.kernel.org/qemu-devel/20200701140634.25994-1-phi...@redhat.com/



signature.asc
Description: PGP signature


Re: [PATCH v2 7/8] tests/9pfs: add local Tlink test

2020-10-22 Thread Greg Kurz
On Wed, 21 Oct 2020 14:51:09 +0200
Christian Schoenebeck  wrote:

> This test case uses a Tlink request to create a hard link to a regular
> file using the 9pfs 'local' fs driver.
> 
> Signed-off-by: Christian Schoenebeck 
> ---

Reviewed-by: Greg Kurz 

>  tests/qtest/virtio-9p-test.c | 71 
>  1 file changed, 71 insertions(+)
> 
> diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> index 33cba24b18..460fa49fe3 100644
> --- a/tests/qtest/virtio-9p-test.c
> +++ b/tests/qtest/virtio-9p-test.c
> @@ -260,6 +260,7 @@ static const char *rmessage_name(uint8_t id)
>  id == P9_RMKDIR ? "RMKDIR" :
>  id == P9_RLCREATE ? "RLCREATE" :
>  id == P9_RSYMLINK ? "RSYMLINK" :
> +id == P9_RLINK ? "RLINK" :
>  id == P9_RUNLINKAT ? "RUNLINKAT" :
>  id == P9_RFLUSH ? "RFLUSH" :
>  id == P9_RREADDIR ? "READDIR" :
> @@ -767,6 +768,33 @@ static void v9fs_rsymlink(P9Req *req, v9fs_qid *qid)
>  v9fs_req_free(req);
>  }
>  
> +/* size[4] Tlink tag[2] dfid[4] fid[4] name[s] */
> +static P9Req *v9fs_tlink(QVirtio9P *v9p, uint32_t dfid, uint32_t fid,
> + const char *name, uint16_t tag)
> +{
> +P9Req *req;
> +
> +uint32_t body_size = 4 + 4;
> +uint16_t string_size = v9fs_string_size(name);
> +
> +g_assert_cmpint(body_size, <=, UINT32_MAX - string_size);
> +body_size += string_size;
> +
> +req = v9fs_req_init(v9p, body_size, P9_TLINK, tag);
> +v9fs_uint32_write(req, dfid);
> +v9fs_uint32_write(req, fid);
> +v9fs_string_write(req, name);
> +v9fs_req_send(req);
> +return req;
> +}
> +
> +/* size[4] Rlink tag[2] */
> +static void v9fs_rlink(P9Req *req)
> +{
> +v9fs_req_recv(req, P9_RLINK);
> +v9fs_req_free(req);
> +}
> +
>  /* size[4] Tunlinkat tag[2] dirfd[4] name[s] flags[4] */
>  static P9Req *v9fs_tunlinkat(QVirtio9P *v9p, uint32_t dirfd, const char 
> *name,
>   uint32_t flags, uint16_t tag)
> @@ -1142,6 +1170,21 @@ static void do_symlink(QVirtio9P *v9p, const char 
> *path, const char *clink,
>  g_free(name);
>  }
>  
> +/* create a hard link named @a clink in directory @a path pointing to @a to 
> */
> +static void do_hardlink(QVirtio9P *v9p, const char *path, const char *clink,
> +const char *to)
> +{
> +uint32_t dfid, fid;
> +P9Req *req;
> +
> +dfid = do_walk(v9p, path);
> +fid = do_walk(v9p, to);
> +
> +req = v9fs_tlink(v9p, dfid, fid, clink, 0);
> +v9fs_req_wait_for_reply(req, NULL);
> +v9fs_rlink(req);
> +}
> +
>  static void do_unlinkat(QVirtio9P *v9p, const char *atpath, const char 
> *rpath,
>  uint32_t flags)
>  {
> @@ -1321,6 +1364,33 @@ static void fs_unlinkat_symlink(void *obj, void *data,
>  g_free(real_file);
>  }
>  
> +static void fs_hardlink_file(void *obj, void *data, QGuestAllocator *t_alloc)
> +{
> +QVirtio9P *v9p = obj;
> +alloc = t_alloc;
> +struct stat st_real, st_link;
> +char *real_file = virtio_9p_test_path("07/real_file");
> +char *hardlink_file = virtio_9p_test_path("07/hardlink_file");
> +
> +do_attach(v9p);
> +do_mkdir(v9p, "/", "07");
> +do_lcreate(v9p, "07", "real_file");
> +g_assert(stat(real_file, &st_real) == 0);
> +g_assert((st_real.st_mode & S_IFMT) == S_IFREG);
> +
> +do_hardlink(v9p, "07", "hardlink_file", "07/real_file");
> +
> +/* check if link exists now ... */
> +g_assert(stat(hardlink_file, &st_link) == 0);
> +/* ... and it's a hard link, right? */
> +g_assert((st_link.st_mode & S_IFMT) == S_IFREG);
> +g_assert(st_link.st_dev == st_real.st_dev);
> +g_assert(st_link.st_ino == st_real.st_ino);
> +
> +g_free(hardlink_file);
> +g_free(real_file);
> +}
> +
>  static void *assign_9p_local_driver(GString *cmd_line, void *arg)
>  {
>  virtio_9p_assign_local_driver(cmd_line, "security_model=mapped-xattr");
> @@ -1367,6 +1437,7 @@ static void register_virtio_9p_test(void)
>  qos_add_test("local/symlink_file", "virtio-9p", fs_symlink_file, &opts);
>  qos_add_test("local/unlinkat_symlink", "virtio-9p", fs_unlinkat_symlink,
>   &opts);
> +qos_add_test("local/hardlink_file", "virtio-9p", fs_hardlink_file, 
> &opts);
>  }
>  
>  libqos_init(register_virtio_9p_test);




Re: [PATCH v4 0/2] Skip copy-on-write when allocating a zero cluster

2020-10-22 Thread Alberto Garcia
ping

On Mon 21 Sep 2020 04:30:48 PM CEST, Alberto Garcia wrote:
> I had to rebase the series due to conflicting changes on master. There
> are no other differences.
>
> Berto
>
> v4:
> - Fix rebase conflicts after cb8503159a
>
> v3: https://lists.gnu.org/archive/html/qemu-block/2020-09/msg00912.html
> - Add a new patch to improve the reporting of BDRV_BLOCK_ZERO [Vladimir]
> - Rename function to bdrv_co_is_zero_fast() [Vladimir, Kevin]
> - Don't call bdrv_common_block_status_above() if bytes == 0
>
> v2: https://lists.gnu.org/archive/html/qemu-block/2020-08/msg01165.html
> - Add new, simpler API: bdrv_is_unallocated_or_zero_above()
>
> v1: https://lists.gnu.org/archive/html/qemu-block/2020-08/msg00403.html



Re: [PATCH v2 7/8] tests/9pfs: add local Tlink test

2020-10-22 Thread Greg Kurz
On Wed, 21 Oct 2020 20:20:08 +0200
Christian Schoenebeck  wrote:

> On Mittwoch, 21. Oktober 2020 14:51:09 CEST Christian Schoenebeck wrote:
> > This test case uses a Tlink request to create a hard link to a regular
> > file using the 9pfs 'local' fs driver.
> > 
> > Signed-off-by: Christian Schoenebeck 
> > ---
> >  tests/qtest/virtio-9p-test.c | 71 
> >  1 file changed, 71 insertions(+)
> > 
> > diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> > index 33cba24b18..460fa49fe3 100644
> > --- a/tests/qtest/virtio-9p-test.c
> > +++ b/tests/qtest/virtio-9p-test.c
> > @@ -260,6 +260,7 @@ static const char *rmessage_name(uint8_t id)
> >  id == P9_RMKDIR ? "RMKDIR" :
> >  id == P9_RLCREATE ? "RLCREATE" :
> >  id == P9_RSYMLINK ? "RSYMLINK" :
> > +id == P9_RLINK ? "RLINK" :
> >  id == P9_RUNLINKAT ? "RUNLINKAT" :
> >  id == P9_RFLUSH ? "RFLUSH" :
> >  id == P9_RREADDIR ? "READDIR" :
> > @@ -767,6 +768,33 @@ static void v9fs_rsymlink(P9Req *req, v9fs_qid *qid)
> >  v9fs_req_free(req);
> >  }
> > 
> > +/* size[4] Tlink tag[2] dfid[4] fid[4] name[s] */
> > +static P9Req *v9fs_tlink(QVirtio9P *v9p, uint32_t dfid, uint32_t fid,
> > + const char *name, uint16_t tag)
> > +{
> > +P9Req *req;
> > +
> > +uint32_t body_size = 4 + 4;
> > +uint16_t string_size = v9fs_string_size(name);
> > +
> > +g_assert_cmpint(body_size, <=, UINT32_MAX - string_size);
> > +body_size += string_size;
> > +
> > +req = v9fs_req_init(v9p, body_size, P9_TLINK, tag);
> > +v9fs_uint32_write(req, dfid);
> > +v9fs_uint32_write(req, fid);
> > +v9fs_string_write(req, name);
> > +v9fs_req_send(req);
> > +return req;
> > +}
> > +
> > +/* size[4] Rlink tag[2] */
> > +static void v9fs_rlink(P9Req *req)
> > +{
> > +v9fs_req_recv(req, P9_RLINK);
> > +v9fs_req_free(req);
> > +}
> > +
> >  /* size[4] Tunlinkat tag[2] dirfd[4] name[s] flags[4] */
> >  static P9Req *v9fs_tunlinkat(QVirtio9P *v9p, uint32_t dirfd, const char
> > *name, uint32_t flags, uint16_t tag)
> > @@ -1142,6 +1170,21 @@ static void do_symlink(QVirtio9P *v9p, const char
> > *path, const char *clink, g_free(name);
> >  }
> > 
> > +/* create a hard link named @a clink in directory @a path pointing to @a to
> > */ +static void do_hardlink(QVirtio9P *v9p, const char *path, const char
> > *clink, +const char *to)
> > +{
> > +uint32_t dfid, fid;
> > +P9Req *req;
> > +
> > +dfid = do_walk(v9p, path);
> > +fid = do_walk(v9p, to);
> > +
> > +req = v9fs_tlink(v9p, dfid, fid, clink, 0);
> > +v9fs_req_wait_for_reply(req, NULL);
> > +v9fs_rlink(req);
> > +}
> > +
> >  static void do_unlinkat(QVirtio9P *v9p, const char *atpath, const char
> > *rpath, uint32_t flags)
> >  {
> > @@ -1321,6 +1364,33 @@ static void fs_unlinkat_symlink(void *obj, void
> > *data, g_free(real_file);
> >  }
> > 
> > +static void fs_hardlink_file(void *obj, void *data, QGuestAllocator
> > *t_alloc) +{
> > +QVirtio9P *v9p = obj;
> > +alloc = t_alloc;
> > +struct stat st_real, st_link;
> > +char *real_file = virtio_9p_test_path("07/real_file");
> > +char *hardlink_file = virtio_9p_test_path("07/hardlink_file");
> > +
> > +do_attach(v9p);
> > +do_mkdir(v9p, "/", "07");
> > +do_lcreate(v9p, "07", "real_file");
> > +g_assert(stat(real_file, &st_real) == 0);
> > +g_assert((st_real.st_mode & S_IFMT) == S_IFREG);
> > +
> > +do_hardlink(v9p, "07", "hardlink_file", "07/real_file");
> > +
> > +/* check if link exists now ... */
> > +g_assert(stat(hardlink_file, &st_link) == 0);
> > +/* ... and it's a hard link, right? */
> > +g_assert((st_link.st_mode & S_IFMT) == S_IFREG);
> > +g_assert(st_link.st_dev == st_real.st_dev);
> > +g_assert(st_link.st_ino == st_real.st_ino);
> > +
> > +g_free(hardlink_file);
> > +g_free(real_file);
> > +}
> > +
> >  static void *assign_9p_local_driver(GString *cmd_line, void *arg)
> >  {
> >  virtio_9p_assign_local_driver(cmd_line, "security_model=mapped-xattr");
> > @@ -1367,6 +1437,7 @@ static void register_virtio_9p_test(void)
> >  qos_add_test("local/symlink_file", "virtio-9p", fs_symlink_file,
> > &opts); qos_add_test("local/unlinkat_symlink", "virtio-9p",
> > fs_unlinkat_symlink, &opts);
> > +qos_add_test("local/hardlink_file", "virtio-9p", fs_hardlink_file,
> > &opts); }
> > 
> >  libqos_init(register_virtio_9p_test);
> 
> Ok, I found the problem on the mentioned box that failed to create hard links 
> with 9p: it is libvirt auto generating AppArmor policy rules for 9p export 
> paths, which libvirt generates with "rw" AA (AppArmor) permissions. Once I 
> manually adjusted the AA rule to "rwl", creating hard links worked again.
> 
> I guess I'll send a patch for libvirt to change that. At least IMO creating 
> hard links with 9pfs is a very basic operation and should be enable

Re: [PATCH v2 8/8] tests/9pfs: add local Tunlinkat hard link test

2020-10-22 Thread Greg Kurz
On Wed, 21 Oct 2020 14:55:46 +0200
Christian Schoenebeck  wrote:

> This test case uses a Tunlinkat request to remove a previously hard
> linked file by using the 9pfs 'local' fs driver.
> 
> Signed-off-by: Christian Schoenebeck 
> ---

Reviewed-by: Greg Kurz 

>  tests/qtest/virtio-9p-test.c | 30 ++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> index 460fa49fe3..23433913bb 100644
> --- a/tests/qtest/virtio-9p-test.c
> +++ b/tests/qtest/virtio-9p-test.c
> @@ -1391,6 +1391,34 @@ static void fs_hardlink_file(void *obj, void *data, 
> QGuestAllocator *t_alloc)
>  g_free(real_file);
>  }
>  
> +static void fs_unlinkat_hardlink(void *obj, void *data,
> + QGuestAllocator *t_alloc)
> +{
> +QVirtio9P *v9p = obj;
> +alloc = t_alloc;
> +struct stat st_real, st_link;
> +char *real_file = virtio_9p_test_path("08/real_file");
> +char *hardlink_file = virtio_9p_test_path("08/hardlink_file");
> +
> +do_attach(v9p);
> +do_mkdir(v9p, "/", "08");
> +do_lcreate(v9p, "08", "real_file");
> +g_assert(stat(real_file, &st_real) == 0);
> +g_assert((st_real.st_mode & S_IFMT) == S_IFREG);
> +
> +do_hardlink(v9p, "08", "hardlink_file", "08/real_file");
> +g_assert(stat(hardlink_file, &st_link) == 0);
> +
> +do_unlinkat(v9p, "08", "hardlink_file", 0);
> +/* symlink should be gone now */
> +g_assert(stat(hardlink_file, &st_link) != 0);
> +/* and old file should still exist */
> +g_assert(stat(real_file, &st_real) == 0);
> +
> +g_free(hardlink_file);
> +g_free(real_file);
> +}
> +
>  static void *assign_9p_local_driver(GString *cmd_line, void *arg)
>  {
>  virtio_9p_assign_local_driver(cmd_line, "security_model=mapped-xattr");
> @@ -1438,6 +1466,8 @@ static void register_virtio_9p_test(void)
>  qos_add_test("local/unlinkat_symlink", "virtio-9p", fs_unlinkat_symlink,
>   &opts);
>  qos_add_test("local/hardlink_file", "virtio-9p", fs_hardlink_file, 
> &opts);
> +qos_add_test("local/unlinkat_hardlink", "virtio-9p", 
> fs_unlinkat_hardlink,
> + &opts);
>  }
>  
>  libqos_init(register_virtio_9p_test);




Re: [PATCH v6 02/16] fuzz: Add generic virtual-device fuzzer

2020-10-22 Thread Darren Kenny
Hi Alex,

On Wednesday, 2020-10-21 at 17:09:08 -04, Alexander Bulekov wrote:
> This is a generic fuzzer designed to fuzz a virtual device's
> MemoryRegions, as long as they exist within the Memory or Port IO (if it
> exists) AddressSpaces. The fuzzer's input is interpreted into a sequence
> of qtest commands (outb, readw, etc). The interpreted commands are
> separated by a magic separator, which should be easy for the fuzzer to
> guess. Without ASan, the separator can be specified as a "dictionary
> value" using the -dict argument (see libFuzzer documentation).
>
> Signed-off-by: Alexander Bulekov 

Just a couple of very minor nits below (spacing between functions),
which are not vital, so otherwise:

Reviewed-by: Darren Kenny 

> ---
>  tests/qtest/fuzz/generic_fuzz.c | 512 
>  tests/qtest/fuzz/meson.build|   1 +
>  2 files changed, 513 insertions(+)
>  create mode 100644 tests/qtest/fuzz/generic_fuzz.c
>
> diff --git a/tests/qtest/fuzz/generic_fuzz.c b/tests/qtest/fuzz/generic_fuzz.c
> new file mode 100644
> index 00..f69e9583ce
> --- /dev/null
> +++ b/tests/qtest/fuzz/generic_fuzz.c
> @@ -0,0 +1,512 @@
> +/*
> + * Generic Virtual-Device Fuzzing Target
> + *
> + * Copyright Red Hat Inc., 2020
> + *
> + * Authors:
> + *  Alexander Bulekov   
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +
> +#include 
> +
> +#include "hw/core/cpu.h"
> +#include "tests/qtest/libqos/libqtest.h"
> +#include "fuzz.h"
> +#include "fork_fuzz.h"
> +#include "exec/address-spaces.h"
> +#include "string.h"
> +#include "exec/memory.h"
> +#include "exec/ramblock.h"
> +#include "exec/address-spaces.h"
> +#include "hw/qdev-core.h"
> +
> +/*
> + * SEPARATOR is used to separate "operations" in the fuzz input
> + */
> +#define SEPARATOR "FUZZ"
> +
> +enum cmds {
> +OP_IN,
> +OP_OUT,
> +OP_READ,
> +OP_WRITE,
> +OP_CLOCK_STEP,
> +};
> +
> +#define DEFAULT_TIMEOUT_US 10
> +#define USEC_IN_SEC 10
> +
> +typedef struct {
> +ram_addr_t addr;
> +ram_addr_t size; /* The number of bytes until the end of the I/O region 
> */
> +} address_range;
> +
> +static useconds_t timeout = DEFAULT_TIMEOUT_US;
> +
> +static bool qtest_log_enabled;
> +
> +/*
> + * List of memory regions that are children of QOM objects specified by the
> + * user for fuzzing.
> + */
> +static GHashTable *fuzzable_memoryregions;
> +
> +struct get_io_cb_info {
> +int index;
> +int found;
> +address_range result;
> +};
> +
> +static int get_io_address_cb(Int128 start, Int128 size,
> +  const MemoryRegion *mr, void *opaque) {
> +struct get_io_cb_info *info = opaque;
> +if (g_hash_table_lookup(fuzzable_memoryregions, mr)) {
> +if (info->index == 0) {
> +info->result.addr = (ram_addr_t)start;
> +info->result.size = (ram_addr_t)size;
> +info->found = 1;
> +return 1;
> +}
> +info->index--;
> +}
> +return 0;
> +}
> +
> +/*
> + * Here we want to convert a fuzzer-provided [io-region-index, offset] to
> + * a physical address. To do this, we iterate over all of the matched
> + * MemoryRegions. Check whether each region exists within the particular io
> + * space. Return the absolute address of the offset within the index'th 
> region
> + * that is a subregion of the io_space and the distance until the end of the
> + * memory region.
> + */
> +static bool get_io_address(address_range *result, AddressSpace *as,
> +uint8_t index,
> +uint32_t offset) {
> +FlatView *view;
> +view = as->current_map;
> +g_assert(view);
> +struct get_io_cb_info cb_info = {};
> +
> +cb_info.index = index;
> +
> +/*
> + * Loop around the FlatView until we match "index" number of
> + * fuzzable_memoryregions, or until we know that there are no matching
> + * memory_regions.
> + */
> +do {
> +flatview_for_each_range(view, get_io_address_cb , &cb_info);
> +} while (cb_info.index != index && !cb_info.found);
> +
> +*result = cb_info.result;
> +return cb_info.found;
> +}

NIT: Add blank line here.

> +static bool get_pio_address(address_range *result,
> +uint8_t index, uint16_t offset)
> +{
> +/*
> + * PIO BARs can be set past the maximum port address (0xFFFF). Thus, 
> result
> + * can contain an addr that extends past the PIO space. When we pass this
> + * address to qtest_in/qtest_out, it is cast to a uint16_t, so we might 
> end
> + * up fuzzing a completely different MemoryRegion/Device. Therefore, 
> check
> + * that the address here is within the PIO space limits.
> + */
> +bool found = get_io_address(result, &address_space_io, index, offset);
> +return result->addr <= 0x ? found : false;
> +

Re: [PATCH v6 13/16] fuzz: add an "opaque" to the FuzzTarget struct

2020-10-22 Thread Darren Kenny
On Wednesday, 2020-10-21 at 17:09:19 -04, Alexander Bulekov wrote:
> It can be useful to register FuzzTargets that have nearly-identical
> initialization handlers (e.g. for using the same fuzzing code, with
> different configuration options). Add an opaque pointer to the
> FuzzTarget struct, so that FuzzTargets can hold some data, useful for
> storing target-specific configuration options, that can be read by the
> get_init_cmdline function.
>
> Signed-off-by: Alexander Bulekov 

Reviewed-by: Darren Kenny 

> ---
>  tests/qtest/fuzz/fuzz.h | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/tests/qtest/fuzz/fuzz.h b/tests/qtest/fuzz/fuzz.h
> index ed9ce17154..08e9560a79 100644
> --- a/tests/qtest/fuzz/fuzz.h
> +++ b/tests/qtest/fuzz/fuzz.h
> @@ -100,6 +100,7 @@ typedef struct FuzzTarget {
> uint8_t *out, size_t max_out_size,
> unsigned int seed);
>  
> +void *opaque;
>  } FuzzTarget;
>  
>  void flush_events(QTestState *);
> -- 
> 2.28.0



Re: [PATCH v6 14/16] fuzz: add generic-fuzz configs for oss-fuzz

2020-10-22 Thread Darren Kenny
On Wednesday, 2020-10-21 at 17:09:20 -04, Alexander Bulekov wrote:
> Predefine some generic-fuzz configs. For each of these, we will create a
> separate FuzzTarget that can be selected through argv0 and, therefore,
> fuzzed on oss-fuzz.
>
> Signed-off-by: Alexander Bulekov 

Reviewed-by: Darren Kenny 

> ---
>  tests/qtest/fuzz/generic_fuzz_configs.h | 121 
>  1 file changed, 121 insertions(+)
>  create mode 100644 tests/qtest/fuzz/generic_fuzz_configs.h
>
> diff --git a/tests/qtest/fuzz/generic_fuzz_configs.h 
> b/tests/qtest/fuzz/generic_fuzz_configs.h
> new file mode 100644
> index 00..c4d925f9e6
> --- /dev/null
> +++ b/tests/qtest/fuzz/generic_fuzz_configs.h
> @@ -0,0 +1,121 @@
> +/*
> + * Generic Virtual-Device Fuzzing Target Configs
> + *
> + * Copyright Red Hat Inc., 2020
> + *
> + * Authors:
> + *  Alexander Bulekov   
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef GENERIC_FUZZ_CONFIGS_H
> +#define GENERIC_FUZZ_CONFIGS_H
> +
> +#include "qemu/osdep.h"
> +
> +typedef struct generic_fuzz_config {
> +const char *name, *args, *objects;
> +} generic_fuzz_config;
> +
> +const generic_fuzz_config predefined_configs[] = {
> +{
> +.name = "virtio-net-pci-slirp",
> +.args = "-M q35 -nodefaults "
> +"-device virtio-net,netdev=net0 -netdev user,id=net0",
> +.objects = "virtio*",
> +},{
> +.name = "virtio-blk",
> +.args = "-machine q35 -device virtio-blk,drive=disk0 "
> +"-drive file=null-co://,id=disk0,if=none,format=raw",
> +.objects = "virtio*",
> +},{
> +.name = "virtio-scsi",
> +.args = "-machine q35 -device virtio-scsi,num_queues=8 "
> +"-device scsi-hd,drive=disk0 "
> +"-drive file=null-co://,id=disk0,if=none,format=raw",
> +.objects = "scsi* virtio*",
> +},{
> +.name = "virtio-gpu",
> +.args = "-machine q35 -nodefaults -device virtio-gpu",
> +.objects = "virtio*",
> +},{
> +.name = "virtio-vga",
> +.args = "-machine q35 -nodefaults -device virtio-vga",
> +.objects = "virtio*",
> +},{
> +.name = "virtio-rng",
> +.args = "-machine q35 -nodefaults -device virtio-rng",
> +.objects = "virtio*",
> +},{
> +.name = "virtio-balloon",
> +.args = "-machine q35 -nodefaults -device virtio-balloon",
> +.objects = "virtio*",
> +},{
> +.name = "virtio-serial",
> +.args = "-machine q35 -nodefaults -device virtio-serial",
> +.objects = "virtio*",
> +},{
> +.name = "virtio-mouse",
> +.args = "-machine q35 -nodefaults -device virtio-mouse",
> +.objects = "virtio*",
> +},{
> +.name = "e1000",
> +.args = "-M q35 -nodefaults "
> +"-device e1000,netdev=net0 -netdev user,id=net0",
> +.objects = "e1000",
> +},{
> +.name = "e1000e",
> +.args = "-M q35 -nodefaults "
> +"-device e1000e,netdev=net0 -netdev user,id=net0",
> +.objects = "e1000e",
> +},{
> +.name = "cirrus-vga",
> +.args = "-machine q35 -nodefaults -device cirrus-vga",
> +.objects = "cirrus*",
> +},{
> +.name = "bochs-display",
> +.args = "-machine q35 -nodefaults -device bochs-display",
> +.objects = "bochs*",
> +},{
> +.name = "intel-hda",
> +.args = "-machine q35 -nodefaults -device intel-hda,id=hda0 "
> +"-device hda-output,bus=hda0.0 -device hda-micro,bus=hda0.0 "
> +"-device hda-duplex,bus=hda0.0",
> +.objects = "intel-hda",
> +},{
> +.name = "ide-hd",
> +.args = "-machine q35 -nodefaults "
> +"-drive file=null-co://,if=none,format=raw,id=disk0 "
> +"-device ide-hd,drive=disk0",
> +.objects = "ahci*",
> +},{
> +.name = "floppy",
> +.args = "-machine pc -nodefaults -device floppy,id=floppy0 "
> +"-drive id=disk0,file=null-co://,file.read-zeroes=on,if=none "
> +"-device floppy,drive=disk0,drive-type=288",
> +.objects = "fd* floppy*",
> +},{
> +.name = "xhci",
> +.args = "-machine q35 -nodefaults "
> +"-drive file=null-co://,if=none,format=raw,id=disk0 "
> +"-device qemu-xhci,id=xhci -device usb-tablet,bus=xhci.0 "
> +"-device usb-bot -device usb-storage,drive=disk0 "
> +"-chardev null,id=cd0 -chardev null,id=cd1 "
> +"-device usb-braille,chardev=cd0 -device usb-ccid -device usb-ccid "
> +"-device usb-kbd -device usb-mouse -device usb-serial,chardev=cd1 "
> +"-device usb-tablet -device usb-wacom-tablet -device usb-audio",
> +.objects = "*usb* *uhci* *xhci*",
> +},{
> +.name = "pc-i440fx",
> +.args = "-machine pc",
> +.objects = "*",
> +},{
> +.name = "pc-q35",
>

Re: [PATCH v6 15/16] fuzz: register predefined generic-fuzz configs

2020-10-22 Thread Darren Kenny
On Wednesday, 2020-10-21 at 17:09:21 -04, Alexander Bulekov wrote:
> We call get_generic_fuzz_configs, which fills an array with
> predefined {name, args, objects} triples. For each of these, we add a
> new FuzzTarget, that uses a small wrapper to set
> QEMU_FUZZ_{ARGS,OBJECTS} to the corresponding predefined values.
>
> Signed-off-by: Alexander Bulekov 

Reviewed-by: Darren Kenny 

> ---
>  tests/qtest/fuzz/generic_fuzz.c | 32 
>  1 file changed, 32 insertions(+)
>
> diff --git a/tests/qtest/fuzz/generic_fuzz.c b/tests/qtest/fuzz/generic_fuzz.c
> index f739937827..bff98fe3c8 100644
> --- a/tests/qtest/fuzz/generic_fuzz.c
> +++ b/tests/qtest/fuzz/generic_fuzz.c
> @@ -26,6 +26,7 @@
>  #include "hw/qdev-core.h"
>  #include "hw/pci/pci.h"
>  #include "hw/boards.h"
> +#include "generic_fuzz_configs.h"
>  
>  /*
>   * SEPARATOR is used to separate "operations" in the fuzz input
> @@ -901,6 +902,17 @@ static GString *generic_fuzz_cmdline(FuzzTarget *t)
>  return cmd_line;
>  }
>  
> +static GString *generic_fuzz_predefined_config_cmdline(FuzzTarget *t)
> +{
> +const generic_fuzz_config *config;
> +g_assert(t->opaque);
> +
> +config = t->opaque;
> +setenv("QEMU_FUZZ_ARGS", config->args, 1);
> +setenv("QEMU_FUZZ_OBJECTS", config->objects, 1);
> +return generic_fuzz_cmdline(t);
> +}
> +
>  static void register_generic_fuzz_targets(void)
>  {
>  fuzz_add_target(&(FuzzTarget){
> @@ -911,6 +923,26 @@ static void register_generic_fuzz_targets(void)
>  .fuzz = generic_fuzz,
>  .crossover = generic_fuzz_crossover
>  });
> +
> +GString *name;
> +const generic_fuzz_config *config;
> +
> +for (int i = 0;
> + i < sizeof(predefined_configs) / sizeof(generic_fuzz_config);
> + i++) {
> +config = predefined_configs + i;
> +name = g_string_new("generic-fuzz");
> +g_string_append_printf(name, "-%s", config->name);
> +fuzz_add_target(&(FuzzTarget){
> +.name = name->str,
> +.description = "Predefined generic-fuzz config.",
> +.get_init_cmdline = generic_fuzz_predefined_config_cmdline,
> +.pre_fuzz = generic_pre_fuzz,
> +.fuzz = generic_fuzz,
> +.crossover = generic_fuzz_crossover,
> +.opaque = (void *)config
> +});
> +}
>  }
>  
>  fuzz_target_init(register_generic_fuzz_targets);
> -- 
> 2.28.0



Re: [PATCH v4 1/2] qcow2: Report BDRV_BLOCK_ZERO more accurately in bdrv_co_block_status()

2020-10-22 Thread Vladimir Sementsov-Ogievskiy

21.09.2020 17:30, Alberto Garcia wrote:

If a BlockDriverState supports backing files but has none then any
unallocated area reads back as zeroes.

bdrv_co_block_status() is only reporting this if want_zero is true,
but this is an inexpensive test and there is no reason not to do it in
all cases.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Alberto Garcia 


Reviewed-by: Vladimir Sementsov-Ogievskiy 


---
  block/io.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/io.c b/block/io.c
index a2389bb38c..ef1ea806e8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2391,17 +2391,17 @@ static int coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
  
  if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {

  ret |= BDRV_BLOCK_ALLOCATED;
-} else if (want_zero && bs->drv->supports_backing) {
+} else if (bs->drv->supports_backing) {
  BlockDriverState *cow_bs = bdrv_cow_bs(bs);
  
-if (cow_bs) {

+if (!cow_bs) {
+ret |= BDRV_BLOCK_ZERO;
+} else if (want_zero) {
  int64_t size2 = bdrv_getlength(cow_bs);
  
  if (size2 >= 0 && offset >= size2) {

  ret |= BDRV_BLOCK_ZERO;
  }
-} else {
-ret |= BDRV_BLOCK_ZERO;
  }
  }
  




--
Best regards,
Vladimir



Re: [PATCH v6 16/16] scripts/oss-fuzz: remove the generic-fuzz target

2020-10-22 Thread Darren Kenny
Hi Alex,

On Wednesday, 2020-10-21 at 17:09:22 -04, Alexander Bulekov wrote:
> generic-fuzz is not a standalone fuzzer - it requires some env variables
> to be set. On oss-fuzz, we set these with some predefined
> generic-fuzz-{...} targets, that are thin wrappers around generic-fuzz.
> Remove generic-fuzz from the oss-fuzz build, so oss-fuzz does not treat
> it as a standalone fuzzer.
>
> Signed-off-by: Alexander Bulekov 
> ---
>  scripts/oss-fuzz/build.sh | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/scripts/oss-fuzz/build.sh b/scripts/oss-fuzz/build.sh
> index 0c3ca9e06f..37cd7f9e25 100755
> --- a/scripts/oss-fuzz/build.sh
> +++ b/scripts/oss-fuzz/build.sh
> @@ -97,5 +97,11 @@ do

I'm presuming that the target that you're removing is being created by
this line, maybe we should just specifically skip it here instead?

The comment below on the removal probably would still apply though.

>  cp qemu-fuzz-i386 "$DEST_DIR/qemu-fuzz-i386-target-$target"

Also, did you look into using hard-links, or even sym-links - they would
require less duplication of the binaries, which may be important, or may
not, and quicker creation too, e.g.

  ln qemu-fuzz-i386 "$DEST_DIR/qemu-fuzz-i386-target-$target"

It's something that has been done for years, for example if you do:

  ls -il /sbin/{e2fsck,fsck.ext*}

you will see they share the same inode. Similarly for vi and ex, they
use symlinks (hardlinks on some OSes, but seems not Linux):

  ls -il /bin/{vi,ex}

The main point is that argv[0] will be the name of the link itself, not the
thing pointed to.

Thanks,

Darren.

>  done
>  
> +# Remove the generic-fuzz target, as it requires some environment variables 
> to
> +# be configured. We have some generic-fuzz-{pc-q35, floppy, ...} targets that
> +# are thin wrappers around this target that set the required environment
> +# variables according to predefined configs.
> +rm "$DEST_DIR/qemu-fuzz-i386-target-generic-fuzz"
> +
>  echo "Done. The fuzzers are located in $DEST_DIR"
>  exit 0
> -- 
> 2.28.0



Re: [PATCH] s390x: pv: Fix diag318 PV fencing

2020-10-22 Thread Halil Pasic
On Thu, 22 Oct 2020 04:23:12 -0400
Janosch Frank  wrote:

> Diag318 fencing needs to be determined on the current VM PV state and
> not on the state that the VM has when we create the CPU model.
> 
> Signed-off-by: Janosch Frank 
> Reported-by: Marc Hartmayer 
> Reviewed-by: Christian Borntraeger 
> Fixes: fabdada935 ("s390: guest support for diagnose 0x318")
> ---
> 
> If you're sure that this is what you want, then I'll send a v2 of the
> patch set.
> 
> ---
>  target/s390x/cpu_features.c | 5 +
>  target/s390x/cpu_features.h | 4 
>  target/s390x/cpu_models.c   | 4 
>  target/s390x/kvm.c  | 3 +--
>  4 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
> index 31ea8df246..42fe0bf4ca 100644
> --- a/target/s390x/cpu_features.c
> +++ b/target/s390x/cpu_features.c
> @@ -14,6 +14,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/module.h"
>  #include "cpu_features.h"
> +#include "hw/s390x/pv.h"
>  
>  #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
>  [S390_FEAT_##_FEAT] = {\
> @@ -105,6 +106,10 @@ void s390_fill_feat_block(const S390FeatBitmap features, 
> S390FeatType type,
>  }
>  feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>  }
> +
> +if (type == S390_FEAT_TYPE_SCLP_FAC134 && s390_is_pv()) {
> +clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data);
> +}
>  }

Sorry, I'm a little rusty with cpu models. Does this affect the outcome
of the corresponding QMP commands?

I would guess it does...

Regards,
Halil



Re: [PATCH] s390x: pv: Fix diag318 PV fencing

2020-10-22 Thread David Hildenbrand
On 22.10.20 11:54, Halil Pasic wrote:
> On Thu, 22 Oct 2020 04:23:12 -0400
> Janosch Frank  wrote:
> 
>> Diag318 fencing needs to be determined on the current VM PV state and
>> not on the state that the VM has when we create the CPU model.
>>
>> Signed-off-by: Janosch Frank 
>> Reported-by: Marc Hartmayer 
>> Reviewed-by: Christian Borntraeger 
>> Fixes: fabdada935 ("s390: guest support for diagnose 0x318")
>> ---
>>
>> If you're sure that this is what you want, then I'll send a v2 of the
>> patch set.
>>
>> ---
>>  target/s390x/cpu_features.c | 5 +
>>  target/s390x/cpu_features.h | 4 
>>  target/s390x/cpu_models.c   | 4 
>>  target/s390x/kvm.c  | 3 +--
>>  4 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
>> index 31ea8df246..42fe0bf4ca 100644
>> --- a/target/s390x/cpu_features.c
>> +++ b/target/s390x/cpu_features.c
>> @@ -14,6 +14,7 @@
>>  #include "qemu/osdep.h"
>>  #include "qemu/module.h"
>>  #include "cpu_features.h"
>> +#include "hw/s390x/pv.h"
>>  
>>  #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
>>  [S390_FEAT_##_FEAT] = {\
>> @@ -105,6 +106,10 @@ void s390_fill_feat_block(const S390FeatBitmap 
>> features, S390FeatType type,
>>  }
>>  feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>>  }
>> +
>> +if (type == S390_FEAT_TYPE_SCLP_FAC134 && s390_is_pv()) {
>> +clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data);
>> +}
>>  }
> 
> Sorry, I'm a little rusty with cpu models. Does this affect the outcome
> of the corresponding QMP commands?
> 
> I would guess it does...

No, it shouldn't.


-- 
Thanks,

David / dhildenb




Re: [PULL 00/17] Microvm 20201021 patches

2020-10-22 Thread Peter Maydell
On Wed, 21 Oct 2020 at 15:52, Gerd Hoffmann  wrote:
>
> The following changes since commit 4c41341af76cfc85b5a6c0f87de4838672ab9f89:
>
>   Merge remote-tracking branch 'remotes/aperard/tags/pull-xen-20201020' into 
> staging (2020-10-20 11:20:36 +0100)
>
> are available in the Git repository at:
>
>   git://git.kraxel.org/qemu tags/microvm-20201021-pull-request
>
> for you to fetch changes up to 66907f3d3b8bfc2de77b82d89253b7b3a8b728ec:
>
>   tests/acpi: update expected data files (2020-10-21 11:36:19 +0200)
>
> 
> microvm: fix PCIe IRQs in APIC table.
> microvm: add usb support.
>
> 


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/5.2
for any user-visible changes.

-- PMM



[PATCH] target/i386: seg_helper: Correct segment selector nullification in the RET/IRET helper

2020-10-22 Thread Bin Meng
From: Bin Meng 

Per the SDM, when returning to outer privilege level, for segment
registers (ES, FS, GS, and DS) if the check fails, the segment
selector becomes null, but QEMU clears the base/limit/flags as well
as nullifying the segment selector, which should be a spec violation.

Real hardware seems to be compliant with the spec, at least on one
Coffee Lake board I tested.

Signed-off-by: Bin Meng 
---

 target/i386/seg_helper.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c
index be88938..d8766d8 100644
--- a/target/i386/seg_helper.c
+++ b/target/i386/seg_helper.c
@@ -2108,7 +2108,10 @@ static inline void validate_seg(CPUX86State *env, int 
seg_reg, int cpl)
 if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
 /* data or non conforming code segment */
 if (dpl < cpl) {
-cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0);
+cpu_x86_load_seg_cache(env, seg_reg, 0,
+   env->segs[seg_reg].base,
+   env->segs[seg_reg].limit,
+   env->segs[seg_reg].flags);
 }
 }
 }
-- 
2.7.4




Re: [PATCH 0/8] Add support for pvpanic mmio device

2020-10-22 Thread Peter Maydell
On Thu, 22 Oct 2020 at 09:25, Mihai Carabas  wrote:
> The patchset was assembled from chunks from some old patches from 2018 [1]
> which were left unmerged and some additions from me. Surprisingly their Linux
> kernel counterparts were merged (so the pvpanic driver from the kernel supports
> mmio).
>
> I have seen the discussions about moving the pvpanic to PCI [1]. Those patches
> were sent but nothing happened. Also they are not trivial and require major
> modifications at the driver level also. Given the fact that we already have
> mmio driver support for pvpanic in the Linux kernel, I have sent these patches
> to ask again the maintainers if this can be merged.

I'm afraid the answer is still the same. You need to provide
a convincing argument for why this needs to be an MMIO
device rather than a PCI device. I really don't want to
add MMIO devices to the virt board if I can avoid it,
because they're all extra code and potential extra
security boundary attack surface. PCI devices are guest
probeable and user-pluggable so they're almost always
nicer to use than MMIO.

thanks
-- PMM



[PATCH v2 0/2] s390x: pv: Diag318 fixes

2020-10-22 Thread Janosch Frank
Here are two fixes for the diag318 support that fix crashes when
booting PV guests.

We're working on extending our testing to catch problems like these
earlier.


Branch:
https://gitlab.com/frankja/qemu/-/commits/bb/frankja/diag318_fixes

CI:
https://gitlab.com/frankja/qemu/-/pipelines/206174979


V2:
* Moved fencing into cpu model functions
* Added rev-by and acks


Janosch Frank (2):
  s390x: pv: Remove sclp boundary checks
  s390x: pv: Fix diag318 PV fencing

 hw/s390x/sclp.c | 5 -
 target/s390x/cpu_features.c | 5 +
 target/s390x/cpu_features.h | 4 
 target/s390x/cpu_models.c   | 4 
 target/s390x/kvm.c  | 3 +--
 5 files changed, 14 insertions(+), 7 deletions(-)

-- 
2.25.1




[PATCH v2 1/2] s390x: pv: Remove sclp boundary checks

2020-10-22 Thread Janosch Frank
The SCLP boundary cross check is done by the Ultravisor for a
protected guest, hence we don't need to do it. As QEMU doesn't get a
valid SCCB address in protected mode this is even problematic and can
lead to QEMU reporting a false boundary cross error.

Signed-off-by: Janosch Frank 
Reported-by: Marc Hartmayer 
Fixes: db13387ca0 ("s390/sclp: rework sclp boundary checks")
Reviewed-by: Christian Borntraeger 
Reviewed-by: Thomas Huth 
Acked-by: Halil Pasic 
Acked-by: David Hildenbrand 
Tested-by: Marc Hartmayer 
---
 hw/s390x/sclp.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index 00f1e4648d..0cf2290826 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -285,11 +285,6 @@ int sclp_service_call_protected(CPUS390XState *env, 
uint64_t sccb,
 goto out_write;
 }
 
-if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length), code)) {
-work_sccb->h.response_code = 
cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
-goto out_write;
-}
-
 sclp_c->execute(sclp, work_sccb, code);
 out_write:
 s390_cpu_pv_mem_write(env_archcpu(env), 0, work_sccb,
-- 
2.25.1




[PATCH v2 2/2] s390x: pv: Fix diag318 PV fencing

2020-10-22 Thread Janosch Frank
Diag318 fencing needs to be determined on the current VM PV state and
not on the state that the VM has when we create the CPU model.

Signed-off-by: Janosch Frank 
Reported-by: Marc Hartmayer 
Fixes: fabdada935 ("s390: guest support for diagnose 0x318")
---
 target/s390x/cpu_features.c | 5 +
 target/s390x/cpu_features.h | 4 
 target/s390x/cpu_models.c   | 4 
 target/s390x/kvm.c  | 3 +--
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 31ea8df246..42fe0bf4ca 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -14,6 +14,7 @@
 #include "qemu/osdep.h"
 #include "qemu/module.h"
 #include "cpu_features.h"
+#include "hw/s390x/pv.h"
 
 #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
 [S390_FEAT_##_FEAT] = {\
@@ -105,6 +106,10 @@ void s390_fill_feat_block(const S390FeatBitmap features, 
S390FeatType type,
 }
 feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
 }
+
+if (type == S390_FEAT_TYPE_SCLP_FAC134 && s390_is_pv()) {
+clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data);
+}
 }
 
 void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
index ef52ffce83..87463f064d 100644
--- a/target/s390x/cpu_features.h
+++ b/target/s390x/cpu_features.h
@@ -81,6 +81,10 @@ const S390FeatGroupDef *s390_feat_group_def(S390FeatGroup 
group);
 
 #define BE_BIT_NR(BIT) (BIT ^ (BITS_PER_LONG - 1))
 
+static inline void clear_be_bit(unsigned int bit_nr, uint8_t *array)
+{
+array[bit_nr / 8] &= ~(0x80 >> (bit_nr % 8));
+}
 static inline void set_be_bit(unsigned int bit_nr, uint8_t *array)
 {
 array[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index ca484bfda7..461e0b8f4a 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -29,6 +29,7 @@
 #include "hw/pci/pci.h"
 #endif
 #include "qapi/qapi-commands-machine-target.h"
+#include "hw/s390x/pv.h"
 
 #define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
 {\
@@ -238,6 +239,9 @@ bool s390_has_feat(S390Feat feat)
 }
 return 0;
 }
+if (feat == S390_FEAT_DIAG_318 && s390_is_pv()) {
+return false;
+}
 return test_bit(feat, cpu->model->features);
 }
 
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index f13eff688c..baa070fdf7 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2498,8 +2498,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
Error **errp)
  */
 set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features);
 
-/* DIAGNOSE 0x318 is not supported under protected virtualization */
-if (!s390_is_pv() && kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) 
{
+if (kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) {
 set_bit(S390_FEAT_DIAG_318, model->features);
 }
 
-- 
2.25.1




Re: [PATCH v2 3/8] tests/9pfs: add local Tlcreate test

2020-10-22 Thread Christian Schoenebeck
On Donnerstag, 22. Oktober 2020 10:51:46 CEST Greg Kurz wrote:
> On Wed, 21 Oct 2020 14:25:33 +0200
> 
> Christian Schoenebeck  wrote:
> > This test case uses a Tlcreate 9p request to create a regular file inside
> > host's test directory.
> > 
> > Signed-off-by: Christian Schoenebeck 
> > ---
> 
> Just one remark, see below.
> 
> Reviewed-by: Greg Kurz 
> 
> >  tests/qtest/virtio-9p-test.c | 77 
> >  1 file changed, 77 insertions(+)
> > 
> > diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
> > index abd7e44648..c030bc2a6c 100644
> > --- a/tests/qtest/virtio-9p-test.c
> > +++ b/tests/qtest/virtio-9p-test.c
> > @@ -258,6 +258,7 @@ static const char *rmessage_name(uint8_t id)
> > 
> >  id == P9_RLOPEN ? "RLOPEN" :
> >  id == P9_RWRITE ? "RWRITE" :
> > 
> >  id == P9_RMKDIR ? "RMKDIR" :
> > +id == P9_RLCREATE ? "RLCREATE" :
> >  id == P9_RUNLINKAT ? "RUNLINKAT" :
> >  id == P9_RFLUSH ? "RFLUSH" :
> > 
> >  id == P9_RREADDIR ? "READDIR" :
> > @@ -694,6 +695,44 @@ static void v9fs_rmkdir(P9Req *req, v9fs_qid *qid)
> > 
> >  v9fs_req_free(req);
> >  
> >  }
> > 
> > +/* size[4] Tlcreate tag[2] fid[4] name[s] flags[4] mode[4] gid[4] */
> > +static P9Req *v9fs_tlcreate(QVirtio9P *v9p, uint32_t fid, const char
> > *name, +uint32_t flags, uint32_t mode,
> > uint32_t gid, +uint16_t tag)
> > +{
> > +P9Req *req;
> > +
> > +uint32_t body_size = 4 + 4 + 4 + 4;
> > +uint16_t string_size = v9fs_string_size(name);
> > +
> > +g_assert_cmpint(body_size, <=, UINT32_MAX - string_size);
> > +body_size += string_size;
> > +
> > +req = v9fs_req_init(v9p, body_size, P9_TLCREATE, tag);
> > +v9fs_uint32_write(req, fid);
> > +v9fs_string_write(req, name);
> > +v9fs_uint32_write(req, flags);
> > +v9fs_uint32_write(req, mode);
> > +v9fs_uint32_write(req, gid);
> > +v9fs_req_send(req);
> > +return req;
> > +}
> > +
> > +/* size[4] Rlcreate tag[2] qid[13] iounit[4] */
> > +static void v9fs_rlcreate(P9Req *req, v9fs_qid *qid, uint32_t *iounit)
> > +{
> > +v9fs_req_recv(req, P9_RLCREATE);
> > +if (qid) {
> > +v9fs_memread(req, qid, 13);
> > +} else {
> > +v9fs_memskip(req, 13);
> > +}
> > +if (iounit) {
> > +v9fs_uint32_read(req, iounit);
> > +}
> > +v9fs_req_free(req);
> > +}
> > +
> > 
> >  /* size[4] Tunlinkat tag[2] dirfd[4] name[s] flags[4] */
> >  static P9Req *v9fs_tunlinkat(QVirtio9P *v9p, uint32_t dirfd, const char
> >  *name,>  
> >   uint32_t flags, uint16_t tag)
> > 
> > @@ -1032,6 +1071,24 @@ static void do_mkdir(QVirtio9P *v9p, const char
> > *path, const char *cname)> 
> >  g_free(name);
> >  
> >  }
> > 
> > +/* create a regular file with Tlcreate and return file's fid */
> > +static uint32_t do_lcreate(QVirtio9P *v9p, const char *path,
> > +   const char *cname)
> > +{
> > +char *const name = g_strdup(cname);
> > +uint32_t fid;
> > +P9Req *req;
> > +
> > +fid = do_walk(v9p, path);
> > +
> > +req = v9fs_tlcreate(v9p, fid, name, 0, 0750, 0, 0);
> 
> Maybe it could make sense to make the mode a parameter of
> do_lcreate() in case someone wants to write a test case
> around that ? Same remark applies to do_mkdir() actually.
> 
> No need to change this now anyway.

Yeah, I thought about that for a moment, but decided if that's really needed 
one day, it'll be a trivial change, especially under the assumption that there 
will be probably not a lot of code using this function, even on the long-term.

Thanks for looking at these patches!

Best regards,
Christian Schoenebeck





Re: [PATCH v2 2/2] s390x: pv: Fix diag318 PV fencing

2020-10-22 Thread David Hildenbrand
On 22.10.20 12:31, Janosch Frank wrote:
> Diag318 fencing needs to be determined on the current VM PV state and
> not on the state that the VM has when we create the CPU model.
> 
> Signed-off-by: Janosch Frank 
> Reported-by: Marc Hartmayer 
> Fixes: fabdada935 ("s390: guest support for diagnose 0x318")
> ---
>  target/s390x/cpu_features.c | 5 +
>  target/s390x/cpu_features.h | 4 
>  target/s390x/cpu_models.c   | 4 
>  target/s390x/kvm.c  | 3 +--
>  4 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
> index 31ea8df246..42fe0bf4ca 100644
> --- a/target/s390x/cpu_features.c
> +++ b/target/s390x/cpu_features.c
> @@ -14,6 +14,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/module.h"
>  #include "cpu_features.h"
> +#include "hw/s390x/pv.h"
>  
>  #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
>  [S390_FEAT_##_FEAT] = {\
> @@ -105,6 +106,10 @@ void s390_fill_feat_block(const S390FeatBitmap features, 
> S390FeatType type,
>  }
>  feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>  }
> +
> +if (type == S390_FEAT_TYPE_SCLP_FAC134 && s390_is_pv()) {
> +clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data);
> +}
>  }
>  
>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
> diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
> index ef52ffce83..87463f064d 100644
> --- a/target/s390x/cpu_features.h
> +++ b/target/s390x/cpu_features.h
> @@ -81,6 +81,10 @@ const S390FeatGroupDef *s390_feat_group_def(S390FeatGroup 
> group);
>  
>  #define BE_BIT_NR(BIT) (BIT ^ (BITS_PER_LONG - 1))
>  
> +static inline void clear_be_bit(unsigned int bit_nr, uint8_t *array)
> +{
> +array[bit_nr / 8] &= ~(0x80 >> (bit_nr % 8));
> +}
>  static inline void set_be_bit(unsigned int bit_nr, uint8_t *array)
>  {
>  array[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> index ca484bfda7..461e0b8f4a 100644
> --- a/target/s390x/cpu_models.c
> +++ b/target/s390x/cpu_models.c
> @@ -29,6 +29,7 @@
>  #include "hw/pci/pci.h"
>  #endif
>  #include "qapi/qapi-commands-machine-target.h"
> +#include "hw/s390x/pv.h"
>  
>  #define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
>  {\
> @@ -238,6 +239,9 @@ bool s390_has_feat(S390Feat feat)
>  }
>  return 0;
>  }
> +if (feat == S390_FEAT_DIAG_318 && s390_is_pv()) {
> +return false;
> +}
>  return test_bit(feat, cpu->model->features);
>  }
>  
> diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
> index f13eff688c..baa070fdf7 100644
> --- a/target/s390x/kvm.c
> +++ b/target/s390x/kvm.c
> @@ -2498,8 +2498,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
> Error **errp)
>   */
>  set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features);
>  
> -/* DIAGNOSE 0x318 is not supported under protected virtualization */
> -if (!s390_is_pv() && kvm_check_extension(kvm_state, 
> KVM_CAP_S390_DIAG318)) {
> +if (kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) {
>  set_bit(S390_FEAT_DIAG_318, model->features);
>  }
>  
> 

Acked-by: David Hildenbrand 

-- 
Thanks,

David / dhildenb




Re: [PATCH v3 1/2] hw/block/nvme: add dulbe support

2020-10-22 Thread Philippe Mathieu-Daudé

On 10/22/20 4:21 AM, Keith Busch wrote:

On Thu, Oct 22, 2020 at 12:17:35AM +0200, Klaus Jensen wrote:

+for (int i = 1; i <= n->num_namespaces; i++) {


You can call me old-school, but I don't think C should have allowed
mixed declarations with code.


Since commit 7be41675f7c ("configure: Force the C standard to gnu99")
it is acceptable at the maintainer discretion. IOW if you don't want
declarations mixed with code, it is fine to ask contributors to not
do it in your subsystem.

Regards,

Phil.




Re: [PATCH v2 2/2] s390x: pv: Fix diag318 PV fencing

2020-10-22 Thread Christian Borntraeger



On 22.10.20 12:31, Janosch Frank wrote:
> Diag318 fencing needs to be determined on the current VM PV state and
> not on the state that the VM has when we create the CPU model.
> 
> Signed-off-by: Janosch Frank 
> Reported-by: Marc Hartmayer 
> Fixes: fabdada935 ("s390: guest support for diagnose 0x318")

I prefer v1 as it is more obvious what happens, but this looks fine as well
and as David prefers this let us go with this one.

Reviewed-by: Christian Borntraeger 

> ---
>  target/s390x/cpu_features.c | 5 +
>  target/s390x/cpu_features.h | 4 
>  target/s390x/cpu_models.c   | 4 
>  target/s390x/kvm.c  | 3 +--
>  4 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
> index 31ea8df246..42fe0bf4ca 100644
> --- a/target/s390x/cpu_features.c
> +++ b/target/s390x/cpu_features.c
> @@ -14,6 +14,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/module.h"
>  #include "cpu_features.h"
> +#include "hw/s390x/pv.h"
>  
>  #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
>  [S390_FEAT_##_FEAT] = {\
> @@ -105,6 +106,10 @@ void s390_fill_feat_block(const S390FeatBitmap features, 
> S390FeatType type,
>  }
>  feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>  }
> +
> +if (type == S390_FEAT_TYPE_SCLP_FAC134 && s390_is_pv()) {
> +clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data);
> +}
>  }
>  
>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
> diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
> index ef52ffce83..87463f064d 100644
> --- a/target/s390x/cpu_features.h
> +++ b/target/s390x/cpu_features.h
> @@ -81,6 +81,10 @@ const S390FeatGroupDef *s390_feat_group_def(S390FeatGroup 
> group);
>  
>  #define BE_BIT_NR(BIT) (BIT ^ (BITS_PER_LONG - 1))
>  
> +static inline void clear_be_bit(unsigned int bit_nr, uint8_t *array)
> +{
> +array[bit_nr / 8] &= ~(0x80 >> (bit_nr % 8));
> +}
>  static inline void set_be_bit(unsigned int bit_nr, uint8_t *array)
>  {
>  array[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> index ca484bfda7..461e0b8f4a 100644
> --- a/target/s390x/cpu_models.c
> +++ b/target/s390x/cpu_models.c
> @@ -29,6 +29,7 @@
>  #include "hw/pci/pci.h"
>  #endif
>  #include "qapi/qapi-commands-machine-target.h"
> +#include "hw/s390x/pv.h"
>  
>  #define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
>  {\
> @@ -238,6 +239,9 @@ bool s390_has_feat(S390Feat feat)
>  }
>  return 0;
>  }
> +if (feat == S390_FEAT_DIAG_318 && s390_is_pv()) {
> +return false;
> +}
>  return test_bit(feat, cpu->model->features);
>  }
>  
> diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
> index f13eff688c..baa070fdf7 100644
> --- a/target/s390x/kvm.c
> +++ b/target/s390x/kvm.c
> @@ -2498,8 +2498,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
> Error **errp)
>   */
>  set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features);
>  
> -/* DIAGNOSE 0x318 is not supported under protected virtualization */
> -if (!s390_is_pv() && kvm_check_extension(kvm_state, 
> KVM_CAP_S390_DIAG318)) {
> +if (kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) {
>  set_bit(S390_FEAT_DIAG_318, model->features);
>  }
>  
> 



Re: [PATCH v1 6/6] tests/acceptance: pick a random gdb port for reverse debugging

2020-10-22 Thread Philippe Mathieu-Daudé

On 10/22/20 7:20 AM, Thomas Huth wrote:

On 21/10/2020 18.31, Alex Bennée wrote:

Currently the test randomly fails if you are using a shared machine
due to contention on the well known port 1234. We can ameliorate this
a bit by picking a random non-ephemeral port although it doesn't
totally avoid the problem. While we could use a totally unique socket
address for debugging it's impossible to probe for gdb support of the
feature which makes this a sub-optimal but less fiddly option.

Signed-off-by: Alex Bennée 
---
  tests/acceptance/reverse_debugging.py | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)


Certainly better than before!


I'd prefer another chardev than tcp, but as you said this is
already an improvement, so:

Reviewed-by: Philippe Mathieu-Daudé 



Reviewed-by: Thomas Huth 






Re: [PATCH v1 4/6] gitlab: skip checkpatch.pl checks if no commit delta on branch

2020-10-22 Thread Philippe Mathieu-Daudé

On 10/21/20 6:31 PM, Alex Bennée wrote:

From: Daniel P. Berrangé 

If the current branch is synced to the current upstream git master,
there are no commits that need checking. This causes checkpatch.pl
to print an error that it found no commits. We need to avoid calling
checkpatch.pl in this case.

Signed-off-by: Daniel P. Berrangé 
Message-Id: <20201019143537.283094-2-berra...@redhat.com>
Signed-off-by: Alex Bennée 
---
  .gitlab-ci.d/check-patch.py | 8 
  1 file changed, 8 insertions(+)


Reviewed-by: Philippe Mathieu-Daudé 




[PATCH] physmem: improve ram size error messages

2020-10-22 Thread Pankaj Gupta
 Ram size mismatch condition logs below message. 

   "Length mismatch: pc.ram: 0x8000 in != 0x18000: Invalid argument"

 This patch improves the readability of error messages. 
 Removed the superfluous "in" and changed "Length" to "Size".

Signed-off-by: Pankaj Gupta 
Reported-by: Li Zhang 
---
 softmmu/physmem.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index e319fb2a1e..8da184f4a6 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1756,15 +1756,15 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t 
newsize, Error **errp)
 
 if (!(block->flags & RAM_RESIZEABLE)) {
 error_setg_errno(errp, EINVAL,
- "Length mismatch: %s: 0x" RAM_ADDR_FMT
- " in != 0x" RAM_ADDR_FMT, block->idstr,
+ "Size mismatch: %s: 0x" RAM_ADDR_FMT
+ " != 0x" RAM_ADDR_FMT, block->idstr,
  newsize, block->used_length);
 return -EINVAL;
 }
 
 if (block->max_length < newsize) {
 error_setg_errno(errp, EINVAL,
- "Length too large: %s: 0x" RAM_ADDR_FMT
+ "Size too large: %s: 0x" RAM_ADDR_FMT
  " > 0x" RAM_ADDR_FMT, block->idstr,
  newsize, block->max_length);
 return -EINVAL;
-- 
2.20.1




[PULL v2 00/28] Block patches

2020-10-22 Thread Stefan Hajnoczi
The following changes since commit ac793156f650ae2d77834932d72224175ee69086:

  Merge remote-tracking branch 
'remotes/pmaydell/tags/pull-target-arm-20201020-1' into staging (2020-10-20 
21:11:35 +0100)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 32a3fd65e7e3551337fd26bfc0e2f899d70c028c:

  iotests: add commit top->base cases to 274 (2020-10-22 09:55:39 +0100)


Pull request

v2:
 * Fix format string issues on 32-bit hosts [Peter]
 * Fix qemu-nbd.c CONFIG_POSIX ifdef issue [Eric]
 * Fix missing eventfd.h header on macOS [Peter]
 * Drop unreliable vhost-user-blk test (will send a new patch when ready) 
[Peter]

This pull request contains the vhost-user-blk server by Coiby Xu along with my
additions, block/nvme.c alignment and hardware error statistics by Philippe
Mathieu-Daudé, and bdrv_co_block_status_above() fixes by Vladimir
Sementsov-Ogievskiy.



Coiby Xu (6):
  libvhost-user: Allow vu_message_read to be replaced
  libvhost-user: remove watch for kick_fd when de-initialize vu-dev
  util/vhost-user-server: generic vhost user server
  block: move logical block size check function to a common utility
function
  block/export: vhost-user block device backend server
  MAINTAINERS: Add vhost-user block device backend server maintainer

Philippe Mathieu-Daudé (1):
  block/nvme: Add driver statistics for access alignment and hw errors

Stefan Hajnoczi (16):
  util/vhost-user-server: s/fileds/fields/ typo fix
  util/vhost-user-server: drop unnecessary QOM cast
  util/vhost-user-server: drop unnecessary watch deletion
  block/export: consolidate request structs into VuBlockReq
  util/vhost-user-server: drop unused DevicePanicNotifier
  util/vhost-user-server: fix memory leak in vu_message_read()
  util/vhost-user-server: check EOF when reading payload
  util/vhost-user-server: rework vu_client_trip() coroutine lifecycle
  block/export: report flush errors
  block/export: convert vhost-user-blk server to block export API
  util/vhost-user-server: move header to include/
  util/vhost-user-server: use static library in meson.build
  qemu-storage-daemon: avoid compiling blockdev_ss twice
  block: move block exports to libblockdev
  block/export: add iothread and fixed-iothread options
  block/export: add vhost-user-blk multi-queue support

Vladimir Sementsov-Ogievskiy (5):
  block/io: fix bdrv_co_block_status_above
  block/io: bdrv_common_block_status_above: support include_base
  block/io: bdrv_common_block_status_above: support bs == base
  block/io: fix bdrv_is_allocated_above
  iotests: add commit top->base cases to 274

 MAINTAINERS|   9 +
 qapi/block-core.json   |  24 +-
 qapi/block-export.json |  36 +-
 block/coroutines.h |   2 +
 block/export/vhost-user-blk-server.h   |  19 +
 contrib/libvhost-user/libvhost-user.h  |  21 +
 include/qemu/vhost-user-server.h   |  65 +++
 util/block-helpers.h   |  19 +
 block/export/export.c  |  37 +-
 block/export/vhost-user-blk-server.c   | 431 
 block/io.c | 132 +++---
 block/nvme.c   |  27 ++
 block/qcow2.c  |  16 +-
 contrib/libvhost-user/libvhost-user-glib.c |   2 +-
 contrib/libvhost-user/libvhost-user.c  |  15 +-
 hw/core/qdev-properties-system.c   |  31 +-
 nbd/server.c   |   2 -
 qemu-nbd.c |  21 +-
 softmmu/vl.c   |   4 +
 stubs/blk-exp-close-all.c  |   7 +
 tests/vhost-user-bridge.c  |   2 +
 tools/virtiofsd/fuse_virtio.c  |   4 +-
 util/block-helpers.c   |  46 +++
 util/vhost-user-server.c   | 446 +
 block/export/meson.build   |   3 +-
 contrib/libvhost-user/meson.build  |   1 +
 meson.build|  22 +-
 nbd/meson.build|   2 +
 storage-daemon/meson.build |   3 +-
 stubs/meson.build  |   1 +
 tests/qemu-iotests/274 |  20 +
 tests/qemu-iotests/274.out |  68 
 util/meson.build   |   4 +
 33 files changed, 1420 insertions(+), 122 deletions(-)
 create mode 100644 block/export/vhost-user-blk-server.h
 create mode 100644 include/qemu/vhost-user-server.h
 create mode 100644 util/block-helpers.h
 create mode 100644 block/export/vhost-user-blk-server.c
 create mode 100644 stubs/blk-exp-close-all.c
 create mode 100644 util/block-helpers.c
 create mode 100644 util/vhost-user-server.c

-- 
2.26.2



[PULL v2 05/28] block: move logical block size check function to a common utility function

2020-10-22 Thread Stefan Hajnoczi
From: Coiby Xu 

Move the constants from hw/core/qdev-properties.c to
util/block-helpers.h so that knowledge of the min/max values is
shared with other users of the block size check.

Signed-off-by: Stefan Hajnoczi 
Signed-off-by: Coiby Xu 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Marc-André Lureau 
Acked-by: Eduardo Habkost 
Message-id: 20200918080912.321299-5-coiby...@gmail.com
Signed-off-by: Stefan Hajnoczi 
---
 util/block-helpers.h | 19 +
 hw/core/qdev-properties-system.c | 31 -
 util/block-helpers.c | 46 
 util/meson.build |  1 +
 4 files changed, 71 insertions(+), 26 deletions(-)
 create mode 100644 util/block-helpers.h
 create mode 100644 util/block-helpers.c

diff --git a/util/block-helpers.h b/util/block-helpers.h
new file mode 100644
index 00..b53295a529
--- /dev/null
+++ b/util/block-helpers.h
@@ -0,0 +1,19 @@
+#ifndef BLOCK_HELPERS_H
+#define BLOCK_HELPERS_H
+
+#include "qemu/units.h"
+
+/* lower limit is sector size */
+#define MIN_BLOCK_SIZE  INT64_C(512)
+#define MIN_BLOCK_SIZE_STR  "512 B"
+/*
+ * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and
+ * matches qcow2 cluster size limit
+ */
+#define MAX_BLOCK_SIZE  (2 * MiB)
+#define MAX_BLOCK_SIZE_STR  "2 MiB"
+
+void check_block_size(const char *id, const char *name, int64_t value,
+  Error **errp);
+
+#endif /* BLOCK_HELPERS_H */
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 49bdd12581..b81a4e8d14 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -30,6 +30,7 @@
 #include "sysemu/blockdev.h"
 #include "net/net.h"
 #include "hw/pci/pci.h"
+#include "util/block-helpers.h"
 
 static bool check_prop_still_unset(DeviceState *dev, const char *name,
const void *old_val, const char *new_val,
@@ -576,16 +577,6 @@ const PropertyInfo qdev_prop_losttickpolicy = {
 
 /* --- blocksize --- */
 
-/* lower limit is sector size */
-#define MIN_BLOCK_SIZE  512
-#define MIN_BLOCK_SIZE_STR  "512 B"
-/*
- * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and
- * matches qcow2 cluster size limit
- */
-#define MAX_BLOCK_SIZE  (2 * MiB)
-#define MAX_BLOCK_SIZE_STR  "2 MiB"
-
 static void set_blocksize(Object *obj, Visitor *v, const char *name,
   void *opaque, Error **errp)
 {
@@ -593,6 +584,7 @@ static void set_blocksize(Object *obj, Visitor *v, const 
char *name,
 Property *prop = opaque;
 uint32_t *ptr = qdev_get_prop_ptr(dev, prop);
 uint64_t value;
+Error *local_err = NULL;
 
 if (dev->realized) {
 qdev_prop_set_after_realize(dev, name, errp);
@@ -602,24 +594,11 @@ static void set_blocksize(Object *obj, Visitor *v, const 
char *name,
 if (!visit_type_size(v, name, &value, errp)) {
 return;
 }
-/* value of 0 means "unset" */
-if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) {
-error_setg(errp,
-   "Property %s.%s doesn't take value %" PRIu64
-   " (minimum: " MIN_BLOCK_SIZE_STR
-   ", maximum: " MAX_BLOCK_SIZE_STR ")",
-   dev->id ? : "", name, value);
+check_block_size(dev->id ? : "", name, value, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
 return;
 }
-
-/* We rely on power-of-2 blocksizes for bitmasks */
-if ((value & (value - 1)) != 0) {
-error_setg(errp,
-  "Property %s.%s doesn't take value '%" PRId64 "', "
-  "it's not a power of 2", dev->id ?: "", name, 
(int64_t)value);
-return;
-}
-
 *ptr = value;
 }
 
diff --git a/util/block-helpers.c b/util/block-helpers.c
new file mode 100644
index 00..c4851432f5
--- /dev/null
+++ b/util/block-helpers.c
@@ -0,0 +1,46 @@
+/*
+ * Block utility functions
+ *
+ * Copyright IBM, Corp. 2011
+ * Copyright (c) 2020 Coiby Xu 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "block-helpers.h"
+
+/**
+ * check_block_size:
+ * @id: The unique ID of the object
+ * @name: The name of the property being validated
+ * @value: The block size in bytes
+ * @errp: A pointer to an area to store an error
+ *
+ * This function checks that the block size meets the following conditions:
+ * 1. At least MIN_BLOCK_SIZE
+ * 2. No larger than MAX_BLOCK_SIZE
+ * 3. A power of 2
+ */
+void check_block_size(const char *id, const char *name, int64_t value,
+  Error **errp)
+{
+/* value of 0 means "unset" */
+if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) {
+error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE,
+   id, name, value

[PULL v2 01/28] block/nvme: Add driver statistics for access alignment and hw errors

2020-10-22 Thread Stefan Hajnoczi
From: Philippe Mathieu-Daudé 

Keep statistics of some hardware errors, and number of
aligned/unaligned I/O accesses.

QMP example booting a full RHEL 8.3 aarch64 guest:

{ "execute": "query-blockstats" }
{
"return": [
{
"device": "",
"node-name": "drive0",
"stats": {
"flush_total_time_ns": 6026948,
"wr_highest_offset": 3383991230464,
"wr_total_time_ns": 807450995,
"failed_wr_operations": 0,
"failed_rd_operations": 0,
"wr_merged": 3,
"wr_bytes": 50133504,
"failed_unmap_operations": 0,
"failed_flush_operations": 0,
"account_invalid": false,
"rd_total_time_ns": 1846979900,
"flush_operations": 130,
"wr_operations": 659,
"rd_merged": 1192,
"rd_bytes": 218244096,
"account_failed": false,
"idle_time_ns": 2678641497,
"rd_operations": 7406,
},
"driver-specific": {
"driver": "nvme",
"completion-errors": 0,
"unaligned-accesses": 2959,
"aligned-accesses": 4477
},
"qdev": "/machine/peripheral-anon/device[0]/virtio-backend"
}
]
}

Suggested-by: Stefan Hajnoczi 
Signed-off-by: Philippe Mathieu-Daudé 
Acked-by: Markus Armbruster 
Message-id: 20201001162939.1567915-1-phi...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 qapi/block-core.json | 24 +++-
 block/nvme.c | 27 +++
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index ee5ebef7f2..e00fc27b5e 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -947,6 +947,27 @@
   'discard-nb-failed': 'uint64',
   'discard-bytes-ok': 'uint64' } }
 
+##
+# @BlockStatsSpecificNvme:
+#
+# NVMe driver statistics
+#
+# @completion-errors: The number of completion errors.
+#
+# @aligned-accesses: The number of aligned accesses performed by
+#the driver.
+#
+# @unaligned-accesses: The number of unaligned accesses performed by
+#  the driver.
+#
+# Since: 5.2
+##
+{ 'struct': 'BlockStatsSpecificNvme',
+  'data': {
+  'completion-errors': 'uint64',
+  'aligned-accesses': 'uint64',
+  'unaligned-accesses': 'uint64' } }
+
 ##
 # @BlockStatsSpecific:
 #
@@ -959,7 +980,8 @@
   'discriminator': 'driver',
   'data': {
   'file': 'BlockStatsSpecificFile',
-  'host_device': 'BlockStatsSpecificFile' } }
+  'host_device': 'BlockStatsSpecificFile',
+  'nvme': 'BlockStatsSpecificNvme' } }
 
 ##
 # @BlockStats:
diff --git a/block/nvme.c b/block/nvme.c
index b48f6f2588..739a0a700c 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -128,6 +128,12 @@ struct BDRVNVMeState {
 
 /* PCI address (required for nvme_refresh_filename()) */
 char *device;
+
+struct {
+uint64_t completion_errors;
+uint64_t aligned_accesses;
+uint64_t unaligned_accesses;
+} stats;
 };
 
 #define NVME_BLOCK_OPT_DEVICE "device"
@@ -384,6 +390,9 @@ static bool nvme_process_completion(NVMeQueuePair *q)
 break;
 }
 ret = nvme_translate_error(c);
+if (ret) {
+s->stats.completion_errors++;
+}
 q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE;
 if (!q->cq.head) {
 q->cq_phase = !q->cq_phase;
@@ -1155,8 +1164,10 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 assert(QEMU_IS_ALIGNED(bytes, s->page_size));
 assert(bytes <= s->max_transfer);
 if (nvme_qiov_aligned(bs, qiov)) {
+s->stats.aligned_accesses++;
 return nvme_co_prw_aligned(bs, offset, bytes, qiov, is_write, flags);
 }
+s->stats.unaligned_accesses++;
 trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write);
 buf = qemu_try_memalign(s->page_size, bytes);
 
@@ -1452,6 +1463,21 @@ static void nvme_unregister_buf(BlockDriverState *bs, 
void *host)
 qemu_vfio_dma_unmap(s->vfio, host);
 }
 
+static BlockStatsSpecific *nvme_get_specific_stats(BlockDriverState *bs)
+{
+BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1);
+BDRVNVMeState *s = bs->opaque;
+
+stats->driver = BLOCKDEV_DRIVER_NVME;
+stats->u.nvme = (BlockStatsSpecificNvme) {
+.completion_errors = s->stats.completion_errors,
+.aligned_accesses = s->stats.aligned_accesses,
+.unaligned_accesses = s->stats.unaligned_accesses,
+};
+
+return stats;
+}
+
 static const char *const nvme_strong_runtime_opts[] = {
 NVME_BLOCK_OPT_DEVICE,
 NVME_BLOCK_OPT_NAMESPACE,
@@ -1485,6 +1511,7 @@ static BlockDriver bdrv_nvme = {
 .bdrv_refresh_filename= nvme_refresh_filename,
 .bdrv_refresh_limits  = nvme_refresh_limits,
   

[PULL v2 02/28] libvhost-user: Allow vu_message_read to be replaced

2020-10-22 Thread Stefan Hajnoczi
From: Coiby Xu 

Allow vu_message_read to be replaced by one which will make use of the
QIOChannel functions. Thus reading a vhost-user message won't stall the
guest. For the slave channel, we still use the default vu_message_read.

Reviewed-by: Marc-André Lureau 
Signed-off-by: Coiby Xu 
Reviewed-by: Stefan Hajnoczi 
Message-id: 20200918080912.321299-2-coiby...@gmail.com
Signed-off-by: Stefan Hajnoczi 
---
 contrib/libvhost-user/libvhost-user.h  | 21 +
 contrib/libvhost-user/libvhost-user-glib.c |  2 +-
 contrib/libvhost-user/libvhost-user.c  | 14 +++---
 tests/vhost-user-bridge.c  |  2 ++
 tools/virtiofsd/fuse_virtio.c  |  4 ++--
 5 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/contrib/libvhost-user/libvhost-user.h 
b/contrib/libvhost-user/libvhost-user.h
index 287ac5fec7..3bbeae8587 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -36,6 +36,8 @@
  */
 #define VHOST_USER_MAX_RAM_SLOTS 32
 
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
 typedef enum VhostSetConfigType {
 VHOST_SET_CONFIG_TYPE_MASTER = 0,
 VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
@@ -221,6 +223,7 @@ typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
 typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
 typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
   int *do_reply);
+typedef bool (*vu_read_msg_cb) (VuDev *dev, int sock, VhostUserMsg *vmsg);
 typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
 typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
 typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
@@ -389,6 +392,23 @@ struct VuDev {
 bool broken;
 uint16_t max_queues;
 
+/* @read_msg: custom method to read vhost-user message
+ *
+ * Read data from vhost_user socket fd and fill up
+ * the passed VhostUserMsg *vmsg struct.
+ *
+ * If reading fails, it should close the received set of file
+ * descriptors as socket message's auxiliary data.
+ *
+ * For the details, please refer to vu_message_read in libvhost-user.c
+ * which will be used by default if not custom method is provided when
+ * calling vu_init
+ *
+ * Returns: true if vhost-user message successfully received,
+ *  otherwise return false.
+ *
+ */
+vu_read_msg_cb read_msg;
 /* @set_watch: add or update the given fd to the watch set,
  * call cb when condition is met */
 vu_set_watch_cb set_watch;
@@ -432,6 +452,7 @@ bool vu_init(VuDev *dev,
  uint16_t max_queues,
  int socket,
  vu_panic_cb panic,
+ vu_read_msg_cb read_msg,
  vu_set_watch_cb set_watch,
  vu_remove_watch_cb remove_watch,
  const VuDevIface *iface);
diff --git a/contrib/libvhost-user/libvhost-user-glib.c 
b/contrib/libvhost-user/libvhost-user-glib.c
index 53f1ca4cdd..0df2ec9271 100644
--- a/contrib/libvhost-user/libvhost-user-glib.c
+++ b/contrib/libvhost-user/libvhost-user-glib.c
@@ -147,7 +147,7 @@ vug_init(VugDev *dev, uint16_t max_queues, int socket,
 g_assert(dev);
 g_assert(iface);
 
-if (!vu_init(&dev->parent, max_queues, socket, panic, set_watch,
+if (!vu_init(&dev->parent, max_queues, socket, panic, NULL, set_watch,
  remove_watch, iface)) {
 return false;
 }
diff --git a/contrib/libvhost-user/libvhost-user.c 
b/contrib/libvhost-user/libvhost-user.c
index 9f1285b8a1..09bdff18f3 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -68,8 +68,6 @@
 /* The version of inflight buffer */
 #define INFLIGHT_VERSION 1
 
-#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
-
 /* The version of the protocol we support */
 #define VHOST_USER_VERSION 1
 #define LIBVHOST_USER_DEBUG 0
@@ -268,7 +266,7 @@ have_userfault(void)
 }
 
 static bool
-vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
+vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
 {
 char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = 
{};
 struct iovec iov = {
@@ -416,7 +414,7 @@ vu_process_message_reply(VuDev *dev, const VhostUserMsg 
*vmsg)
 goto out;
 }
 
-if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) {
+if (!vu_message_read_default(dev, dev->slave_fd, &msg_reply)) {
 goto out;
 }
 
@@ -907,7 +905,7 @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg 
*vmsg)
 /* Wait for QEMU to confirm that it's registered the handler for the
  * faults.
  */
-if (!vu_message_read(dev, dev->sock, vmsg) ||
+if (!dev->read_msg(dev, dev->sock, vmsg) ||
 vmsg->size != sizeof(vmsg->payload.u64) ||
 vmsg->payload.u64 != 0) {
 vu_panic(dev, "failed to receive valid ack f

[PULL v2 04/28] util/vhost-user-server: generic vhost user server

2020-10-22 Thread Stefan Hajnoczi
From: Coiby Xu 

Sharing QEMU devices via vhost-user protocol.

Only one vhost-user client can connect to the server at a time.

Suggested-by: Kevin Wolf 
Signed-off-by: Stefan Hajnoczi 
Signed-off-by: Coiby Xu 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Marc-André Lureau 
Message-id: 20200918080912.321299-4-coiby...@gmail.com
[Fixed size_t %lu -> %zu format string compiler error.
--Stefan]
Signed-off-by: Stefan Hajnoczi 
---
 util/vhost-user-server.h |  65 ++
 util/vhost-user-server.c | 428 +++
 util/meson.build |   1 +
 3 files changed, 494 insertions(+)
 create mode 100644 util/vhost-user-server.h
 create mode 100644 util/vhost-user-server.c

diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
new file mode 100644
index 00..5232f96718
--- /dev/null
+++ b/util/vhost-user-server.h
@@ -0,0 +1,65 @@
+/*
+ * Sharing QEMU devices via vhost-user protocol
+ *
+ * Copyright (c) Coiby Xu .
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef VHOST_USER_SERVER_H
+#define VHOST_USER_SERVER_H
+
+#include "contrib/libvhost-user/libvhost-user.h"
+#include "io/channel-socket.h"
+#include "io/channel-file.h"
+#include "io/net-listener.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "standard-headers/linux/virtio_blk.h"
+
+typedef struct VuFdWatch {
+VuDev *vu_dev;
+int fd; /*kick fd*/
+void *pvt;
+vu_watch_cb cb;
+bool processing;
+QTAILQ_ENTRY(VuFdWatch) next;
+} VuFdWatch;
+
+typedef struct VuServer VuServer;
+typedef void DevicePanicNotifierFn(VuServer *server);
+
+struct VuServer {
+QIONetListener *listener;
+AioContext *ctx;
+DevicePanicNotifierFn *device_panic_notifier;
+int max_queues;
+const VuDevIface *vu_iface;
+VuDev vu_dev;
+QIOChannel *ioc; /* The I/O channel with the client */
+QIOChannelSocket *sioc; /* The underlying data channel with the client */
+/* IOChannel for fd provided via VHOST_USER_SET_SLAVE_REQ_FD */
+QIOChannel *ioc_slave;
+QIOChannelSocket *sioc_slave;
+Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
+QTAILQ_HEAD(, VuFdWatch) vu_fd_watches;
+/* restart coroutine co_trip if AIOContext is changed */
+bool aio_context_changed;
+bool processing_msg;
+};
+
+bool vhost_user_server_start(VuServer *server,
+ SocketAddress *unix_socket,
+ AioContext *ctx,
+ uint16_t max_queues,
+ DevicePanicNotifierFn *device_panic_notifier,
+ const VuDevIface *vu_iface,
+ Error **errp);
+
+void vhost_user_server_stop(VuServer *server);
+
+void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx);
+
+#endif /* VHOST_USER_SERVER_H */
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
new file mode 100644
index 00..b189944856
--- /dev/null
+++ b/util/vhost-user-server.c
@@ -0,0 +1,428 @@
+/*
+ * Sharing QEMU devices via vhost-user protocol
+ *
+ * Copyright (c) Coiby Xu .
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "vhost-user-server.h"
+
+static void vmsg_close_fds(VhostUserMsg *vmsg)
+{
+int i;
+for (i = 0; i < vmsg->fd_num; i++) {
+close(vmsg->fds[i]);
+}
+}
+
+static void vmsg_unblock_fds(VhostUserMsg *vmsg)
+{
+int i;
+for (i = 0; i < vmsg->fd_num; i++) {
+qemu_set_nonblock(vmsg->fds[i]);
+}
+}
+
+static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
+  gpointer opaque);
+
+static void close_client(VuServer *server)
+{
+/*
+ * Before closing the client
+ *
+ * 1. Let vu_client_trip stop processing new vhost-user msg
+ *
+ * 2. remove kick_handler
+ *
+ * 3. wait for the kick handler to be finished
+ *
+ * 4. wait for the current vhost-user msg to be finished processing
+ */
+
+QIOChannelSocket *sioc = server->sioc;
+/* When this is set vu_client_trip will stop new processing vhost-user 
message */
+server->sioc = NULL;
+
+VuFdWatch *vu_fd_watch, *next;
+QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
+aio_set_fd_handler(server->ioc->ctx, vu_fd_watch->fd, true, NULL,
+   NULL, NULL, NULL);
+}
+
+while (!QTAILQ_EMPTY(&server->vu_fd_watches)) {
+QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
+if (!vu_fd_watch->processing) {
+QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
+g_free(vu_fd_watch);
+}
+ 

[PULL v2 03/28] libvhost-user: remove watch for kick_fd when de-initialize vu-dev

2020-10-22 Thread Stefan Hajnoczi
From: Coiby Xu 

When the client is running in gdb and the quit command is run in gdb,
QEMU will still dispatch the event, which will cause a segmentation fault
in the callback function.

Signed-off-by: Coiby Xu 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Marc-André Lureau 
Message-id: 20200918080912.321299-3-coiby...@gmail.com
Signed-off-by: Stefan Hajnoczi 
---
 contrib/libvhost-user/libvhost-user.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/contrib/libvhost-user/libvhost-user.c 
b/contrib/libvhost-user/libvhost-user.c
index 09bdff18f3..bfec8a881a 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -1918,6 +1918,7 @@ vu_deinit(VuDev *dev)
 }
 
 if (vq->kick_fd != -1) {
+dev->remove_watch(dev, vq->kick_fd);
 close(vq->kick_fd);
 vq->kick_fd = -1;
 }
-- 
2.26.2



[PULL v2 11/28] block/export: consolidate request structs into VuBlockReq

2020-10-22 Thread Stefan Hajnoczi
Only one struct is needed per request. Drop req_data and the separate
VuBlockReq instance. Instead let vu_queue_pop() allocate everything at
once.

This fixes the req_data memory leak in vu_block_virtio_process_req().

Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-6-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 block/export/vhost-user-blk-server.c | 68 +---
 1 file changed, 21 insertions(+), 47 deletions(-)

diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index 2ba1613cc9..d227b468d8 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -25,7 +25,7 @@ struct virtio_blk_inhdr {
 };
 
 typedef struct VuBlockReq {
-VuVirtqElement *elem;
+VuVirtqElement elem;
 int64_t sector_num;
 size_t size;
 struct virtio_blk_inhdr *in;
@@ -39,14 +39,10 @@ static void vu_block_req_complete(VuBlockReq *req)
 VuDev *vu_dev = &req->server->vu_dev;
 
 /* IO size with 1 extra status byte */
-vu_queue_push(vu_dev, req->vq, req->elem, req->size + 1);
+vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
 vu_queue_notify(vu_dev, req->vq);
 
-if (req->elem) {
-free(req->elem);
-}
-
-g_free(req);
+free(req);
 }
 
 static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
@@ -89,20 +85,12 @@ static void coroutine_fn vu_block_flush(VuBlockReq *req)
 blk_co_flush(backend);
 }
 
-struct req_data {
-VuServer *server;
-VuVirtq *vq;
-VuVirtqElement *elem;
-};
-
 static void coroutine_fn vu_block_virtio_process_req(void *opaque)
 {
-struct req_data *data = opaque;
-VuServer *server = data->server;
-VuVirtq *vq = data->vq;
-VuVirtqElement *elem = data->elem;
+VuBlockReq *req = opaque;
+VuServer *server = req->server;
+VuVirtqElement *elem = &req->elem;
 uint32_t type;
-VuBlockReq *req;
 
 VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
 BlockBackend *backend = vdev_blk->backend;
@@ -111,18 +99,13 @@ static void coroutine_fn vu_block_virtio_process_req(void 
*opaque)
 struct iovec *out_iov = elem->out_sg;
 unsigned in_num = elem->in_num;
 unsigned out_num = elem->out_num;
+
 /* refer to hw/block/virtio_blk.c */
 if (elem->out_num < 1 || elem->in_num < 1) {
 error_report("virtio-blk request missing headers");
-free(elem);
-return;
+goto err;
 }
 
-req = g_new0(VuBlockReq, 1);
-req->server = server;
-req->vq = vq;
-req->elem = elem;
-
 if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
 sizeof(req->out)) != sizeof(req->out))) {
 error_report("virtio-blk request outhdr too short");
@@ -202,36 +185,27 @@ static void coroutine_fn vu_block_virtio_process_req(void 
*opaque)
 
 err:
 free(elem);
-g_free(req);
-return;
 }
 
 static void vu_block_process_vq(VuDev *vu_dev, int idx)
 {
-VuServer *server;
-VuVirtq *vq;
-struct req_data *req_data;
+VuServer *server = container_of(vu_dev, VuServer, vu_dev);
+VuVirtq *vq = vu_get_queue(vu_dev, idx);
 
-server = container_of(vu_dev, VuServer, vu_dev);
-assert(server);
-
-vq = vu_get_queue(vu_dev, idx);
-assert(vq);
-VuVirtqElement *elem;
 while (1) {
-elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) +
-sizeof(VuBlockReq));
-if (elem) {
-req_data = g_new0(struct req_data, 1);
-req_data->server = server;
-req_data->vq = vq;
-req_data->elem = elem;
-Coroutine *co = qemu_coroutine_create(vu_block_virtio_process_req,
-  req_data);
-aio_co_enter(server->ioc->ctx, co);
-} else {
+VuBlockReq *req;
+
+req = vu_queue_pop(vu_dev, vq, sizeof(VuBlockReq));
+if (!req) {
 break;
 }
+
+req->server = server;
+req->vq = vq;
+
+Coroutine *co =
+qemu_coroutine_create(vu_block_virtio_process_req, req);
+qemu_coroutine_enter(co);
 }
 }
 
-- 
2.26.2



[PULL v2 10/28] util/vhost-user-server: drop unnecessary watch deletion

2020-10-22 Thread Stefan Hajnoczi
Explicitly deleting watches is not necessary since libvhost-user calls
remove_watch() during vu_deinit(). Add an assertion to check this
though.

Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-5-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 util/vhost-user-server.c | 19 ---
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 443ab7448c..6efe2279fd 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -48,21 +48,6 @@ static void close_client(VuServer *server)
 /* When this is set vu_client_trip will stop new processing vhost-user 
message */
 server->sioc = NULL;
 
-VuFdWatch *vu_fd_watch, *next;
-QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
-aio_set_fd_handler(server->ioc->ctx, vu_fd_watch->fd, true, NULL,
-   NULL, NULL, NULL);
-}
-
-while (!QTAILQ_EMPTY(&server->vu_fd_watches)) {
-QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
-if (!vu_fd_watch->processing) {
-QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
-g_free(vu_fd_watch);
-}
-}
-}
-
 while (server->processing_msg) {
 if (server->ioc->read_coroutine) {
 server->ioc->read_coroutine = NULL;
@@ -73,6 +58,10 @@ static void close_client(VuServer *server)
 }
 
 vu_deinit(&server->vu_dev);
+
+/* vu_deinit() should have called remove_watch() */
+assert(QTAILQ_EMPTY(&server->vu_fd_watches));
+
 object_unref(OBJECT(sioc));
 object_unref(OBJECT(server->ioc));
 }
-- 
2.26.2



[PULL v2 06/28] block/export: vhost-user block device backend server

2020-10-22 Thread Stefan Hajnoczi
From: Coiby Xu 

By making use of libvhost-user, a block device drive can be shared with
the connected vhost-user client. Only one client can connect to the
server at a time.

Since vhost-user-server needs a block drive to be created first, delay
the creation of this object.

Suggested-by: Kevin Wolf 
Signed-off-by: Stefan Hajnoczi 
Signed-off-by: Coiby Xu 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Marc-André Lureau 
Message-id: 20200918080912.321299-6-coiby...@gmail.com
[Shorten "vhost_user_blk_server" string to "vhost_user_blk" to avoid the
following compiler warning:
../block/export/vhost-user-blk-server.c:178:50: error: ‘%s’ directive output 
truncated writing 21 bytes into a region of size 20 [-Werror=format-truncation=]
and fix "Invalid size %ld ..." ssize_t format string arguments for
32-bit hosts.
--Stefan]
Signed-off-by: Stefan Hajnoczi 
---
 block/export/vhost-user-blk-server.h |  36 ++
 block/export/vhost-user-blk-server.c | 661 +++
 softmmu/vl.c |   4 +
 block/meson.build|   1 +
 4 files changed, 702 insertions(+)
 create mode 100644 block/export/vhost-user-blk-server.h
 create mode 100644 block/export/vhost-user-blk-server.c

diff --git a/block/export/vhost-user-blk-server.h 
b/block/export/vhost-user-blk-server.h
new file mode 100644
index 00..f06f37c4c8
--- /dev/null
+++ b/block/export/vhost-user-blk-server.h
@@ -0,0 +1,36 @@
+/*
+ * Sharing QEMU block devices via vhost-user protocal
+ *
+ * Copyright (c) Coiby Xu .
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef VHOST_USER_BLK_SERVER_H
+#define VHOST_USER_BLK_SERVER_H
+#include "util/vhost-user-server.h"
+
+typedef struct VuBlockDev VuBlockDev;
+#define TYPE_VHOST_USER_BLK_SERVER "vhost-user-blk-server"
+#define VHOST_USER_BLK_SERVER(obj) \
+   OBJECT_CHECK(VuBlockDev, obj, TYPE_VHOST_USER_BLK_SERVER)
+
+/* vhost user block device */
+struct VuBlockDev {
+Object parent_obj;
+char *node_name;
+SocketAddress *addr;
+AioContext *ctx;
+VuServer vu_server;
+bool running;
+uint32_t blk_size;
+BlockBackend *backend;
+QIOChannelSocket *sioc;
+QTAILQ_ENTRY(VuBlockDev) next;
+struct virtio_blk_config blkcfg;
+bool writable;
+};
+
+#endif /* VHOST_USER_BLK_SERVER_H */
diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
new file mode 100644
index 00..2ba1613cc9
--- /dev/null
+++ b/block/export/vhost-user-blk-server.c
@@ -0,0 +1,661 @@
+/*
+ * Sharing QEMU block devices via vhost-user protocal
+ *
+ * Parts of the code based on nbd/server.c.
+ *
+ * Copyright (c) Coiby Xu .
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "vhost-user-blk-server.h"
+#include "qapi/error.h"
+#include "qom/object_interfaces.h"
+#include "sysemu/block-backend.h"
+#include "util/block-helpers.h"
+
+enum {
+VHOST_USER_BLK_MAX_QUEUES = 1,
+};
+struct virtio_blk_inhdr {
+unsigned char status;
+};
+
+typedef struct VuBlockReq {
+VuVirtqElement *elem;
+int64_t sector_num;
+size_t size;
+struct virtio_blk_inhdr *in;
+struct virtio_blk_outhdr out;
+VuServer *server;
+struct VuVirtq *vq;
+} VuBlockReq;
+
+static void vu_block_req_complete(VuBlockReq *req)
+{
+VuDev *vu_dev = &req->server->vu_dev;
+
+/* IO size with 1 extra status byte */
+vu_queue_push(vu_dev, req->vq, req->elem, req->size + 1);
+vu_queue_notify(vu_dev, req->vq);
+
+if (req->elem) {
+free(req->elem);
+}
+
+g_free(req);
+}
+
+static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
+{
+return container_of(server, VuBlockDev, vu_server);
+}
+
+static int coroutine_fn
+vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
+  uint32_t iovcnt, uint32_t type)
+{
+struct virtio_blk_discard_write_zeroes desc;
+ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
+if (unlikely(size != sizeof(desc))) {
+error_report("Invalid size %zd, expect %zu", size, sizeof(desc));
+return -EINVAL;
+}
+
+VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
+uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
+  le32_to_cpu(desc.num_sectors) << 9 };
+if (type == VIRTIO_BLK_T_DISCARD) {
+if (blk_co_pdiscard(vdev_blk->backend, range[0], range[1]) == 0) {
+return 0;
+}
+} else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
+if (blk_co_pwrite_zeroes(vdev_blk->backend,
+ range[0], range[1], 0) == 0) {
+return 0;
+}
+}
+
+return -EINVAL;
+}
+
+static 

[PULL v2 15/28] util/vhost-user-server: rework vu_client_trip() coroutine lifecycle

2020-10-22 Thread Stefan Hajnoczi
The vu_client_trip() coroutine is leaked during AioContext switching. It
is also unsafe to destroy the vu_dev in panic_cb() since its callers
still access it in some cases.

Rework the lifecycle to solve these safety issues.

Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-10-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 util/vhost-user-server.h |  29 ++--
 block/export/vhost-user-blk-server.c |   9 +-
 util/vhost-user-server.c | 245 +++
 3 files changed, 155 insertions(+), 128 deletions(-)

diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
index 92177fc911..0da4c2cc4c 100644
--- a/util/vhost-user-server.h
+++ b/util/vhost-user-server.h
@@ -19,34 +19,36 @@
 #include "qapi/error.h"
 #include "standard-headers/linux/virtio_blk.h"
 
+/* A kick fd that we monitor on behalf of libvhost-user */
 typedef struct VuFdWatch {
 VuDev *vu_dev;
 int fd; /*kick fd*/
 void *pvt;
 vu_watch_cb cb;
-bool processing;
 QTAILQ_ENTRY(VuFdWatch) next;
 } VuFdWatch;
 
-typedef struct VuServer VuServer;
-
-struct VuServer {
+/**
+ * VuServer:
+ * A vhost-user server instance with user-defined VuDevIface callbacks.
+ * Vhost-user device backends can be implemented using VuServer. VuDevIface
+ * callbacks and virtqueue kicks run in the given AioContext.
+ */
+typedef struct {
 QIONetListener *listener;
+QEMUBH *restart_listener_bh;
 AioContext *ctx;
 int max_queues;
 const VuDevIface *vu_iface;
+
+/* Protected by ctx lock */
 VuDev vu_dev;
 QIOChannel *ioc; /* The I/O channel with the client */
 QIOChannelSocket *sioc; /* The underlying data channel with the client */
-/* IOChannel for fd provided via VHOST_USER_SET_SLAVE_REQ_FD */
-QIOChannel *ioc_slave;
-QIOChannelSocket *sioc_slave;
-Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
 QTAILQ_HEAD(, VuFdWatch) vu_fd_watches;
-/* restart coroutine co_trip if AIOContext is changed */
-bool aio_context_changed;
-bool processing_msg;
-};
+
+Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
+} VuServer;
 
 bool vhost_user_server_start(VuServer *server,
  SocketAddress *unix_socket,
@@ -57,6 +59,7 @@ bool vhost_user_server_start(VuServer *server,
 
 void vhost_user_server_stop(VuServer *server);
 
-void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx);
+void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
+void vhost_user_server_detach_aio_context(VuServer *server);
 
 #endif /* VHOST_USER_SERVER_H */
diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index c8fa4ecba9..4d35232bf3 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -313,18 +313,13 @@ static const VuDevIface vu_block_iface = {
 static void blk_aio_attached(AioContext *ctx, void *opaque)
 {
 VuBlockDev *vub_dev = opaque;
-aio_context_acquire(ctx);
-vhost_user_server_set_aio_context(&vub_dev->vu_server, ctx);
-aio_context_release(ctx);
+vhost_user_server_attach_aio_context(&vub_dev->vu_server, ctx);
 }
 
 static void blk_aio_detach(void *opaque)
 {
 VuBlockDev *vub_dev = opaque;
-AioContext *ctx = vub_dev->vu_server.ctx;
-aio_context_acquire(ctx);
-vhost_user_server_set_aio_context(&vub_dev->vu_server, NULL);
-aio_context_release(ctx);
+vhost_user_server_detach_aio_context(&vub_dev->vu_server);
 }
 
 static void
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 981908fef0..c448800e58 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -9,8 +9,50 @@
  */
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
+#include "block/aio-wait.h"
 #include "vhost-user-server.h"
 
+/*
+ * Theory of operation:
+ *
+ * VuServer is started and stopped by vhost_user_server_start() and
+ * vhost_user_server_stop() from the main loop thread. Starting the server
+ * opens a vhost-user UNIX domain socket and listens for incoming connections.
+ * Only one connection is allowed at a time.
+ *
+ * The connection is handled by the vu_client_trip() coroutine in the
+ * VuServer->ctx AioContext. The coroutine consists of a vu_dispatch() loop
+ * where libvhost-user calls vu_message_read() to receive the next vhost-user
+ * protocol messages over the UNIX domain socket.
+ *
+ * When virtqueues are set up libvhost-user calls set_watch() to monitor kick
+ * fds. These fds are also handled in the VuServer->ctx AioContext.
+ *
+ * Both vu_client_trip() and kick fd monitoring can be stopped by shutting down
+ * the socket connection. Shutting down the socket connection causes
+ * vu_message_read() to fail since no more data can be received from the 
socket.
+ * After vu_dispatch() fails, vu_client_trip() calls vu_deinit() to stop
+ * libvhost-user before terminating the coroutine. vu_deinit() calls
+ * remove

[PULL v2 07/28] MAINTAINERS: Add vhost-user block device backend server maintainer

2020-10-22 Thread Stefan Hajnoczi
From: Coiby Xu 

Suggested-by: Stefano Garzarella 
Signed-off-by: Coiby Xu 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Marc-André Lureau 
Message-id: 20200918080912.321299-8-coiby...@gmail.com
[Removed reference to vhost-user-blk-test.c, it will be sent in a
separate pull request.
--Stefan]
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index a7f0acf866..39fd488bd0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3062,6 +3062,13 @@ L: qemu-bl...@nongnu.org
 S: Supported
 F: tests/image-fuzzer/
 
+Vhost-user block device backend server
+M: Coiby Xu 
+S: Maintained
+F: block/export/vhost-user-blk-server.c
+F: util/vhost-user-server.c
+F: tests/qtest/libqos/vhost-user-blk.c
+
 Replication
 M: Wen Congyang 
 M: Xie Changlong 
-- 
2.26.2



[PULL v2 13/28] util/vhost-user-server: fix memory leak in vu_message_read()

2020-10-22 Thread Stefan Hajnoczi
fds[] is leaked when qio_channel_readv_full() fails.

Use vmsg->fds[] instead of keeping a local fds[] array. Then we can
reuse goto fail to clean up fds. vmsg->fd_num must be zeroed before the
loop to make this safe.

Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-8-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 util/vhost-user-server.c | 50 ++--
 1 file changed, 23 insertions(+), 27 deletions(-)

diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 73a1667b54..a7b7a9897f 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -100,21 +100,11 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg 
*vmsg)
 };
 int rc, read_bytes = 0;
 Error *local_err = NULL;
-/*
- * Store fds/nfds returned from qio_channel_readv_full into
- * temporary variables.
- *
- * VhostUserMsg is a packed structure, gcc will complain about passing
- * pointer to a packed structure member if we pass &VhostUserMsg.fd_num
- * and &VhostUserMsg.fds directly when calling qio_channel_readv_full,
- * thus two temporary variables nfds and fds are used here.
- */
-size_t nfds = 0, nfds_t = 0;
 const size_t max_fds = G_N_ELEMENTS(vmsg->fds);
-int *fds_t = NULL;
 VuServer *server = container_of(vu_dev, VuServer, vu_dev);
 QIOChannel *ioc = server->ioc;
 
+vmsg->fd_num = 0;
 if (!ioc) {
 error_report_err(local_err);
 goto fail;
@@ -122,41 +112,47 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg 
*vmsg)
 
 assert(qemu_in_coroutine());
 do {
+size_t nfds = 0;
+int *fds = NULL;
+
 /*
  * qio_channel_readv_full may have short reads, keeping calling it
  * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
  */
-rc = qio_channel_readv_full(ioc, &iov, 1, &fds_t, &nfds_t, &local_err);
+rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
 if (rc < 0) {
 if (rc == QIO_CHANNEL_ERR_BLOCK) {
+assert(local_err == NULL);
 qio_channel_yield(ioc, G_IO_IN);
 continue;
 } else {
 error_report_err(local_err);
-return false;
+goto fail;
 }
 }
-read_bytes += rc;
-if (nfds_t > 0) {
-if (nfds + nfds_t > max_fds) {
+
+if (nfds > 0) {
+if (vmsg->fd_num + nfds > max_fds) {
 error_report("A maximum of %zu fds are allowed, "
  "however got %zu fds now",
- max_fds, nfds + nfds_t);
+ max_fds, vmsg->fd_num + nfds);
+g_free(fds);
 goto fail;
 }
-memcpy(vmsg->fds + nfds, fds_t,
-   nfds_t *sizeof(vmsg->fds[0]));
-nfds += nfds_t;
-g_free(fds_t);
+memcpy(vmsg->fds + vmsg->fd_num, fds, nfds * sizeof(vmsg->fds[0]));
+vmsg->fd_num += nfds;
+g_free(fds);
 }
-if (read_bytes == VHOST_USER_HDR_SIZE || rc == 0) {
-break;
+
+if (rc == 0) { /* socket closed */
+goto fail;
 }
-iov.iov_base = (char *)vmsg + read_bytes;
-iov.iov_len = VHOST_USER_HDR_SIZE - read_bytes;
-} while (true);
 
-vmsg->fd_num = nfds;
+iov.iov_base += rc;
+iov.iov_len -= rc;
+read_bytes += rc;
+} while (read_bytes != VHOST_USER_HDR_SIZE);
+
 /* qio_channel_readv_full will make socket fds blocking, unblock them */
 vmsg_unblock_fds(vmsg);
 if (vmsg->size > sizeof(vmsg->payload)) {
-- 
2.26.2



[PULL v2 08/28] util/vhost-user-server: s/fileds/fields/ typo fix

2020-10-22 Thread Stefan Hajnoczi
Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-3-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 util/vhost-user-server.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index b189944856..9bd33e0fdb 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -407,7 +407,7 @@ bool vhost_user_server_start(VuServer *server,
 return false;
 }
 
-/* zero out unspecified fileds */
+/* zero out unspecified fields */
 *server = (VuServer) {
 .listener  = listener,
 .vu_iface  = vu_iface,
-- 
2.26.2



[PULL v2 24/28] block/io: fix bdrv_co_block_status_above

2020-10-22 Thread Stefan Hajnoczi
From: Vladimir Sementsov-Ogievskiy 

bdrv_co_block_status_above has several design problems with handling
short backing files:

1. With want_zero=true, it may return ret with BDRV_BLOCK_ZERO but
without the BDRV_BLOCK_ALLOCATED flag, when actually the short backing
file which produces these after-EOF zeros is inside the requested backing
sequence.

2. With want_zero=false, it may return pnum=0 prior to actual EOF,
because of EOF of short backing file.

Fix these things, making logic about short backing files clearer.

With bdrv_block_status_above fixed, we also have to improve is_zero in
the qcow2 code, otherwise iotest 154 will fail, because with this patch we
stop merging zeros of different types (those produced by regions that are
fully unallocated in the whole backing chain vs those produced by short
backing files).

Note also, that this patch leaves for another day the general problem
around block-status: misuse of BDRV_BLOCK_ALLOCATED as is-fs-allocated
vs go-to-backing.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Alberto Garcia 
Reviewed-by: Eric Blake 
Message-id: 20200924194003.22080-2-vsement...@virtuozzo.com
[Fix s/comes/come/ as suggested by Eric Blake
--Stefan]
Signed-off-by: Stefan Hajnoczi 
---
 block/io.c| 68 ---
 block/qcow2.c | 16 ++--
 2 files changed, 68 insertions(+), 16 deletions(-)

diff --git a/block/io.c b/block/io.c
index 54f0968aee..a718d50ca2 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2350,34 +2350,74 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
   int64_t *map,
   BlockDriverState **file)
 {
+int ret;
 BlockDriverState *p;
-int ret = 0;
-bool first = true;
+int64_t eof = 0;
 
 assert(bs != base);
-for (p = bs; p != base; p = bdrv_filter_or_cow_bs(p)) {
+
+ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
+if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED) {
+return ret;
+}
+
+if (ret & BDRV_BLOCK_EOF) {
+eof = offset + *pnum;
+}
+
+assert(*pnum <= bytes);
+bytes = *pnum;
+
+for (p = bdrv_filter_or_cow_bs(bs); p != base;
+ p = bdrv_filter_or_cow_bs(p))
+{
 ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
file);
 if (ret < 0) {
-break;
+return ret;
 }
-if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
+if (*pnum == 0) {
 /*
- * Reading beyond the end of the file continues to read
- * zeroes, but we can only widen the result to the
- * unallocated length we learned from an earlier
- * iteration.
+ * The top layer deferred to this layer, and because this layer is
+ * short, any zeroes that we synthesize beyond EOF behave as if they
+ * were allocated at this layer.
+ *
+ * We don't include BDRV_BLOCK_EOF into ret, as upper layer may be
+ * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see
+ * below.
  */
+assert(ret & BDRV_BLOCK_EOF);
 *pnum = bytes;
+if (file) {
+*file = p;
+}
+ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
+break;
 }
-if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
+if (ret & BDRV_BLOCK_ALLOCATED) {
+/*
+ * We've found the node and the status, we must break.
+ *
+ * Drop BDRV_BLOCK_EOF, as it's not for upper layer, which may be
+ * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see
+ * below.
+ */
+ret &= ~BDRV_BLOCK_EOF;
 break;
 }
-/* [offset, pnum] unallocated on this layer, which could be only
- * the first part of [offset, bytes].  */
-bytes = MIN(bytes, *pnum);
-first = false;
+
+/*
+ * OK, [offset, offset + *pnum) region is unallocated on this layer,
+ * let's continue the diving.
+ */
+assert(*pnum <= bytes);
+bytes = *pnum;
+}
+
+if (offset + *pnum == eof) {
+ret |= BDRV_BLOCK_EOF;
 }
+
 return ret;
 }
 
diff --git a/block/qcow2.c b/block/qcow2.c
index b05512718c..b6cb4db8bb 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3860,8 +3860,20 @@ static bool is_zero(BlockDriverState *bs, int64_t 
offset, int64_t bytes)
 if (!bytes) {
 return true;
 }
-res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
-return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
+
+/*
+ * bdrv_block_status_above doesn't merge different types of zeros, for
+ * example, zeros which come from the region which is unallocated in
+ * the whole backing c

[PULL v2 09/28] util/vhost-user-server: drop unnecessary QOM cast

2020-10-22 Thread Stefan Hajnoczi
We already have access to the value with the correct type (ioc and sioc
are the same QIOChannel).

Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-4-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 util/vhost-user-server.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 9bd33e0fdb..443ab7448c 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -337,7 +337,7 @@ static void vu_accept(QIONetListener *listener, 
QIOChannelSocket *sioc,
 server->ioc = QIO_CHANNEL(sioc);
 object_ref(OBJECT(server->ioc));
 qio_channel_attach_aio_context(server->ioc, server->ctx);
-qio_channel_set_blocking(QIO_CHANNEL(server->sioc), false, NULL);
+qio_channel_set_blocking(server->ioc, false, NULL);
 vu_client_start(server);
 }
 
-- 
2.26.2



[PULL v2 16/28] block/export: report flush errors

2020-10-22 Thread Stefan Hajnoczi
Propagate the flush return value since errors are possible.

Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-11-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 block/export/vhost-user-blk-server.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index 4d35232bf3..faefcfcaea 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -78,11 +78,11 @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec 
*iov,
 return -EINVAL;
 }
 
-static void coroutine_fn vu_block_flush(VuBlockReq *req)
+static int coroutine_fn vu_block_flush(VuBlockReq *req)
 {
 VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
 BlockBackend *backend = vdev_blk->backend;
-blk_co_flush(backend);
+return blk_co_flush(backend);
 }
 
 static void coroutine_fn vu_block_virtio_process_req(void *opaque)
@@ -152,8 +152,11 @@ static void coroutine_fn vu_block_virtio_process_req(void 
*opaque)
 break;
 }
 case VIRTIO_BLK_T_FLUSH:
-vu_block_flush(req);
-req->in->status = VIRTIO_BLK_S_OK;
+if (vu_block_flush(req) == 0) {
+req->in->status = VIRTIO_BLK_S_OK;
+} else {
+req->in->status = VIRTIO_BLK_S_IOERR;
+}
 break;
 case VIRTIO_BLK_T_GET_ID: {
 size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
-- 
2.26.2



[PULL v2 27/28] block/io: fix bdrv_is_allocated_above

2020-10-22 Thread Stefan Hajnoczi
From: Vladimir Sementsov-Ogievskiy 

bdrv_is_allocated_above handles short backing files incorrectly: it
reports after-EOF space as UNALLOCATED, which is wrong, because on read
the data is generated at the level of the short backing file (if all
overlays have unallocated areas at that place).

Reusing bdrv_common_block_status_above fixes the issue and unifies code
path.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
Message-id: 20200924194003.22080-5-vsement...@virtuozzo.com
[Fix s/has/have/ as suggested by Eric Blake. Fix s/area/areas/.
--Stefan]
Signed-off-by: Stefan Hajnoczi 
---
 block/io.c | 43 +--
 1 file changed, 5 insertions(+), 38 deletions(-)

diff --git a/block/io.c b/block/io.c
index b616bc4ada..02528b3823 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2477,52 +2477,19 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState 
*bs, int64_t offset,
  * at 'offset + *pnum' may return the same allocation status (in other
  * words, the result is not necessarily the maximum possible range);
  * but 'pnum' will only be 0 when end of file is reached.
- *
  */
 int bdrv_is_allocated_above(BlockDriverState *top,
 BlockDriverState *base,
 bool include_base, int64_t offset,
 int64_t bytes, int64_t *pnum)
 {
-BlockDriverState *intermediate;
-int ret;
-int64_t n = bytes;
-
-assert(base || !include_base);
-
-intermediate = top;
-while (include_base || intermediate != base) {
-int64_t pnum_inter;
-int64_t size_inter;
-
-assert(intermediate);
-ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
-if (ret < 0) {
-return ret;
-}
-if (ret) {
-*pnum = pnum_inter;
-return 1;
-}
-
-size_inter = bdrv_getlength(intermediate);
-if (size_inter < 0) {
-return size_inter;
-}
-if (n > pnum_inter &&
-(intermediate == top || offset + pnum_inter < size_inter)) {
-n = pnum_inter;
-}
-
-if (intermediate == base) {
-break;
-}
-
-intermediate = bdrv_filter_or_cow_bs(intermediate);
+int ret = bdrv_common_block_status_above(top, base, include_base, false,
+ offset, bytes, pnum, NULL, NULL);
+if (ret < 0) {
+return ret;
 }
 
-*pnum = n;
-return 0;
+return !!(ret & BDRV_BLOCK_ALLOCATED);
 }
 
 int coroutine_fn
-- 
2.26.2



Re: [PULL 00/13] SD/MMC patches for 2020-10-21

2020-10-22 Thread Peter Maydell
On Wed, 21 Oct 2020 at 18:37, Philippe Mathieu-Daudé  wrote:
>
> The following changes since commit ac793156f650ae2d77834932d72224175ee69086:
>
>   Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20201020-1' into staging (2020-10-20 21:11:35 +0100)
>
> are available in the Git repository at:
>
>   https://gitlab.com/philmd/qemu.git tags/sd-next-20201021
>
> for you to fetch changes up to 84816fb63e5c57159b469a66052d1b2bc862ef77:
>
>   hw/sd/sdcard: Assert if accessing an illegal group (2020-10-21 13:34:27 +0200)
>
> 
> SD/MMC patches
>
> Fix two heap-overflows reported by Alexander Bulekov while fuzzing:
> - https://bugs.launchpad.net/qemu/+bug/1892960
> - https://bugs.launchpad.net/qemu/+bug/1895310
>
> CI jobs results:
> . https://cirrus-ci.com/build/6399328187056128
> . https://gitlab.com/philmd/qemu/-/pipelines/205701966
> . https://travis-ci.org/github/philmd/qemu/builds/737708930
> 


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/5.2
for any user-visible changes.

-- PMM



[PULL v2 18/28] util/vhost-user-server: move header to include/

2020-10-22 Thread Stefan Hajnoczi
Headers used by other subsystems are located in include/. Also add the
vhost-user-server and vhost-user-blk-server headers to MAINTAINERS.

Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-13-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS| 4 +++-
 {util => include/qemu}/vhost-user-server.h | 0
 block/export/vhost-user-blk-server.c   | 2 +-
 util/vhost-user-server.c   | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)
 rename {util => include/qemu}/vhost-user-server.h (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 39fd488bd0..461dae4989 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3066,8 +3066,10 @@ Vhost-user block device backend server
 M: Coiby Xu 
 S: Maintained
 F: block/export/vhost-user-blk-server.c
-F: util/vhost-user-server.c
+F: block/export/vhost-user-blk-server.h
+F: include/qemu/vhost-user-server.h
 F: tests/qtest/libqos/vhost-user-blk.c
+F: util/vhost-user-server.c
 
 Replication
 M: Wen Congyang 
diff --git a/util/vhost-user-server.h b/include/qemu/vhost-user-server.h
similarity index 100%
rename from util/vhost-user-server.h
rename to include/qemu/vhost-user-server.h
diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index 3e5bd6caee..f7021cbd7b 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -13,7 +13,7 @@
 #include "block/block.h"
 #include "contrib/libvhost-user/libvhost-user.h"
 #include "standard-headers/linux/virtio_blk.h"
-#include "util/vhost-user-server.h"
+#include "qemu/vhost-user-server.h"
 #include "vhost-user-blk-server.h"
 #include "qapi/error.h"
 #include "qom/object_interfaces.h"
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 516999b38a..783d847a6d 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -9,8 +9,8 @@
  */
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
+#include "qemu/vhost-user-server.h"
 #include "block/aio-wait.h"
-#include "vhost-user-server.h"
 
 /*
  * Theory of operation:
-- 
2.26.2



[PULL v2 12/28] util/vhost-user-server: drop unused DevicePanicNotifier

2020-10-22 Thread Stefan Hajnoczi
The device panic notifier callback is not used. Drop it.

Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-7-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 util/vhost-user-server.h | 3 ---
 block/export/vhost-user-blk-server.c | 3 +--
 util/vhost-user-server.c | 6 --
 3 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
index 5232f96718..92177fc911 100644
--- a/util/vhost-user-server.h
+++ b/util/vhost-user-server.h
@@ -29,12 +29,10 @@ typedef struct VuFdWatch {
 } VuFdWatch;
 
 typedef struct VuServer VuServer;
-typedef void DevicePanicNotifierFn(VuServer *server);
 
 struct VuServer {
 QIONetListener *listener;
 AioContext *ctx;
-DevicePanicNotifierFn *device_panic_notifier;
 int max_queues;
 const VuDevIface *vu_iface;
 VuDev vu_dev;
@@ -54,7 +52,6 @@ bool vhost_user_server_start(VuServer *server,
  SocketAddress *unix_socket,
  AioContext *ctx,
  uint16_t max_queues,
- DevicePanicNotifierFn *device_panic_notifier,
  const VuDevIface *vu_iface,
  Error **errp);
 
diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index d227b468d8..c8fa4ecba9 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -439,8 +439,7 @@ static void vhost_user_blk_server_start(VuBlockDev 
*vu_block_device,
 ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
 
 if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
- VHOST_USER_BLK_MAX_QUEUES,
- NULL, &vu_block_iface,
+ VHOST_USER_BLK_MAX_QUEUES, &vu_block_iface,
  errp)) {
 goto error;
 }
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 6efe2279fd..73a1667b54 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -81,10 +81,6 @@ static void panic_cb(VuDev *vu_dev, const char *buf)
 close_client(server);
 }
 
-if (server->device_panic_notifier) {
-server->device_panic_notifier(server);
-}
-
 /*
  * Set the callback function for network listener so another
  * vhost-user client can connect to this server
@@ -385,7 +381,6 @@ bool vhost_user_server_start(VuServer *server,
  SocketAddress *socket_addr,
  AioContext *ctx,
  uint16_t max_queues,
- DevicePanicNotifierFn *device_panic_notifier,
  const VuDevIface *vu_iface,
  Error **errp)
 {
@@ -402,7 +397,6 @@ bool vhost_user_server_start(VuServer *server,
 .vu_iface  = vu_iface,
 .max_queues= max_queues,
 .ctx   = ctx,
-.device_panic_notifier = device_panic_notifier,
 };
 
 qio_net_listener_set_name(server->listener, "vhost-user-backend-listener");
-- 
2.26.2



[PULL v2 19/28] util/vhost-user-server: use static library in meson.build

2020-10-22 Thread Stefan Hajnoczi
Don't compile contrib/libvhost-user/libvhost-user.c again. Instead build
the static library once and then reuse it throughout QEMU.

Also switch from CONFIG_LINUX to CONFIG_VHOST_USER, which is what the
vhost-user tools (vhost-user-gpu, etc) do.

Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-14-stefa...@redhat.com
[Added CONFIG_LINUX again because libvhost-user doesn't build on macOS.
--Stefan]
Signed-off-by: Stefan Hajnoczi 
---
 block/export/export.c | 8 
 block/export/meson.build  | 2 +-
 contrib/libvhost-user/meson.build | 1 +
 meson.build   | 6 +-
 util/meson.build  | 4 +++-
 5 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/block/export/export.c b/block/export/export.c
index bd7cac241f..550897e236 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -17,17 +17,17 @@
 #include "sysemu/block-backend.h"
 #include "block/export.h"
 #include "block/nbd.h"
-#if CONFIG_LINUX
-#include "block/export/vhost-user-blk-server.h"
-#endif
 #include "qapi/error.h"
 #include "qapi/qapi-commands-block-export.h"
 #include "qapi/qapi-events-block-export.h"
 #include "qemu/id.h"
+#ifdef CONFIG_VHOST_USER
+#include "vhost-user-blk-server.h"
+#endif
 
 static const BlockExportDriver *blk_exp_drivers[] = {
 &blk_exp_nbd,
-#if CONFIG_LINUX
+#ifdef CONFIG_VHOST_USER
 &blk_exp_vhost_user_blk,
 #endif
 };
diff --git a/block/export/meson.build b/block/export/meson.build
index ef3a9576f7..fffe6b09e8 100644
--- a/block/export/meson.build
+++ b/block/export/meson.build
@@ -1,2 +1,2 @@
 block_ss.add(files('export.c'))
-block_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-blk-server.c', '../../contrib/libvhost-user/libvhost-user.c'))
+block_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: files('vhost-user-blk-server.c'))
diff --git a/contrib/libvhost-user/meson.build 
b/contrib/libvhost-user/meson.build
index e68dd1a581..a261e7665f 100644
--- a/contrib/libvhost-user/meson.build
+++ b/contrib/libvhost-user/meson.build
@@ -1,3 +1,4 @@
 libvhost_user = static_library('vhost-user',
files('libvhost-user.c', 
'libvhost-user-glib.c'),
build_by_default: false)
+vhost_user = declare_dependency(link_with: libvhost_user)
diff --git a/meson.build b/meson.build
index 7627a0ae46..2ec4f114ce 100644
--- a/meson.build
+++ b/meson.build
@@ -1398,6 +1398,11 @@ trace_events_subdirs += [
   'util',
 ]
 
+vhost_user = not_found
+if 'CONFIG_VHOST_USER' in config_host
+  subdir('contrib/libvhost-user')
+endif
+
 subdir('qapi')
 subdir('qobject')
 subdir('stubs')
@@ -1830,7 +1835,6 @@ if have_tools
  install: true)
 
   if 'CONFIG_VHOST_USER' in config_host
-subdir('contrib/libvhost-user')
 subdir('contrib/vhost-user-blk')
 subdir('contrib/vhost-user-gpu')
 subdir('contrib/vhost-user-input')
diff --git a/util/meson.build b/util/meson.build
index 2296e81b34..c5159ad79d 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -66,7 +66,9 @@ if have_block
   util_ss.add(files('main-loop.c'))
   util_ss.add(files('nvdimm-utils.c'))
   util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 
'qemu-coroutine-io.c'))
-  util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
+  util_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: [
+files('vhost-user-server.c'), vhost_user
+  ])
   util_ss.add(files('block-helpers.c'))
   util_ss.add(files('qemu-coroutine-sleep.c'))
   util_ss.add(files('qemu-co-shared-resource.c'))
-- 
2.26.2



[PULL v2 14/28] util/vhost-user-server: check EOF when reading payload

2020-10-22 Thread Stefan Hajnoczi
Unexpected EOF is an error that must be reported.

Signed-off-by: Stefan Hajnoczi 
Message-id: 20200924151549.913737-9-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 util/vhost-user-server.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index a7b7a9897f..981908fef0 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -169,8 +169,10 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg 
*vmsg)
 };
 if (vmsg->size) {
 rc = qio_channel_readv_all_eof(ioc, &iov_payload, 1, &local_err);
-if (rc == -1) {
-error_report_err(local_err);
+if (rc != 1) {
+if (local_err) {
+error_report_err(local_err);
+}
 goto fail;
 }
 }
-- 
2.26.2



  1   2   3   4   >