date:20200629

+Gerd

On 6/29/20 11:48 PM, David CARLIER wrote:
> From 157a0374093371719de42e99364352d64190f52a Mon Sep 17 00:00:00 2001
> From: David Carlier 
> Date: Mon, 29 Jun 2020 22:20:06 +
> Subject: [PATCH 7/9] Skipping drm build, unsupported.
> 
> Signed-off-by: David Carlier 

Reviewed-by: Philippe Mathieu-Daudé 

> ---
>  util/Makefile.objs | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/util/Makefile.objs b/util/Makefile.objs
> index cc5e37177a..faebc13fac 100644
> --- a/util/Makefile.objs
> +++ b/util/Makefile.objs
> @@ -39,7 +39,7 @@ util-obj-y += qsp.o
>  util-obj-y += range.o
>  util-obj-y += stats64.o
>  util-obj-y += systemd.o
> -util-obj-$(CONFIG_POSIX) += drm.o
> +util-obj-$(CONFIG_LINUX) += drm.o
>  util-obj-y += guest-random.o
>  util-obj-$(CONFIG_GIO) += dbus.o
>  dbus.o-cflags = $(GIO_CFLAGS)
> --
> 2.26.0
>

Re: [PATCH v1 05/10] vhost-backend: export the vhost backend helper

2020-06-29 Thread Cindy Lu

On Thu, Jun 25, 2020 at 11:07 PM Laurent Vivier  wrote:
>
> On 22/06/2020 17:37, Cindy Lu wrote:
> > export the helper then we can reuse them in other backend
> >
> > Signed-off-by: Cindy Lu 
> > ---
> >  hw/virtio/vhost-backend.c | 18 +-
> >  include/hw/virtio/vhost-backend.h | 28 
> >  2 files changed, 37 insertions(+), 9 deletions(-)
> >
>
> This looks weird to export all these functions whereas they are all
> already exported by the vhost_ops structure.
>
> So if vhost-vdpa is not a subset of vhost-kernel and if these functions
> will diverge from vhost-backend.c definition in the future, perhaps it
> is wise to already copy their definitions right now in vhost-vdpa.c
> rather than exporting them now and to have to copy them in the future in
> vhost-vdpa.c to modify them.
>
> It will also simplify the definition of vhost_kernel_call().
>
> Thanks,
> Laurent
>
Thanks Laurent, will fix this
> > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> > index 48905383f8..660e9e8588 100644
> > --- a/hw/virtio/vhost-backend.c
> > +++ b/hw/virtio/vhost-backend.c
> > @@ -89,7 +89,7 @@ static int vhost_kernel_scsi_get_abi_version(struct 
> > vhost_dev *dev, int *version
> >  return vhost_kernel_call(dev, VHOST_SCSI_GET_ABI_VERSION, version);
> >  }
> >
> > -static int vhost_kernel_set_log_base(struct vhost_dev *dev, uint64_t base,
> > +int vhost_kernel_set_log_base(struct vhost_dev *dev, uint64_t base,
> >   struct vhost_log *log)
> >  {
> >  return vhost_kernel_call(dev, VHOST_SET_LOG_BASE, &base);
> > @@ -101,7 +101,7 @@ static int vhost_kernel_set_mem_table(struct vhost_dev 
> > *dev,
> >  return vhost_kernel_call(dev, VHOST_SET_MEM_TABLE, mem);
> >  }
> >
> > -static int vhost_kernel_set_vring_addr(struct vhost_dev *dev,
> > +int vhost_kernel_set_vring_addr(struct vhost_dev *dev,
> > struct vhost_vring_addr *addr)
> >  {
> >  return vhost_kernel_call(dev, VHOST_SET_VRING_ADDR, addr);
> > @@ -113,31 +113,31 @@ static int vhost_kernel_set_vring_endian(struct 
> > vhost_dev *dev,
> >  return vhost_kernel_call(dev, VHOST_SET_VRING_ENDIAN, ring);
> >  }
> >
> > -static int vhost_kernel_set_vring_num(struct vhost_dev *dev,
> > +int vhost_kernel_set_vring_num(struct vhost_dev *dev,
> >struct vhost_vring_state *ring)
> >  {
> >  return vhost_kernel_call(dev, VHOST_SET_VRING_NUM, ring);
> >  }
> >
> > -static int vhost_kernel_set_vring_base(struct vhost_dev *dev,
> > +int vhost_kernel_set_vring_base(struct vhost_dev *dev,
> > struct vhost_vring_state *ring)
> >  {
> >  return vhost_kernel_call(dev, VHOST_SET_VRING_BASE, ring);
> >  }
> >
> > -static int vhost_kernel_get_vring_base(struct vhost_dev *dev,
> > +int vhost_kernel_get_vring_base(struct vhost_dev *dev,
> > struct vhost_vring_state *ring)
> >  {
> >  return vhost_kernel_call(dev, VHOST_GET_VRING_BASE, ring);
> >  }
> >
> > -static int vhost_kernel_set_vring_kick(struct vhost_dev *dev,
> > +int vhost_kernel_set_vring_kick(struct vhost_dev *dev,
> > struct vhost_vring_file *file)
> >  {
> >  return vhost_kernel_call(dev, VHOST_SET_VRING_KICK, file);
> >  }
> >
> > -static int vhost_kernel_set_vring_call(struct vhost_dev *dev,
> > +int vhost_kernel_set_vring_call(struct vhost_dev *dev,
> > struct vhost_vring_file *file)
> >  {
> >  return vhost_kernel_call(dev, VHOST_SET_VRING_CALL, file);
> > @@ -155,13 +155,13 @@ static int vhost_kernel_set_features(struct vhost_dev 
> > *dev,
> >  return vhost_kernel_call(dev, VHOST_SET_FEATURES, &features);
> >  }
> >
> > -static int vhost_kernel_get_features(struct vhost_dev *dev,
> > +int vhost_kernel_get_features(struct vhost_dev *dev,
> >   uint64_t *features)
> >  {
> >  return vhost_kernel_call(dev, VHOST_GET_FEATURES, features);
> >  }
> >
> > -static int vhost_kernel_set_owner(struct vhost_dev *dev)
> > +int vhost_kernel_set_owner(struct vhost_dev *dev)
> >  {
> >  return vhost_kernel_call(dev, VHOST_SET_OWNER, NULL);
> >  }
> > diff --git a/include/hw/virtio/vhost-backend.h 
> > b/include/hw/virtio/vhost-backend.h
> > index 6f6670783f..300b59c172 100644
> > --- a/include/hw/virtio/vhost-backend.h
> > +++ b/include/hw/virtio/vhost-backend.h
> > @@ -172,4 +172,32 @@ int vhost_backend_handle_iotlb_msg(struct vhost_dev 
> > *dev,
> >
> >  int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd);
> >
> > +
> > +int vhost_kernel_set_log_base(struct vhost_dev *dev, uint64_t base,
> > + struct vhost_log *log);
> > +
> > +int vhost_kernel_set_vring_addr(struct vhost_dev *dev,
> > +   struct vhost_vring_addr *addr);
> > +
> > +i

[PATCH v2 2/2] target/m68k: consolidate physical translation offset into get_physical_address()

2020-06-29 Thread Mark Cave-Ayland

Since all callers to get_physical_address() now apply the same page offset to
the translation result, move the logic into get_physical_address() itself to
avoid duplication.

Suggested-by: Philippe Mathieu-Daudé 
Signed-off-by: Mark Cave-Ayland 
---
 target/m68k/helper.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/target/m68k/helper.c b/target/m68k/helper.c
index 631eab7774..ddd8a2667e 100644
--- a/target/m68k/helper.c
+++ b/target/m68k/helper.c
@@ -771,7 +771,8 @@ static int get_physical_address(CPUM68KState *env, hwaddr 
*physical,
 }
 *page_size = 1 << page_bits;
 page_mask = ~(*page_size - 1);
-*physical = next & page_mask;
+address &= TARGET_PAGE_MASK;
+*physical = (next & page_mask) + (address & (*page_size - 1));
 
 if (access_type & ACCESS_PTEST) {
 env->mmu.mmusr |= next & M68K_MMU_SR_MASK_040;
@@ -826,8 +827,6 @@ hwaddr m68k_cpu_get_phys_page_debug(CPUState *cs, vaddr 
addr)
 return -1;
 }
 
-addr &= TARGET_PAGE_MASK;
-phys_addr += addr & (page_size - 1);
 return phys_addr;
 }
 
@@ -891,10 +890,7 @@ bool m68k_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
 ret = get_physical_address(&cpu->env, &physical, &prot,
address, access_type, &page_size);
 if (likely(ret == 0)) {
-address &= TARGET_PAGE_MASK;
-physical += address & (page_size - 1);
-tlb_set_page(cs, address, physical,
- prot, mmu_idx, TARGET_PAGE_SIZE);
+tlb_set_page(cs, address, physical, prot, mmu_idx, page_size);
 return true;
 }
 
@@ -1383,8 +1379,6 @@ void HELPER(ptest)(CPUM68KState *env, uint32_t addr, 
uint32_t is_read)
 ret = get_physical_address(env, &physical, &prot, addr,
access_type, &page_size);
 if (ret == 0) {
-addr &= TARGET_PAGE_MASK;
-physical += addr & (page_size - 1);
 tlb_set_page(env_cpu(env), addr, physical,
  prot, access_type & ACCESS_SUPER ?
  MMU_KERNEL_IDX : MMU_USER_IDX, page_size);
-- 
2.20.1

[PATCH v2 1/2] target/m68k: fix physical address translation in m68k_cpu_get_phys_page_debug()

2020-06-29 Thread Mark Cave-Ayland

The result of the get_physical_address() function should be combined with the
offset of the original page access before being returned. Otherwise the
m68k_cpu_get_phys_page_debug() function can round to the wrong page causing
incorrect lookups in gdbstub and various "Disassembler disagrees with
translator over instruction decoding" warnings to appear at translation time.

Fixes: 88b2fef6c3 ("target/m68k: add MC68040 MMU")
Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Laurent Vivier 
---
 target/m68k/helper.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/target/m68k/helper.c b/target/m68k/helper.c
index 79b0b10ea9..631eab7774 100644
--- a/target/m68k/helper.c
+++ b/target/m68k/helper.c
@@ -820,10 +820,14 @@ hwaddr m68k_cpu_get_phys_page_debug(CPUState *cs, vaddr 
addr)
 if (env->sr & SR_S) {
 access_type |= ACCESS_SUPER;
 }
+
 if (get_physical_address(env, &phys_addr, &prot,
  addr, access_type, &page_size) != 0) {
 return -1;
 }
+
+addr &= TARGET_PAGE_MASK;
+phys_addr += addr & (page_size - 1);
 return phys_addr;
 }
 
-- 
2.20.1

[PATCH v2 0/2] target/m68k: fix physical address translation in m68k_cpu_get_phys_page_debug()

2020-06-29 Thread Mark Cave-Ayland

The first patch in the series fixes the original bug, whilst the second patch
implements the suggestion by Philippe to consolidate the translation offset
logic into get_physical_address() itself now that all callers are identical.

Signed-off-by: Mark Cave-Ayland 


v2:
- Add R-B tags from Philippe and Laurent
- Add patch 2 to consolidate the translation offset logic into 
get_physical_address()


Mark Cave-Ayland (2):
  target/m68k: fix physical address translation in
m68k_cpu_get_phys_page_debug()
  target/m68k: consolidate physical translation offset into
get_physical_address()

 target/m68k/helper.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

-- 
2.20.1

Re: [PATCH v2 2/4] smbus: Fix spd_data_generate() error API violation

On 6/29/20 11:31 PM, BALATON Zoltan wrote:
> On Mon, 29 Jun 2020, Philippe Mathieu-Daudé wrote:
>> On 6/27/20 9:17 AM, Markus Armbruster wrote:
>>> BALATON Zoltan  writes:
 On Wed, 22 Apr 2020, BALATON Zoltan wrote:
> On Wed, 22 Apr 2020, Philippe Mathieu-Daudé wrote:
>> On 4/22/20 4:27 PM, BALATON Zoltan wrote:
>>> On Wed, 22 Apr 2020, Markus Armbruster wrote:
 The Error ** argument must be NULL, &error_abort, &error_fatal,
 or a
 pointer to a variable containing NULL.  Passing an
 argument of the
 latter kind twice without clearing it in between is wrong: if the
 first call sets an error, it no longer points to NULL for the
 second
 call.

 spd_data_generate() can pass @errp to error_setg() more than
 once when
 it adjusts both memory size and type.  Harmless, because
 no caller
 passes anything that needs adjusting.  Until the previous
 commit,
 sam460ex passed types that needed adjusting, but not sizes.

 spd_data_generate()'s contract is rather awkward:

     If everything's fine, return non-null and don't
 set an error.

     Else, if memory size or type need adjusting,
 return non-null and
     set an error describing the adjustment.

     Else, return null and set an error reporting
 why no data can be
     generated.

 Its callers treat the error as a warning even when null is
 returned.
 They don't create the "smbus-eeprom" device then.
 Suspicious.

 Since the previous commit, only "everything's fine" can actually
 happen.  Drop the unused code and simplify the
 callers.  This gets rid
 of the error API violation.
>>>
>>> This leaves board code no chance to recover from values given by
>>> user that won't fit without duplicating checks that this function
>>> does. Also this will abort without giving meaningful errors if an
>>> invalid value does get through and result in a crash which is not
>>> user friendly. So I don't like this but if others think this is
>>> acceptable maybe at least unit test should be adjusted to make
>>> sure aborts cannot be triggered by user for values that are not
>>> usually tested during development.
>>
>> Agreed. Do you have an example (or more) to better show Markus this
>> code use? So we can add tests.
>
> After Markus's patches probably nothing uses it any more but this
> comes with the result that previously giving some random value such
> as -m 100 did produce a working sam460ex machine after some warnings
> but now it just throws back some errors to the user which may or may
> not be helpful to them.
>
>> Personally I'd use a script to generate a dumb static array of all
>> possible sizes...
>
> Maybe testing with the biggest valid value such as -m 2048 (that's
> commonly used probably) and an invalid value such as -m 100 might be
> enough. Testing all possible values might take too long and would
> not test what happens with invalid values. Ideally those invalid
> values should also work like before a0258e4afa but should at least
> give a meaningful warning so the user can fix the command line
> without too much head scratching. Actually that commit was from Igor
> not from Markus so sorry for attributing that to Markus too, I
> remembered wrong.
>
> By the way you could argue that on real machine you cannot plug
> certain combinations of memory modules so it's enough to model that
> but I think QEMU does not have to be that strict and also support
> configs that cannot happen on real hardware but would work. This
> might be useful for example if you have some amount of memory to
> set aside for a VM on a host but that's not a size that exists in
> memory modules on real hardware. This also works on pc machine in
> qemu-system-i386 for example: it accepts -m 100 and does its best to
> create a machine with such unrealistic size. The sam460ex did the
> same (within SoC's limits) and before a0258e4afa -m 100 was fixed up
> to 96 MB which is now not possible due to change in QEMU internal
> APIs. This probably isn't important enough to be worth the extra effort
> to support but would have been nice to preserve.

 Besides the above here's another use case of the fix ups that I wanted
 to keep:

 https://patchew.org/QEMU/cover.1592315226.git.bala...@eik.bme.hu/b5f4598529a77f15f554c593e9be2d0ff9e5fab3.1592315226.git.bala...@eik.bme.hu/


 This board normally uses OpenBIOS which gets RAM size from fw_cfg and
 so works w

Re: [PATCH] disas/sh4: Add missing fallthrough annotations

2020-06-29 Thread no-reply

Patchew URL: https://patchew.org/QEMU/20200630055953.9309-1-th...@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH] disas/sh4: Add missing fallthrough annotations
Type: series
Message-id: 20200630055953.9309-1-th...@redhat.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
   e765115..fc1bff9  master -> master
 - [tag update]  patchew/20200629173821.22037-1-f4...@amsat.org -> 
patchew/20200629173821.22037-1-f4...@amsat.org
 * [new tag] patchew/20200630055953.9309-1-th...@redhat.com -> 
patchew/20200630055953.9309-1-th...@redhat.com
Switched to a new branch 'test'
82643b9 disas/sh4: Add missing fallthrough annotations

=== OUTPUT BEGIN ===
ERROR: code indent should never use tabs
#23: FILE: disas/sh4.c:1966:
+^I  /* fallthrough */$

ERROR: code indent should never use tabs
#31: FILE: disas/sh4.c:1976:
+^I  /* fallthrough */$

total: 2 errors, 0 warnings, 14 lines checked

Commit 82643b95d2e7 (disas/sh4: Add missing fallthrough annotations) has style 
problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20200630055953.9309-1-th...@redhat.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PATCH] disas/sh4: Add missing fallthrough annotations

2020-06-29 Thread Thomas Huth

Add fallthrough annotations to be able to compile the code without
warnings with -Wimplicit-fallthrough. Looking at the code, it seems
like the fallthrough is indeed intended here, so the comments should
be appropriate.

Signed-off-by: Thomas Huth 
---
 Note: The new lines use TABs since all the surrounding code uses TABs, too.
 Please ignore the checkpatch warnings.

 disas/sh4.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/disas/sh4.c b/disas/sh4.c
index 55ef865a36..dcdbdf26d8 100644
--- a/disas/sh4.c
+++ b/disas/sh4.c
@@ -1963,6 +1963,7 @@ print_insn_sh (bfd_vma memaddr, struct disassemble_info 
*info)
  fprintf_fn (stream, "xd%d", rn & ~1);
  break;
}
+ /* fallthrough */
case D_REG_N:
  fprintf_fn (stream, "dr%d", rn);
  break;
@@ -1972,6 +1973,7 @@ print_insn_sh (bfd_vma memaddr, struct disassemble_info 
*info)
  fprintf_fn (stream, "xd%d", rm & ~1);
  break;
}
+ /* fallthrough */
case D_REG_M:
  fprintf_fn (stream, "dr%d", rm);
  break;
-- 
2.18.1

Re: [REPORT] [GSoC - TCG Continuous Benchmarking] [#2] Dissecting QEMU Into Three Main Parts

2020-06-29 Thread Yonggang Luo

Wonderful work, May I reproduce the work on my local machine?

On Mon, Jun 29, 2020 at 6:26 PM Ahmed Karaman 
wrote:

> Hi,
>
> The second report of the TCG Continuous Benchmarking series builds
> upon the QEMU performance metrics calculated in the previous report.
> This report presents a method to dissect the number of instructions
> executed by a QEMU invocation into three main phases:
> - Code Generation
> - JIT Execution
> - Helpers Execution
> It devises a Python script that automates this process.
>
> After that, the report presents an experiment for comparing the
> output of running the script on 17 different targets. Many conclusions
> can be drawn from the results and two of them are discussed in the
> analysis section.
>
> Report link:
>
> https://ahmedkrmn.github.io/TCG-Continuous-Benchmarking/Dissecting-QEMU-Into-Three-Main-Parts/
>
> Previous reports:
> Report 1 - Measuring Basic Performance Metrics of QEMU:
> https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg06692.html
>
> Best regards,
> Ahmed Karaman
>


-- 
 此致
礼
罗勇刚
Yours
sincerely,
Yonggang Luo

Re: Building in Solaris 11.4

2020-06-29 Thread Thomas Huth


On 29/06/2020 22.25, Michele Denber wrote:

On 06-29-2020 8:12 AM, Thomas Huth wrote:

...
It's not the same bug as last year, but a new one: Seems like newer 
versions of Solaris now have this function in their libraries!
Yes - I just checked. Solaris 10 does not have openpty, but Solaris 11.4 
indeed does have it

So what you want is something like this (completely untested):


So just to make sure I have this right - I save that code into a patch 
file and then apply it where?  At the qemu-5.0.0 level?


Yes. I used the current git master branch, but I assume that it will 
also still apply to the 5.0 release.



  Then run  configure again?


Yes. Please let me know if it works, then I can try to get the patch 
submitted.


 HTH,
  Thomas

Re: [PATCH v2 05/18] hw/block/nvme: Introduce the Namespace Types definitions

On Jun 18 06:34, Dmitry Fomichev wrote:
> From: Niklas Cassel 
> 
> Define the structures and constants required to implement
> Namespace Types support.
> 
> Signed-off-by: Niklas Cassel 
> Signed-off-by: Dmitry Fomichev 
> ---
>  hw/block/nvme.h  |  3 ++
>  include/block/nvme.h | 75 +---
>  2 files changed, 73 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 4f0dac39ae..4fd155c409 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -63,6 +63,9 @@ typedef struct NvmeCQueue {
>  
>  typedef struct NvmeNamespace {
>  NvmeIdNsid_ns;
> +uint32_tnsid;
> +uint8_t csi;
> +QemuUUIDuuid;
>  } NvmeNamespace;
>  
>  static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 6a58bac0c2..5a1e5e137c 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -50,6 +50,11 @@ enum NvmeCapMask {
>  CAP_PMR_MASK   = 0x1,
>  };
>  
> +enum NvmeCapCssBits {
> +CAP_CSS_NVM= 0x01,
> +CAP_CSS_CSI_SUPP   = 0x40,
> +};
> +
>  #define NVME_CAP_MQES(cap)  (((cap) >> CAP_MQES_SHIFT)   & CAP_MQES_MASK)
>  #define NVME_CAP_CQR(cap)   (((cap) >> CAP_CQR_SHIFT)& CAP_CQR_MASK)
>  #define NVME_CAP_AMS(cap)   (((cap) >> CAP_AMS_SHIFT)& CAP_AMS_MASK)
> @@ -101,6 +106,12 @@ enum NvmeCcMask {
>  CC_IOCQES_MASK  = 0xf,
>  };
>  
> +enum NvmeCcCss {
> +CSS_NVM_ONLY= 0,
> +CSS_ALL_NSTYPES = 6,

Maybe we could call this CSS_CSI, since it just specifies that one or
more command sets are supported, not that ALL namespace types are
supported.

Otherwise,
Reviewed-by: Klaus Jensen 

> +CSS_ADMIN_ONLY  = 7,
> +};
> +
>  #define NVME_CC_EN(cc) ((cc >> CC_EN_SHIFT) & CC_EN_MASK)
>  #define NVME_CC_CSS(cc)((cc >> CC_CSS_SHIFT)& CC_CSS_MASK)
>  #define NVME_CC_MPS(cc)((cc >> CC_MPS_SHIFT)& CC_MPS_MASK)
> @@ -109,6 +120,21 @@ enum NvmeCcMask {
>  #define NVME_CC_IOSQES(cc) ((cc >> CC_IOSQES_SHIFT) & CC_IOSQES_MASK)
>  #define NVME_CC_IOCQES(cc) ((cc >> CC_IOCQES_SHIFT) & CC_IOCQES_MASK)
>  
> +#define NVME_SET_CC_EN(cc, val) \
> +(cc |= (uint32_t)((val) & CC_EN_MASK) << CC_EN_SHIFT)
> +#define NVME_SET_CC_CSS(cc, val)\
> +(cc |= (uint32_t)((val) & CC_CSS_MASK) << CC_CSS_SHIFT)
> +#define NVME_SET_CC_MPS(cc, val)\
> +(cc |= (uint32_t)((val) & CC_MPS_MASK) << CC_MPS_SHIFT)
> +#define NVME_SET_CC_AMS(cc, val)\
> +(cc |= (uint32_t)((val) & CC_AMS_MASK) << CC_AMS_SHIFT)
> +#define NVME_SET_CC_SHN(cc, val)\
> +(cc |= (uint32_t)((val) & CC_SHN_MASK) << CC_SHN_SHIFT)
> +#define NVME_SET_CC_IOSQES(cc, val) \
> +(cc |= (uint32_t)((val) & CC_IOSQES_MASK) << CC_IOSQES_SHIFT)
> +#define NVME_SET_CC_IOCQES(cc, val) \
> +(cc |= (uint32_t)((val) & CC_IOCQES_MASK) << CC_IOCQES_SHIFT)
> +
>  enum NvmeCstsShift {
>  CSTS_RDY_SHIFT  = 0,
>  CSTS_CFS_SHIFT  = 1,
> @@ -482,10 +508,41 @@ typedef struct NvmeIdentify {
>  uint64_trsvd2[2];
>  uint64_tprp1;
>  uint64_tprp2;
> -uint32_tcns;
> -uint32_trsvd11[5];
> +uint8_t cns;
> +uint8_t rsvd4;
> +uint16_tctrlid;
> +uint16_tnvmsetid;
> +uint8_t rsvd3;
> +uint8_t csi;
> +uint32_trsvd12[4];
>  } NvmeIdentify;
>  
> +typedef struct NvmeNsIdDesc {
> +uint8_t nidt;
> +uint8_t nidl;
> +uint16_trsvd2;
> +} NvmeNsIdDesc;
> +
> +enum NvmeNidType {
> +NVME_NIDT_EUI64 = 0x01,
> +NVME_NIDT_NGUID = 0x02,
> +NVME_NIDT_UUID  = 0x03,
> +NVME_NIDT_CSI   = 0x04,
> +};
> +
> +enum NvmeNidLength {
> +NVME_NIDL_EUI64 = 8,
> +NVME_NIDL_NGUID = 16,
> +NVME_NIDL_UUID  = 16,
> +NVME_NIDL_CSI   = 1,
> +};
> +
> +enum NvmeCsi {
> +NVME_CSI_NVM= 0x00,
> +};
> +
> +#define NVME_SET_CSI(vec, csi) (vec |= (uint8_t)(1 << (csi)))
> +
>  typedef struct NvmeRwCmd {
>  uint8_t opcode;
>  uint8_t flags;
> @@ -603,6 +660,7 @@ enum NvmeStatusCodes {
>  NVME_CMD_ABORT_MISSING_FUSE = 0x000a,
>  NVME_INVALID_NSID   = 0x000b,
>  NVME_CMD_SEQ_ERROR  = 0x000c,
> +NVME_CMD_SET_CMB_REJECTED   = 0x002b,
>  NVME_LBA_RANGE  = 0x0080,
>  NVME_CAP_EXCEEDED   = 0x0081,
>  NVME_NS_NOT_READY   = 0x0082,
> @@ -729,9 +787,14 @@ typedef struct NvmePSD {
>  #define NVME_IDENTIFY_DATA_SIZE 4096
>  
>  enum {
> -NVME_ID_CNS_NS = 0x0,
> -NVME_ID_CNS_CTRL   = 0x1,
> -NVME_ID_CNS_NS_ACTIVE_LIST = 0x2,
> +NVME_ID_CNS_NS= 0x0,
> +NVME_ID_CNS_CTRL  = 0x1,
> +NVME_ID_CNS_NS_ACTIVE_LIST= 0x2,
> +NVME_ID_CNS_NS_DESC_LIST  = 0x03,
> +NVME_ID_CNS_CS_NS = 0x05,
> +NVME_ID_CNS_CS_CTRL

Re: [PATCH v2 04/18] hw/block/nvme: Add Commands Supported and Effects log

On Jun 18 06:34, Dmitry Fomichev wrote:
> This log page becomes necessary to implement to allow checking for
> Zone Append command support in Zoned Namespace Command Set.
> 
> This commit adds the code to report this log page for NVM Command
> Set only. The parts that are specific to zoned operation will be
> added later in the series.
> 
> Signed-off-by: Dmitry Fomichev 
> ---
>  hw/block/nvme.c   | 62 +++
>  hw/block/trace-events |  4 +++
>  include/block/nvme.h  | 18 +
>  3 files changed, 84 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index a1bbc9acde..03b8deee85 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -871,6 +871,66 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  return NVME_SUCCESS;
>  }
>  
> +static uint16_t nvme_handle_cmd_effects(NvmeCtrl *n, NvmeCmd *cmd,
> +uint64_t prp1, uint64_t prp2, uint64_t ofs, uint32_t len)
> +{
> +   NvmeEffectsLog cmd_eff_log = {};
> +   uint32_t *iocs = cmd_eff_log.iocs;
> +
> +trace_pci_nvme_cmd_supp_and_effects_log_read();
> +
> +if (ofs != 0) {
> +trace_pci_nvme_err_invalid_effects_log_offset(ofs);
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +if (len != sizeof(cmd_eff_log)) {
> +trace_pci_nvme_err_invalid_effects_log_len(len);
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}

I don't see why you cannot request a subset of the page like any log
page?

> +
> +iocs[NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_DELETE_CQ] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_CREATE_CQ] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_IDENTIFY] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFFECTS_CSUPP;

These are admin commands and should go to acs.

> +
> +iocs[NVME_CMD_FLUSH] = NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC;
> +iocs[NVME_CMD_WRITE_ZEROS] = NVME_CMD_EFFECTS_CSUPP |
> + NVME_CMD_EFFECTS_LBCC;
> +iocs[NVME_CMD_WRITE] = NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC;
> +iocs[NVME_CMD_READ] = NVME_CMD_EFFECTS_CSUPP;
> +
> +return nvme_dma_read_prp(n, (uint8_t *)&cmd_eff_log, len, prp1, prp2);
> +}
> +
> +static uint16_t nvme_get_log_page(NvmeCtrl *n, NvmeCmd *cmd)
> +{
> +uint64_t prp1 = le64_to_cpu(cmd->prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->prp2);
> +uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> +uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> +uint64_t dw12 = le32_to_cpu(cmd->cdw12);
> +uint64_t dw13 = le32_to_cpu(cmd->cdw13);
> +uint64_t ofs = (dw13 << 32) | dw12;
> +uint32_t numdl, numdu, len;
> +uint16_t lid = dw10 & 0xff;
> +
> +numdl = dw10 >> 16;
> +numdu = dw11 & 0xffff;
> +len = (((numdu << 16) | numdl) + 1) << 2;
> +
> +switch (lid) {
> +case NVME_LOG_CMD_EFFECTS:
> +return nvme_handle_cmd_effects(n, cmd, prp1, prp2, ofs, len);
> +}
> +
> +trace_pci_nvme_unsupported_log_page(lid);
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}

The controller should set bit 2 of the LPA field to indicate support for
extended data.

> +
>  static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  {
>  switch (cmd->opcode) {
> @@ -888,6 +948,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, 
> NvmeRequest *req)
>  return nvme_set_feature(n, cmd, req);
>  case NVME_ADM_CMD_GET_FEATURES:
>  return nvme_get_feature(n, cmd, req);
> +case NVME_ADM_CMD_GET_LOG_PAGE:
> +return nvme_get_log_page(n, cmd);
>  default:
>  trace_pci_nvme_err_invalid_admin_opc(cmd->opcode);
>  return NVME_INVALID_OPCODE | NVME_DNR;
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 958fcc5508..423d491e27 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -58,6 +58,7 @@ pci_nvme_mmio_start_success(void) "setting controller 
> enable bit succeeded"
>  pci_nvme_mmio_stopped(void) "cleared controller enable bit"
>  pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
>  pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
> +pci_nvme_cmd_supp_and_effects_log_read(void) "commands supported and effects 
> log read"
>  
>  # nvme traces for error conditions
>  pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
> @@ -69,6 +70,8 @@ pci_nvme_err_invalid_ns(uint32_t ns, uint32_t limit) 
> "invalid namespace %u not w
>  pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
>  pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
>  pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) 
> "Invalid LBA start=%"PRIu64" len=%"PRIu

Re: [PATCH v2 03/18] hw/block/nvme: Clean up unused AER definitions

On Jun 18 06:34, Dmitry Fomichev wrote:
> Removed unused struct NvmeAerResult and SMART-related async event
> codes. All other event codes are now categorized by their type.
> This avoids having to define the same values in a single enum,
> NvmeAsyncEventRequest, that is now removed.
> 
> Later commits in this series will define additional values in some
> of these enums. No functional change.
> 
> Signed-off-by: Dmitry Fomichev 
> ---
>  hw/block/nvme.h  |  1 -
>  include/block/nvme.h | 43 ++-
>  2 files changed, 22 insertions(+), 22 deletions(-)
> 
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 0460cc0e62..4f0dac39ae 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -13,7 +13,6 @@ typedef struct NvmeParams {
>  
>  typedef struct NvmeAsyncEvent {
>  QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry;
> -NvmeAerResult result;
>  } NvmeAsyncEvent;
>  
>  enum NvmeRequestFlags {
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 9c3a04dcd7..3099df99eb 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -553,28 +553,30 @@ typedef struct NvmeDsmRange {
>  uint64_tslba;
>  } NvmeDsmRange;
>  
> -enum NvmeAsyncEventRequest {
> -NVME_AER_TYPE_ERROR = 0,
> -NVME_AER_TYPE_SMART = 1,
> -NVME_AER_TYPE_IO_SPECIFIC   = 6,
> -NVME_AER_TYPE_VENDOR_SPECIFIC   = 7,
> -NVME_AER_INFO_ERR_INVALID_SQ= 0,
> -NVME_AER_INFO_ERR_INVALID_DB= 1,
> -NVME_AER_INFO_ERR_DIAG_FAIL = 2,
> -NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3,
> -NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR= 4,
> -NVME_AER_INFO_ERR_FW_IMG_LOAD_ERR   = 5,
> -NVME_AER_INFO_SMART_RELIABILITY = 0,
> -NVME_AER_INFO_SMART_TEMP_THRESH = 1,
> -NVME_AER_INFO_SMART_SPARE_THRESH= 2,
> +enum NvmeAsyncEventType {
> +NVME_AER_TYPE_ERROR = 0x00,
> +NVME_AER_TYPE_SMART = 0x01,
> +NVME_AER_TYPE_NOTICE= 0x02,
> +NVME_AER_TYPE_CMDSET_SPECIFIC   = 0x06,
> +NVME_AER_TYPE_VENDOR_SPECIFIC   = 0x07,
>  };
>  
> -typedef struct NvmeAerResult {
> -uint8_t event_type;
> -uint8_t event_info;
> -uint8_t log_page;
> -uint8_t resv;
> -} NvmeAerResult;
> +enum NvmeAsyncErrorInfo {
> +NVME_AER_ERR_INVALID_SQ = 0x00,
> +NVME_AER_ERR_INVALID_DB = 0x01,

Since we are moving this around, can we change it to
NVME_AER_INVALID_DB_REGISTER and NVME_AER_INVALID_DB_VALUE instead? I
believe those are the terms used in the spec.

Otherwise,

Reviewed-by: Klaus Jensen 

> +NVME_AER_ERR_DIAG_FAIL  = 0x02,
> +NVME_AER_ERR_PERS_INTERNAL_ERR  = 0x03,
> +NVME_AER_ERR_TRANS_INTERNAL_ERR = 0x04,
> +NVME_AER_ERR_FW_IMG_LOAD_ERR= 0x05,
> +};
> +
> +enum NvmeAsyncNoticeInfo {
> +NVME_AER_NOTICE_NS_CHANGED  = 0x00,
> +};
> +
> +enum NvmeAsyncEventCfg {
> +NVME_AEN_CFG_NS_ATTR= 1 << 8,
> +};
>  
>  typedef struct NvmeCqe {
>  union {
> @@ -881,7 +883,6 @@ enum NvmeIdNsDps {
>  
>  static inline void _nvme_check_size(void)
>  {
> -QEMU_BUILD_BUG_ON(sizeof(NvmeAerResult) != 4);
>  QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16);
>  QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16);
>  QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64);
> -- 
> 2.21.0
> 
>

Re: [REPORT] [GSoC - TCG Continuous Benchmarking] [#2] Dissecting QEMU Into Three Main Parts

2020-06-29 Thread Lukáš Doktor

Dne 29. 06. 20 v 12:25 Ahmed Karaman napsal(a):
> Hi,
> 
> The second report of the TCG Continuous Benchmarking series builds
> upon the QEMU performance metrics calculated in the previous report.
> This report presents a method to dissect the number of instructions
> executed by a QEMU invocation into three main phases:
> - Code Generation
> - JIT Execution
> - Helpers Execution
> It devises a Python script that automates this process.
> 
> After that, the report presents an experiment for comparing the
> output of running the script on 17 different targets. Many conclusions
> can be drawn from the results and two of them are discussed in the
> analysis section.
> 
> Report link:
> https://ahmedkrmn.github.io/TCG-Continuous-Benchmarking/Dissecting-QEMU-Into-Three-Main-Parts/
> 
> Previous reports:
> Report 1 - Measuring Basic Performance Metrics of QEMU:
> https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg06692.html
> 
> Best regards,
> Ahmed Karaman

Hello Ahmed,

very nice reading, both reports so far. One thing that could be better 
displayed is the system you used to generate this. This would come in handy 
especially later when you move from examples to actual reports. I think it'd 
make sense to add a section with a clear definition of the machine as well as 
the operating system, qemu version and eventually other deps (like compiler, 
flags, ...). For this report something like:

architecture: x86_64
cpu_codename: Kaby Lake
cpu: i7-8650U
ram: 32GB DDR4
os: Fedora 32
qemu: 470dd165d152ff7ceac61c7b71c2b89220b3aad7
compiler: gcc-10.1.1-1.fc32.x86_64
flags: 
--target-list="x86_64-softmmu,ppc64-softmmu,aarch64-softmmu,s390x-softmmu,riscv64-softmmu"
 --disable-werror --disable-sparse --enable-sdl --enable-kvm  
--enable-vhost-net --enable-vhost-net --enable-attr  --enable-kvm  --enable-fdt 
  --enable-vnc --enable-seccomp 
--block-drv-rw-whitelist="vmdk,null-aio,quorum,null-co,blkverify,file,nbd,raw,blkdebug,host_device,qed,nbd,iscsi,gluster,rbd,qcow2,throttle,copy-on-read"
 --python=/usr/bin/python3 --enable-linux-io-uring

would do. Maybe it'd be even a good idea to create a script to report this 
basic set of information and add it after each of the perf scripts so people 
don't forget to double-check the conditions, but others might disagree so take 
this only as a suggestion.

Regards,
Lukáš

PS: Automated cpu codenames, hosts OSes and such could be tricky, but one can 
use other libraries or just a best-effort approach with a fallback to "unknown" to 
let people fill it in manually or add their branch to your script.

Regards,
Lukáš



signature.asc
Description: OpenPGP digital signature

[PATCH 3/3] hw/block/nvme: add trace event for requests with non-zero status code

From: Klaus Jensen 

If a command results in a non-zero status code, trace it.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c   | 5 +
 hw/block/trace-events | 1 +
 2 files changed, 6 insertions(+)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 9f1a1ba03b8a..25d79bcd0bc9 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -813,6 +813,11 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, 
NvmeRequest *req)
 trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid,
   req->status);
 
+if (req->status) {
+trace_pci_nvme_err_req_status(nvme_cid(req), nvme_nsid(req->ns),
+  req->status, req->cmd.opcode);
+}
+
 QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
 QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
 timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
diff --git a/hw/block/trace-events b/hw/block/trace-events
index c570c7d0e2a5..ed21609f1a4f 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -94,6 +94,7 @@ pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
 # nvme traces for error conditions
 pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %"PRIu64""
 pci_nvme_err_aio(uint16_t cid, void *aio, const char *blkname, uint64_t 
offset, const char *opc, void *req, uint16_t status) "cid %"PRIu16" aio %p blk 
\"%s\" offset %"PRIu64" opc \"%s\" req %p status 0x%"PRIx16""
+pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t 
opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8""
 pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
 pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
 pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 
0x%"PRIx8""
-- 
2.27.0

[PATCH 2/3] hw/block/nvme: add commands supported and effects log page

From: Gollu Appalanaidu 

This adds support for the Commands Supported and Effects log page. See
NVM Express Spec 1.3d, sec. 5.14.1.5 ("Commands Supported and Effects").

Signed-off-by: Gollu Appalanaidu 
Co-authored-by: Klaus Jensen 
---
 hw/block/nvme.c  | 20 +++-
 hw/block/nvme.h  | 25 +
 include/block/nvme.h | 21 +
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 07ac409f37c9..9f1a1ba03b8a 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -67,6 +67,7 @@
 #define NVME_TEMPERATURE_WARNING 0x157
 #define NVME_TEMPERATURE_CRITICAL 0x175
 #define NVME_NUM_FW_SLOTS 1
+#define NVME_MAX_ADM_IO_CMDS 0xFF
 
 #define NVME_GUEST_ERR(trace, fmt, ...) \
 do { \
@@ -1471,6 +1472,21 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t 
rae, uint32_t buf_len,
 DMA_DIRECTION_FROM_DEVICE, req);
 }
 
+static uint16_t nvme_effects_log(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
+NvmeRequest *req)
+{
+uint32_t trans_len;
+
+if (off > sizeof(nvme_effects)) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
+trans_len = MIN(sizeof(nvme_effects) - off, buf_len);
+
+return nvme_dma(n, (uint8_t *)&nvme_effects + off, trans_len,
+DMA_DIRECTION_FROM_DEVICE, req);
+}
+
 static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
 {
 NvmeCmd *cmd = &req->cmd;
@@ -1514,6 +1530,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest 
*req)
 return nvme_smart_info(n, rae, len, off, req);
 case NVME_LOG_FW_SLOT_INFO:
 return nvme_fw_log_info(n, len, off, req);
+case NVME_LOG_EFFECTS:
+return nvme_effects_log(n, len, off, req);
 default:
 trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
 return NVME_INVALID_FIELD | NVME_DNR;
@@ -2927,7 +2945,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->acl = 3;
 id->aerl = n->params.aerl;
 id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
-id->lpa = NVME_LPA_EXTENDED;
+id->lpa = NVME_LPA_EFFECTS_LOG | NVME_LPA_EXTENDED;
 
 /* recommended default value (~70 C) */
 id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 66187902b7cf..e62bcd12a7a8 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -18,6 +18,31 @@ typedef struct NvmeParams {
 bool use_intel_id;
 } NvmeParams;
 
+static const NvmeEffectsLog nvme_effects = {
+.acs = {
+[NVME_ADM_CMD_DELETE_SQ]= NVME_EFFECTS_CSUPP,
+[NVME_ADM_CMD_CREATE_SQ]= NVME_EFFECTS_CSUPP,
+[NVME_ADM_CMD_GET_LOG_PAGE] = NVME_EFFECTS_CSUPP,
+[NVME_ADM_CMD_DELETE_CQ]= NVME_EFFECTS_CSUPP,
+[NVME_ADM_CMD_CREATE_CQ]= NVME_EFFECTS_CSUPP,
+[NVME_ADM_CMD_IDENTIFY] = NVME_EFFECTS_CSUPP,
+[NVME_ADM_CMD_ABORT]= NVME_EFFECTS_CSUPP,
+[NVME_ADM_CMD_SET_FEATURES] = NVME_EFFECTS_CSUPP | NVME_EFFECTS_CCC |
+NVME_EFFECTS_NIC | NVME_EFFECTS_NCC,
+[NVME_ADM_CMD_GET_FEATURES] = NVME_EFFECTS_CSUPP,
+[NVME_ADM_CMD_FORMAT_NVM]   = NVME_EFFECTS_CSUPP | NVME_EFFECTS_LBCC |
+NVME_EFFECTS_NCC | NVME_EFFECTS_NIC | NVME_EFFECTS_CSE_MULTI,
+[NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_EFFECTS_CSUPP,
+},
+
+.iocs = {
+[NVME_CMD_FLUSH]= NVME_EFFECTS_CSUPP,
+[NVME_CMD_WRITE]= NVME_EFFECTS_CSUPP | NVME_EFFECTS_LBCC,
+[NVME_CMD_READ] = NVME_EFFECTS_CSUPP,
+[NVME_CMD_WRITE_ZEROES] = NVME_EFFECTS_CSUPP | NVME_EFFECTS_LBCC,
+},
+};
+
 typedef struct NvmeAsyncEvent {
 QTAILQ_ENTRY(NvmeAsyncEvent) entry;
 NvmeAerResult result;
diff --git a/include/block/nvme.h b/include/block/nvme.h
index b27be237cd33..040e4ef36ddc 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -824,6 +824,24 @@ typedef struct NvmeSmartLog {
 uint8_t reserved2[320];
 } NvmeSmartLog;
 
+typedef struct NvmeEffectsLog {
+uint32_t acs[256];
+uint32_t iocs[256];
+uint8_t  rsvd2048[2048];
+} NvmeEffectsLog;
+
+enum {
+NVME_EFFECTS_CSUPP  = 1 <<  0,
+NVME_EFFECTS_LBCC   = 1 <<  1,
+NVME_EFFECTS_NCC= 1 <<  2,
+NVME_EFFECTS_NIC= 1 <<  3,
+NVME_EFFECTS_CCC= 1 <<  4,
+NVME_EFFECTS_CSE_SINGLE = 1 << 16,
+NVME_EFFECTS_CSE_MULTI  = 1 << 17,
+NVME_EFFECTS_CSE_MASK   = 3 << 16,
+NVME_EFFECTS_UUID_SEL   = 1 << 19,
+};
+
 enum NvmeSmartWarn {
 NVME_SMART_SPARE  = 1 << 0,
 NVME_SMART_TEMPERATURE= 1 << 1,
@@ -854,6 +872,7 @@ enum NvmeLogIdentifier {
 NVME_LOG_ERROR_INFO = 0x01,
 NVME_LOG_SMART_INFO = 0x02,
 NVME_LOG_FW_SLOT_INFO   = 0x03,
+NVME_LOG_EFFECTS= 0x05,
 };
 
 typedef struct NvmePSD {
@@ -980,6 +999,7 @@ enum NvmeIdCtrlFrmw {
 };
 
 enum NvmeIdCtrlLpa {
+NVME_LPA_EFFECTS_LOG  =

[PATCH 0/3] hw/block/nvme: bump to v1.4

From: Klaus Jensen 

This bumps the supported version to v1.4 and adds the CSE log page.

Based-on: <20200630042304.1305269-1-...@irrelevant.dk>
("[PATCH] hw/block/nvme: add support for dulbe")

Gollu Appalanaidu (1):
  hw/block/nvme: add commands supported and effects log page

Klaus Jensen (2):
  hw/block/nvme: add NVMe 1.4 specific fields
  hw/block/nvme: add trace event for requests with non-zero status code

 hw/block/nvme.c   |  28 +-
 hw/block/nvme.h   |  25 +
 hw/block/trace-events |   1 +
 include/block/nvme.h  | 216 +-
 4 files changed, 243 insertions(+), 27 deletions(-)

-- 
2.27.0

[PATCH 1/3] hw/block/nvme: add NVMe 1.4 specific fields

From: Klaus Jensen 

Add new fields from NVM Express v1.4.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c  |   3 +-
 include/block/nvme.h | 195 +--
 2 files changed, 172 insertions(+), 26 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 8e147b667c81..07ac409f37c9 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -60,7 +60,7 @@
 #define NVME_MAX_IOQPAIRS 0x
 #define NVME_REG_SIZE 0x1000
 #define NVME_DB_SIZE  4
-#define NVME_SPEC_VER 0x00010300
+#define NVME_SPEC_VER 0x00010400
 #define NVME_CMB_BIR 2
 #define NVME_PMR_BIR 2
 #define NVME_TEMPERATURE 0x143
@@ -2910,6 +2910,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->ieee[2] = 0xb3;
 id->mdts = n->params.mdts;
 id->ver = cpu_to_le32(NVME_SPEC_VER);
+id->cntrltype = 0x1;
 id->oacs = cpu_to_le16(0);
 
 /*
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 2a9c5e95bfd2..b27be237cd33 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -7,7 +7,7 @@ typedef struct NvmeBar {
 uint32_tintms;
 uint32_tintmc;
 uint32_tcc;
-uint32_trsvd1;
+uint8_t rsvd24[4];
 uint32_tcsts;
 uint32_tnssrc;
 uint32_taqa;
@@ -15,14 +15,20 @@ typedef struct NvmeBar {
 uint64_tacq;
 uint32_tcmbloc;
 uint32_tcmbsz;
-uint8_t padding[3520]; /* not used by QEMU */
+uint32_tbpinfo;
+uint32_tbprsel;
+uint64_tbpmbl;
+uint64_tcmbmsc;
+uint32_tcmbsts;
+uint8_t rsvd92[3492];
 uint32_tpmrcap;
 uint32_tpmrctl;
 uint32_tpmrsts;
 uint32_tpmrebs;
 uint32_tpmrswtp;
-uint32_tpmrmsc;
-} NvmeBar;
+uint64_tpmrmsc;
+uint8_t rsvd3612[484];
+} QEMU_PACKED NvmeBar;
 
 enum NvmeCapShift {
 CAP_MQES_SHIFT = 0,
@@ -34,7 +40,8 @@ enum NvmeCapShift {
 CAP_CSS_SHIFT  = 37,
 CAP_MPSMIN_SHIFT   = 48,
 CAP_MPSMAX_SHIFT   = 52,
-CAP_PMR_SHIFT  = 56,
+CAP_PMRS_SHIFT = 56,
+CAP_CMBS_SHIFT = 57,
 };
 
 enum NvmeCapMask {
@@ -47,7 +54,8 @@ enum NvmeCapMask {
 CAP_CSS_MASK   = 0xff,
 CAP_MPSMIN_MASK= 0xf,
 CAP_MPSMAX_MASK= 0xf,
-CAP_PMR_MASK   = 0x1,
+CAP_PMRS_MASK  = 0x1,
+CAP_CMBS_MASK  = 0x1,
 };
 
 #define NVME_CAP_MQES(cap)  (((cap) >> CAP_MQES_SHIFT)   & CAP_MQES_MASK)
@@ -59,6 +67,8 @@ enum NvmeCapMask {
 #define NVME_CAP_CSS(cap)   (((cap) >> CAP_CSS_SHIFT)& CAP_CSS_MASK)
 #define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK)
 #define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK)
+#define NVME_CAP_PMRS(cap)  (((cap) >> CAP_PMRS_SHIFT)   & CAP_PMRS_MASK)
+#define NVME_CAP_CMBS(cap)  (((cap) >> CAP_CMBS_SHIFT)   & CAP_CMBS_MASK)
 
 #define NVME_CAP_SET_MQES(cap, val)   (cap |= (uint64_t)(val & CAP_MQES_MASK)  
\
<< CAP_MQES_SHIFT)
@@ -78,8 +88,10 @@ enum NvmeCapMask {
<< CAP_MPSMIN_SHIFT)
 #define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & 
CAP_MPSMAX_MASK)\
 << 
CAP_MPSMAX_SHIFT)
-#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\
-<< CAP_PMR_SHIFT)
+#define NVME_CAP_SET_PMRS(cap, val)   (cap |= (uint64_t)(val & CAP_PMRS_MASK)\
+<< CAP_PMRS_SHIFT)
+#define NVME_CAP_SET_CMBS(cap, val)   (cap |= (uint64_t)(val & CAP_CMBS_MASK)\
+<< CAP_CMBS_SHIFT)
 
 enum NvmeCcShift {
 CC_EN_SHIFT = 0,
@@ -151,22 +163,58 @@ enum NvmeAqaMask {
 #define NVME_AQA_ACQS(aqa) ((aqa >> AQA_ACQS_SHIFT) & AQA_ACQS_MASK)
 
 enum NvmeCmblocShift {
-CMBLOC_BIR_SHIFT  = 0,
-CMBLOC_OFST_SHIFT = 12,
+CMBLOC_BIR_SHIFT = 0,
+CMBLOC_CQMMS_SHIFT   = 3,
+CMBLOC_CQPDS_SHIFT   = 4,
+CMBLOC_CDPMLS_SHIFT  = 5,
+CMBLOC_CDPCILS_SHIFT = 6,
+CMBLOC_CDMMMS_SHIFT  = 7,
+CMBLOC_CQDA_SHIFT= 8,
+CMBLOC_OFST_SHIFT= 12,
 };
 
 enum NvmeCmblocMask {
-CMBLOC_BIR_MASK  = 0x7,
-CMBLOC_OFST_MASK = 0xf,
+CMBLOC_BIR_MASK = 0x7,
+CMBLOC_CQMMS_MASK   = 0x1,
+CMBLOC_CQPDS_MASK   = 0x1,
+CMBLOC_CDPMLS_MASK  = 0x1,
+CMBLOC_CDPCILS_MASK = 0x1,
+CMBLOC_CDMMMS_MASK  = 0x1,
+CMBLOC_CQDA_MASK= 0x1,
+CMBLOC_OFST_MASK= 0xf,
 };
 
-#define NVME_CMBLOC_BIR(cmbloc) ((cmbloc >> CMBLOC_BIR_SHIFT)  & \
- CMBLOC_BIR_MASK)
-#define NVME_CMBLOC_OFST(cmbloc)((cmbloc >> CMBLOC_OFST_SHIFT) & \
- CMBLOC_OFST_MASK)
+#define NVME_CMBLOC_BIR(cmbloc) \
+((cmbloc >> CMBLOC_BIR_SHIFT) & CMBLOC_BIR_MASK)
+#define NVME_CMBLOC_CQMMS(cmbloc) \
+(

[Bug 1869858] Re: qemu can't start Windows10arm64 19H1(with kvm)

2020-06-29 Thread Launchpad Bug Tracker

[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1869858

Title:
  qemu can't start Windows10arm64 19H1(with kvm)

Status in QEMU:
  Expired

Bug description:
  My CPU is arm64 (Cortex-A53). I want to start Win10 arm64 with KVM because
it is faster than x86, but it didn't work: the screen is stuck at the UEFI
logo. I am using ramfb now, so it has nothing to do with the graphics card. If
I drop KVM, it does start, but it is very slow. With the same UEFI and KVM I
can start Debian arm64 buster. So where is the problem: qemu, kvm or
Microsoft? Others have used it to start successfully. I don't know what to
do.
  This is start command(Qemu version is 4.1')
  qemu-system-aarch64 -hda /win10.vhdx -cdrom /win10arm.iso -m 1G -accel kvm 
-smp 4 -cpu host -pflash efi.img -pflash var.img -device ramfb -device 
qemu-xhci -device usb-kbd -device usb-mouse -device usb-tablet
  If I replace the above three parameters with "- CPU cortex-a53" and "- accel 
TCG" and "- device VGA", I can start normally. What's the matter?

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1869858/+subscriptions

[PATCH] hw/block/nvme: add support for dulbe

From: Klaus Jensen 

This adds support for reporting the Deallocated or Unwritten Logical
Block error (DULBE). This requires tracking the allocated/deallocated
status of all logical blocks.

Introduce a bitmap that does this. The bitmap is persisted on the new
'state' drive that is associated with a namespace. If no such drive is
attached, the controller will not indicate support for DULBE.

Signed-off-by: Klaus Jensen 
---
Based-on: <20200630041956.1304473-1-...@irrelevant.dk>
("[PATCH] hw/block/nvme: make lba data size configurable")

 hw/block/nvme-ns.c| 103 +
 hw/block/nvme-ns.h|  12 +
 hw/block/nvme.c   | 117 --
 hw/block/nvme.h   |   4 +-
 hw/block/trace-events |   3 ++
 include/block/nvme.h  |   5 ++
 6 files changed, 240 insertions(+), 4 deletions(-)

diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index d6ec55860a5e..7c825c38c69d 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -28,6 +28,35 @@
 #include "nvme.h"
 #include "nvme-ns.h"
 
+static int nvme_ns_blk_resize(BlockBackend *blk, size_t len, Error **errp)
+{
+   Error *local_err = NULL;
+   int ret;
+   uint64_t perm, shared_perm;
+
+   blk_get_perm(blk, &perm, &shared_perm);
+
+   ret = blk_set_perm(blk, perm | BLK_PERM_RESIZE, shared_perm, 
&local_err);
+   if (ret < 0) {
+   error_propagate_prepend(errp, local_err, "blk_set_perm: ");
+   return ret;
+   }
+
+   ret = blk_truncate(blk, len, false, PREALLOC_MODE_OFF, 0, &local_err);
+   if (ret < 0) {
+   error_propagate_prepend(errp, local_err, "blk_truncate: ");
+   return ret;
+   }
+
+   ret = blk_set_perm(blk, perm, shared_perm, &local_err);
+   if (ret < 0) {
+   error_propagate_prepend(errp, local_err, "blk_set_perm: ");
+   return ret;
+   }
+
+   return 0;
+}
+
 static void nvme_ns_init(NvmeNamespace *ns)
 {
 NvmeIdNs *id_ns = &ns->id_ns;
@@ -41,6 +70,66 @@ static void nvme_ns_init(NvmeNamespace *ns)
 id_ns->nuse = id_ns->ncap;
 }
 
+static int nvme_ns_init_blk_state(NvmeNamespace *ns, Error **errp)
+{
+BlockBackend *blk = ns->blk_state;
+uint64_t perm, shared_perm;
+int64_t len, state_len;
+
+Error *local_err = NULL;
+int ret;
+
+perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+shared_perm = BLK_PERM_ALL;
+
+ns->utilization = bitmap_new(nvme_ns_nlbas(ns));
+
+ret = blk_set_perm(blk, perm, shared_perm, &local_err);
+if (ret) {
+error_propagate_prepend(errp, local_err, "blk_set_perm: ");
+return ret;
+}
+
+state_len = nvme_ns_blk_state_len(ns);
+
+len = blk_getlength(blk);
+if (len < 0) {
+error_setg_errno(errp, -len, "blk_getlength: ");
+return len;
+}
+
+if (len) {
+if (len != state_len) {
+error_setg(errp, "state size mismatch "
+"(expected %"PRIu64" bytes; was %"PRIu64" bytes)",
+state_len, len);
+error_append_hint(errp,
+"Did you change the 'lbads' parameter? "
+"Or re-formatted the namespace using Format NVM?\n");
+return -1;
+}
+
+ret = blk_pread(blk, 0, ns->utilization, state_len);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "blk_pread: ");
+return ret;
+} else if (ret != state_len) {
+error_setg(errp, "blk_pread: short read");
+return -1;
+}
+
+return 0;
+}
+
+ret = nvme_ns_blk_resize(blk, state_len, &local_err);
+if (ret < 0) {
+error_propagate_prepend(errp, local_err, "nvme_ns_blk_resize: ");
+return ret;
+}
+
+return 0;
+}
+
 static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, NvmeIdCtrl *id,
 Error **errp)
 {
@@ -111,6 +200,19 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error 
**errp)
 }
 
 nvme_ns_init(ns);
+
+if (ns->blk_state) {
+if (nvme_ns_init_blk_state(ns, errp)) {
+return -1;
+}
+
+/*
+ * With a state file in place we can enable the Deallocated or
+ * Unwritten Logical Block Error feature.
+ */
+ns->id_ns.nsfeat |= 0x4;
+}
+
 if (nvme_register_namespace(n, ns, errp)) {
 return -1;
 }
@@ -136,6 +238,7 @@ static Property nvme_ns_props[] = {
 DEFINE_PROP_DRIVE("drive", NvmeNamespace, blk),
 DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
 DEFINE_PROP_UINT8("lbads", NvmeNamespace, params.lbads, BDRV_SECTOR_BITS),
+DEFINE_PROP_DRIVE("state", NvmeNamespace, blk_state),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index bee46b32efa5..eb901acc912b 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -27,11 +27,18 @@ typedef struct NvmeNamespaceParams {
 typedef struct NvmeN

[PATCH] hw/block/nvme: make lba data size configurable

From: Klaus Jensen 

Allow the LBA data size (lbads) to be set between 9 and 12.

Signed-off-by: Klaus Jensen 
Acked-by: Keith Busch 
Reviewed-by: Maxim Levitsky 
Reviewed-by: Philippe Mathieu-Daudé 
---
Based-on: <20200629214051.1282060-1-...@irrelevant.dk>
("[PATCH v2 0/4] hw/block/nvme: support multiple namespaces")

 hw/block/nvme-ns.c | 8 +++-
 hw/block/nvme-ns.h | 1 +
 hw/block/nvme.c| 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 28ce5e011568..d6ec55860a5e 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -32,7 +32,7 @@ static void nvme_ns_init(NvmeNamespace *ns)
 {
 NvmeIdNs *id_ns = &ns->id_ns;
 
-id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
+id_ns->lbaf[0].ds = ns->params.lbads;
 
 id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));
 
@@ -92,6 +92,11 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, 
Error **errp)
 return -1;
 }
 
+if (ns->params.lbads < 9 || ns->params.lbads > 12) {
+error_setg(errp, "unsupported lbads (supported: 9-12)");
+return -1;
+}
+
 return 0;
 }
 
@@ -130,6 +135,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 static Property nvme_ns_props[] = {
 DEFINE_PROP_DRIVE("drive", NvmeNamespace, blk),
 DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
+DEFINE_PROP_UINT8("lbads", NvmeNamespace, params.lbads, BDRV_SECTOR_BITS),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 16fa8b1e3fb9..bee46b32efa5 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -21,6 +21,7 @@
 
 typedef struct NvmeNamespaceParams {
 uint32_t nsid;
+uint8_t  lbads;
 } NvmeNamespaceParams;
 
 typedef struct NvmeNamespace {
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 164c5e294d06..9e512c88656d 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -2877,6 +2877,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 if (n->namespace.blk) {
 ns = &n->namespace;
 ns->params.nsid = 1;
+ns->params.lbads = BDRV_SECTOR_BITS;
 
 if (nvme_ns_setup(n, ns, errp)) {
 return;
-- 
2.27.0

Re: [PATCH v2 02/18] hw/block/nvme: Define 64 bit cqe.result

On Jun 18 06:33, Dmitry Fomichev wrote:
> From: Ajay Joshi 
> 
> A new write command, Zone Append, is added as a part of Zoned
> Namespace Command Set. Upon successful completion of this command,
> the controller returns the start LBA of the performed write operation
> in cqe.result field. Therefore, the maximum size of this variable
> needs to be changed from 32 to 64 bit, consuming the reserved 32 bit
> field that follows the result in CQE struct. Since the existing
> commands are expected to return a 32 bit LE value, two separate
> variables, result32 and result64, are now kept in a union.
> 
> Signed-off-by: Ajay Joshi 
> Signed-off-by: Dmitry Fomichev 

Reviewed-by: Klaus Jensen 

> ---
>  block/nvme.c | 2 +-
>  block/trace-events   | 2 +-
>  hw/block/nvme.c  | 6 +++---
>  include/block/nvme.h | 6 --
>  4 files changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/block/nvme.c b/block/nvme.c
> index eb2f54dd9d..ca245ec574 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c
> @@ -287,7 +287,7 @@ static inline int nvme_translate_error(const NvmeCqe *c)
>  {
>  uint16_t status = (le16_to_cpu(c->status) >> 1) & 0xFF;
>  if (status) {
> -trace_nvme_error(le32_to_cpu(c->result),
> +trace_nvme_error(le64_to_cpu(c->result64),
>   le16_to_cpu(c->sq_head),
>   le16_to_cpu(c->sq_id),
>   le16_to_cpu(c->cid),
> diff --git a/block/trace-events b/block/trace-events
> index 29dff8881c..05c1393943 100644
> --- a/block/trace-events
> +++ b/block/trace-events
> @@ -156,7 +156,7 @@ vxhs_get_creds(const char *cacert, const char 
> *client_key, const char *client_ce
>  # nvme.c
>  nvme_kick(void *s, int queue) "s %p queue %d"
>  nvme_dma_flush_queue_wait(void *s) "s %p"
> -nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) 
> "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
> +nvme_error(uint64_t cmd_specific, int sq_head, int sqid, int cid, int 
> status) "cmd_specific %ld sq_head %d sqid %d cid %d status 0x%x"
>  nvme_process_completion(void *s, int index, int inflight) "s %p queue %d 
> inflight %d"
>  nvme_process_completion_queue_busy(void *s, int index) "s %p queue %d"
>  nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d"
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 3ed9f3d321..a1bbc9acde 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -823,7 +823,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
>  
> -req->cqe.result = result;
> +req->cqe.result32 = result;
>  return NVME_SUCCESS;
>  }
>  
> @@ -859,8 +859,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  ((dw11 >> 16) & 0x) + 1,
>  n->params.max_ioqpairs,
>  n->params.max_ioqpairs);
> -req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) |
> -  ((n->params.max_ioqpairs - 1) << 16));
> +req->cqe.result32 = cpu_to_le32((n->params.max_ioqpairs - 1) |
> +((n->params.max_ioqpairs - 1) << 
> 16));
>  break;
>  case NVME_TIMESTAMP:
>  return nvme_set_feature_timestamp(n, cmd);
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 1720ee1d51..9c3a04dcd7 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -577,8 +577,10 @@ typedef struct NvmeAerResult {
>  } NvmeAerResult;
>  
>  typedef struct NvmeCqe {
> -uint32_tresult;
> -uint32_trsvd;
> +union {
> +uint64_t result64;
> +uint32_t result32;
> +};
>  uint16_tsq_head;
>  uint16_tsq_id;
>  uint16_tcid;
> -- 
> 2.21.0
> 
>

Re: [PATCH v2 01/18] hw/block/nvme: Move NvmeRequest has_sg field to a bit flag

On Jun 18 06:33, Dmitry Fomichev wrote:
> In addition to the existing has_sg flag, a few more Boolean
> NvmeRequest flags are going to be introduced in subsequent patches.
> Convert "has_sg" variable to "flags" and define NvmeRequestFlags
> enum for individual flag values.
> 
> Signed-off-by: Dmitry Fomichev 

Reviewed-by: Klaus Jensen 

> ---
>  hw/block/nvme.c | 8 +++-
>  hw/block/nvme.h | 6 +-
>  2 files changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 1aee042d4c..3ed9f3d321 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -350,7 +350,7 @@ static void nvme_rw_cb(void *opaque, int ret)
>  block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
>  req->status = NVME_INTERNAL_DEV_ERROR;
>  }
> -if (req->has_sg) {
> +if (req->flags & NVME_REQ_FLG_HAS_SG) {
>  qemu_sglist_destroy(&req->qsg);
>  }
>  nvme_enqueue_req_completion(cq, req);
> @@ -359,7 +359,6 @@ static void nvme_rw_cb(void *opaque, int ret)
>  static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>  NvmeRequest *req)
>  {
> -req->has_sg = false;
>  block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
>   BLOCK_ACCT_FLUSH);
>  req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
> @@ -383,7 +382,6 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, 
> NvmeNamespace *ns, NvmeCmd *cmd,
>  return NVME_LBA_RANGE | NVME_DNR;
>  }
>  
> -req->has_sg = false;
>  block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
>   BLOCK_ACCT_WRITE);
>  req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
> @@ -422,14 +420,13 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
> NvmeCmd *cmd,
>  
>  dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
>  if (req->qsg.nsg > 0) {
> -req->has_sg = true;
> +req->flags |= NVME_REQ_FLG_HAS_SG;
>  req->aiocb = is_write ?
>  dma_blk_write(n->conf.blk, &req->qsg, data_offset, 
> BDRV_SECTOR_SIZE,
>nvme_rw_cb, req) :
>  dma_blk_read(n->conf.blk, &req->qsg, data_offset, 
> BDRV_SECTOR_SIZE,
>   nvme_rw_cb, req);
>  } else {
> -req->has_sg = false;
>  req->aiocb = is_write ?
>  blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, 
> nvme_rw_cb,
>  req) :
> @@ -917,6 +914,7 @@ static void nvme_process_sq(void *opaque)
>  QTAILQ_REMOVE(&sq->req_list, req, entry);
>  QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
>  memset(&req->cqe, 0, sizeof(req->cqe));
> +req->flags = 0;
>  req->cqe.cid = cmd.cid;
>  
>  status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 1d30c0bca2..0460cc0e62 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -16,11 +16,15 @@ typedef struct NvmeAsyncEvent {
>  NvmeAerResult result;
>  } NvmeAsyncEvent;
>  
> +enum NvmeRequestFlags {
> +NVME_REQ_FLG_HAS_SG   = 1 << 0,
> +};
> +
>  typedef struct NvmeRequest {
>  struct NvmeSQueue   *sq;
>  BlockAIOCB  *aiocb;
>  uint16_tstatus;
> -boolhas_sg;
> +uint16_tflags;
>  NvmeCqe cqe;
>  BlockAcctCookie acct;
>  QEMUSGList  qsg;
> -- 
> 2.21.0
> 
>

RE: [PATCH 1/2] hw/386: Fix uninitialized memory with -device and CPU hotplug

2020-06-29 Thread Babu Moger




> -Original Message-
> From: Moger, Babu 
> Sent: Thursday, June 25, 2020 5:55 PM
> To: Igor Mammedov 
> Cc: ehabk...@redhat.com; m...@redhat.com; qemu-devel@nongnu.org;
> pbonz...@redhat.com; r...@twiddle.net
> Subject: Re: [PATCH 1/2] hw/386: Fix uninitialized memory with -device and CPU
> hotplug
> 
> 
> 
> On 6/25/20 1:32 PM, Igor Mammedov wrote:
> > On Thu, 25 Jun 2020 11:41:25 -0500
> > Babu Moger  wrote:
> >
> >> Igor,
> >>
> >>> -Original Message-
> >>> From: Igor Mammedov 
> >>> Sent: Thursday, June 25, 2020 10:19 AM
> >>> To: Moger, Babu 
> >>> Cc: ehabk...@redhat.com; m...@redhat.com; qemu-devel@nongnu.org;
> >>> pbonz...@redhat.com; r...@twiddle.net
> >>> Subject: Re: [PATCH 1/2] hw/386: Fix uninitialized memory with -device and
> CPU
> >>> hotplug
> >>>
> >>> On Wed, 24 Jun 2020 12:35:59 -0500
> >>> Babu Moger  wrote:
> >>>
> > -Original Message-
> > From: Igor Mammedov 
> > Sent: Wednesday, June 24, 2020 8:48 AM
> > To: Moger, Babu 
> > Cc: ehabk...@redhat.com; m...@redhat.com; qemu-devel@nongnu.org;
> > pbonz...@redhat.com; r...@twiddle.net
> > Subject: Re: [PATCH 1/2] hw/386: Fix uninitialized memory with -device
> and
> >>> CPU
> > hotplug
> >
> > On Tue, 16 Jun 2020 12:18:56 -0500
> > Babu Moger  wrote:
> >
> >>> -Original Message-
> >>> From: Igor Mammedov 
> >>> Sent: Tuesday, June 16, 2020 5:59 AM
> >>> To: Moger, Babu 
> >>> Cc: pbonz...@redhat.com; r...@twiddle.net; ehabk...@redhat.com;
> >>> m...@redhat.com; marcel.apfelb...@gmail.com; qemu-
> >>> de...@nongnu.org
> >>> Subject: Re: [PATCH 1/2] hw/386: Fix uninitialized memory with -device
> >>> and
> > CPU
> >>> hotplug
> >>>
> >>> On Mon, 08 Jun 2020 15:18:50 -0500
> >>> Babu Moger  wrote:
> >>>
>  Noticed the following command failure while testing CPU hotplug.
> 
>  $ qemu-system-x86_64 -machine q35,accel=kvm -smp 1,maxcpus=2,
>    cores=1, threads=1,sockets=2 -cpu EPYC -device EPYC-x86_64-
>    cpu,core-id=0,socket-id=1,thread-id=0
> 
>    qemu-system-x86_64: -device EPYC-x86_64-cpu,core-id=0,socket-
> >>> id=1,
>    thread-id=0: Invalid CPU [socket: 21855, die: 0, core: 0, thread: 
>  0]
>    with APIC ID 21855, valid index range 0:1
> 
>  This happens because APIC ID is calculated using uninitialized
> memory.
>  This is happening after the addition of new field node_id in
> > X86CPUTopoIDs
>  structure. The node_id field is uninitialized while calling
>  apicid_from_topo_ids. The problem is discussed in the thread below.
> 
> >>>
> >
> >>>
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.ker
> >>> nel.org%2Fqemu-
> >>>
> >
> >>>
> devel%2F20200602171838.GG51%40habkost.net%2F&data=02%7C01
> >>>
> >
> >>>
> %7Cbabu.moger%40amd.com%7C02200d75fd8b48d1955608d811e44f5b%7C3d
> >>>
> >
> >>>
> d8961fe4884e608e11a82d994e183d%7C0%7C0%7C637279019564311233&
> >>>
> >
> >>>
> ;sdata=ry3QO0Z5dxLPoRxkYVkOsVm3nl%2BxfCGv8be%2BMHdoUPY%3D&r
> >>> eserved=0
> 
>  Fix the problem by initializing the node_id properly.
> 
>  Signed-off-by: Babu Moger 
>  ---
>   hw/i386/pc.c   |2 ++
>   include/hw/i386/topology.h |   11 +++
>   2 files changed, 13 insertions(+)
> 
>  diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>  index 2128f3d6fe..974cc30891 100644
>  --- a/hw/i386/pc.c
>  +++ b/hw/i386/pc.c
>  @@ -1585,6 +1585,8 @@ static void
> pc_cpu_pre_plug(HotplugHandler
> >>> *hotplug_dev,
>   topo_ids.die_id = cpu->die_id;
>   topo_ids.core_id = cpu->core_id;
>   topo_ids.smt_id = cpu->thread_id;
>  +topo_ids.node_id = cpu_x86_use_epyc_apic_id_encoding(ms-
> >> cpu_type)
> >>> ?
>  +   x86_node_id_for_epyc(&topo_info, 
>  &topo_ids) : 0;
> >>>
> >>> I'd rather not calculate some default value here,
> >>> this is the branch where we check user provided topology info and
> error
> >>> out
> >>> asking
> >>> to provide missing bits.
> >> Noticed that cpu->node_id is initialized to
> >>> 0xFF(NUMA_NODE_UNASSIGNED).
> >> We can initialize cpu->node_id to default node like how we do it in
> >> x86_get_default_cpu_node_id.  We can use it to initialize
> >>> topo_ids.node_id.
> >> This is consistent with other fields core_id, die_id etc..
> >>
> >>>
> >>> I also wonder if we should force user to specify numa nodes on CLI if
> >>> EPYC
> > cpu is
> >>> used.
> >>> (i.e. I'm assuming that EPYC always requires numa)
> >>
> >> That is not true. Without numa all the cpus will be configured under 
> >> one
> >> def

Re: [RFC v2 1/1] memory: Delete assertion in memory_region_unregister_iommu_notifier

2020-06-29 Thread Jason Wang




On 2020/6/29 下午9:34, Peter Xu wrote:

On Mon, Jun 29, 2020 at 01:51:47PM +0800, Jason Wang wrote:

On 2020/6/28 下午10:47, Peter Xu wrote:

On Sun, Jun 28, 2020 at 03:03:41PM +0800, Jason Wang wrote:

On 2020/6/27 上午5:29, Peter Xu wrote:

Hi, Eugenio,

(CCing Eric, Yan and Michael too)

On Fri, Jun 26, 2020 at 08:41:22AM +0200, Eugenio Pérez wrote:

diff --git a/memory.c b/memory.c
index 2f15a4b250..7f789710d2 100644
--- a/memory.c
+++ b/memory.c
@@ -1915,8 +1915,6 @@ void memory_region_notify_one(IOMMUNotifier *notifier,
return;
}
-assert(entry->iova >= notifier->start && entry_end <= notifier->end);

I can understand removing the assertion should solve the issue, however imho
the major issue is not about this single assertion but the whole addr_mask
issue behind with virtio...

I don't get it here; it looks to me like the range was from the guest IOMMU drivers.

Yes.  Note that I didn't mean that it's a problem in virtio, it's just the fact
that virtio is the only one I know that would like to support arbitrary address
range for the translated region.  I don't know about tcg, but vfio should still
need some kind of page alignment in both the address and the addr_mask.  We
have that assumption too across the memory core when we do translations.


Right but it looks to me the issue is not the alignment.



A further cause of the issue is the MSI region when vIOMMU enabled - currently
we implemented the interrupt region using another memory region so it split the
whole DMA region into two parts.  That's really a clean approach to IR
implementation, however that's also a burden to the invalidation part because
then we'll need to handle things like this when the listened range is not page
aligned at all (neither 0-0xfedfffff, nor 0xfef00000-MAX).  If without the IR
region (so the whole iommu address range will be a single FlatRange),


Is this a bug? I remember that at least for vtd, it won't do any DMAR on the
interrupt address range

I don't think it's a bug, at least it's working as how I understand...  that
interrupt range is using an IR region, that's why I said the IR region splits
the DMAR region into two pieces, so we have two FlatRange for the same
IOMMUMemoryRegion.



I haven't checked the qemu code, but if "a single FlatRange" means
0xFEEx_xxxx is subject to DMA remapping, the OS needs to set up a passthrough
mapping for that range in order to get MSI to work. This is not what the vtd
spec says:


"""

3.14 Handling Requests to Interrupt Address Range

Requests without PASID to address range 0xFEEx_xxxx are treated as
potential interrupt requests and are not subjected to DMA remapping
(even if translation structures specify a mapping for this
range). Instead, remapping hardware can be enabled to subject such
interrupt requests to interrupt remapping.

"""

My understanding is vtd won't do any DMA translation on 0xFEEx_xxxx even
if IR is not enabled.








   I think
we probably don't need most of the logic in vtd_address_space_unmap() at all,
then we can directly deliver all the IOTLB invalidations without splitting into
small page aligned ranges to all the iommu notifiers.  Sadly, so far I still
don't have ideal solution for it, because we definitely need IR.


Another possible (theoretical) issue (for vhost) is that it can't trigger
interrupt through the interrupt range.

Hmm.. Could you explain?  When IR is enabled, all devices including virtio
who send interrupt to 0xfeeX should be trapped by IR.



I meant vhost not virtio, if you teach vhost to DMA to 0xFEEx_xxxx, it
can't generate any interrupts as expected.








For normal IOTLB invalidations, we were trying our best to always make
IOMMUTLBEntry contain a valid addr_mask to be 2**N-1.  E.g., that's what we're
doing with the loop in vtd_address_space_unmap().

I'm sure such an assumption can work for any type of IOMMU.



But this is not the first time that we may want to break this assumption for
virtio so that we make the IOTLB a tuple of (start, len), then that len can be
not a address mask any more.  That seems to be more efficient for things like
vhost because iotlbs there are not page based, so it'll be inefficient if we
always guarantee the addr_mask because it'll be quite a lot more roundtrips of
the same range of invalidation.  Here we've encountered another issue of
triggering the assertion with virtio-net, but only with the old RHEL7 guest.

I'm thinking whether we can make the IOTLB invalidation configurable by
specifying whether the backend of the notifier can handle arbitrary address
range in some way.  So we still have the guaranteed addr_masks by default
(since I still don't think totally breaking the addr_mask restriction is wise...),
however we can allow the special backends to take advantage of using arbitrary
(start, len) ranges for reasons like performance.

To do that, a quick idea is to introduce a flag IOMMU_NOTIFIER_ARBITRARY_MASK
to IOMMUNotifierFlag, to declare that the iommu notifier (and its backend) can
t

Re: [PATCH v2 05/18] hw/block/nvme: Introduce the Namespace Types definitions

On Wed, Jun 17, 2020 at 2:47 PM Dmitry Fomichev  wrote:
>
> From: Niklas Cassel 
>
> Define the structures and constants required to implement
> Namespace Types support.
>
> Signed-off-by: Niklas Cassel 
> Signed-off-by: Dmitry Fomichev 
> ---
>  hw/block/nvme.h  |  3 ++
>  include/block/nvme.h | 75 +---
>  2 files changed, 73 insertions(+), 5 deletions(-)
>
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 4f0dac39ae..4fd155c409 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -63,6 +63,9 @@ typedef struct NvmeCQueue {
>
>  typedef struct NvmeNamespace {
>  NvmeIdNsid_ns;
> +uint32_tnsid;
> +uint8_t csi;
> +QemuUUIDuuid;
>  } NvmeNamespace;
>
>  static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 6a58bac0c2..5a1e5e137c 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -50,6 +50,11 @@ enum NvmeCapMask {
>  CAP_PMR_MASK   = 0x1,
>  };
>
> +enum NvmeCapCssBits {
> +CAP_CSS_NVM= 0x01,
> +CAP_CSS_CSI_SUPP   = 0x40,
> +};
> +
>  #define NVME_CAP_MQES(cap)  (((cap) >> CAP_MQES_SHIFT)   & CAP_MQES_MASK)
>  #define NVME_CAP_CQR(cap)   (((cap) >> CAP_CQR_SHIFT)& CAP_CQR_MASK)
>  #define NVME_CAP_AMS(cap)   (((cap) >> CAP_AMS_SHIFT)& CAP_AMS_MASK)
> @@ -101,6 +106,12 @@ enum NvmeCcMask {
>  CC_IOCQES_MASK  = 0xf,
>  };
>
> +enum NvmeCcCss {
> +CSS_NVM_ONLY= 0,
> +CSS_ALL_NSTYPES = 6,
> +CSS_ADMIN_ONLY  = 7,
> +};
> +
>  #define NVME_CC_EN(cc) ((cc >> CC_EN_SHIFT) & CC_EN_MASK)
>  #define NVME_CC_CSS(cc)((cc >> CC_CSS_SHIFT)& CC_CSS_MASK)
>  #define NVME_CC_MPS(cc)((cc >> CC_MPS_SHIFT)& CC_MPS_MASK)
> @@ -109,6 +120,21 @@ enum NvmeCcMask {
>  #define NVME_CC_IOSQES(cc) ((cc >> CC_IOSQES_SHIFT) & CC_IOSQES_MASK)
>  #define NVME_CC_IOCQES(cc) ((cc >> CC_IOCQES_SHIFT) & CC_IOCQES_MASK)
>
> +#define NVME_SET_CC_EN(cc, val) \
> +(cc |= (uint32_t)((val) & CC_EN_MASK) << CC_EN_SHIFT)
> +#define NVME_SET_CC_CSS(cc, val)\
> +(cc |= (uint32_t)((val) & CC_CSS_MASK) << CC_CSS_SHIFT)
> +#define NVME_SET_CC_MPS(cc, val)\
> +(cc |= (uint32_t)((val) & CC_MPS_MASK) << CC_MPS_SHIFT)
> +#define NVME_SET_CC_AMS(cc, val)\
> +(cc |= (uint32_t)((val) & CC_AMS_MASK) << CC_AMS_SHIFT)
> +#define NVME_SET_CC_SHN(cc, val)\
> +(cc |= (uint32_t)((val) & CC_SHN_MASK) << CC_SHN_SHIFT)
> +#define NVME_SET_CC_IOSQES(cc, val) \
> +(cc |= (uint32_t)((val) & CC_IOSQES_MASK) << CC_IOSQES_SHIFT)
> +#define NVME_SET_CC_IOCQES(cc, val) \
> +(cc |= (uint32_t)((val) & CC_IOCQES_MASK) << CC_IOCQES_SHIFT)
> +
>  enum NvmeCstsShift {
>  CSTS_RDY_SHIFT  = 0,
>  CSTS_CFS_SHIFT  = 1,
> @@ -482,10 +508,41 @@ typedef struct NvmeIdentify {
>  uint64_trsvd2[2];
>  uint64_tprp1;
>  uint64_tprp2;
> -uint32_tcns;
> -uint32_trsvd11[5];
> +uint8_t cns;
> +uint8_t rsvd4;
> +uint16_tctrlid;

Shouldn't this be CNTID?

Alistair

> +uint16_tnvmsetid;
> +uint8_t rsvd3;
> +uint8_t csi;
> +uint32_trsvd12[4];
>  } NvmeIdentify;
>
> +typedef struct NvmeNsIdDesc {
> +uint8_t nidt;
> +uint8_t nidl;
> +uint16_trsvd2;
> +} NvmeNsIdDesc;
> +
> +enum NvmeNidType {
> +NVME_NIDT_EUI64 = 0x01,
> +NVME_NIDT_NGUID = 0x02,
> +NVME_NIDT_UUID  = 0x03,
> +NVME_NIDT_CSI   = 0x04,
> +};
> +
> +enum NvmeNidLength {
> +NVME_NIDL_EUI64 = 8,
> +NVME_NIDL_NGUID = 16,
> +NVME_NIDL_UUID  = 16,
> +NVME_NIDL_CSI   = 1,
> +};
> +
> +enum NvmeCsi {
> +NVME_CSI_NVM= 0x00,
> +};
> +
> +#define NVME_SET_CSI(vec, csi) (vec |= (uint8_t)(1 << (csi)))
> +
>  typedef struct NvmeRwCmd {
>  uint8_t opcode;
>  uint8_t flags;
> @@ -603,6 +660,7 @@ enum NvmeStatusCodes {
>  NVME_CMD_ABORT_MISSING_FUSE = 0x000a,
>  NVME_INVALID_NSID   = 0x000b,
>  NVME_CMD_SEQ_ERROR  = 0x000c,
> +NVME_CMD_SET_CMB_REJECTED   = 0x002b,
>  NVME_LBA_RANGE  = 0x0080,
>  NVME_CAP_EXCEEDED   = 0x0081,
>  NVME_NS_NOT_READY   = 0x0082,
> @@ -729,9 +787,14 @@ typedef struct NvmePSD {
>  #define NVME_IDENTIFY_DATA_SIZE 4096
>
>  enum {
> -NVME_ID_CNS_NS = 0x0,
> -NVME_ID_CNS_CTRL   = 0x1,
> -NVME_ID_CNS_NS_ACTIVE_LIST = 0x2,
> +NVME_ID_CNS_NS= 0x0,
> +NVME_ID_CNS_CTRL  = 0x1,
> +NVME_ID_CNS_NS_ACTIVE_LIST= 0x2,
> +NVME_ID_CNS_NS_DESC_LIST  = 0x03,
> +NVME_ID_CNS_CS_NS = 0x05,
> +NVME_ID_CNS_CS_CTRL   = 0x06,
> +NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07,
> +NVME_ID_CNS_IO_COMMAND_SET= 0x1c,
>  };
>
>  typedef struct NvmeIdCtrl {
> @@ -825,6

Re: [PATCH v2 04/18] hw/block/nvme: Add Commands Supported and Effects log

On Wed, Jun 17, 2020 at 3:05 PM Dmitry Fomichev  wrote:
>
> This log page becomes necessary to implement to allow checking for
> Zone Append command support in Zoned Namespace Command Set.
>
> This commit adds the code to report this log page for NVM Command
> Set only. The parts that are specific to zoned operation will be
> added later in the series.
>
> Signed-off-by: Dmitry Fomichev 

Acked-by: Alistair Francis 

Alistair

> ---
>  hw/block/nvme.c   | 62 +++
>  hw/block/trace-events |  4 +++
>  include/block/nvme.h  | 18 +
>  3 files changed, 84 insertions(+)
>
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index a1bbc9acde..03b8deee85 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -871,6 +871,66 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  return NVME_SUCCESS;
>  }
>
> +static uint16_t nvme_handle_cmd_effects(NvmeCtrl *n, NvmeCmd *cmd,
> +uint64_t prp1, uint64_t prp2, uint64_t ofs, uint32_t len)
> +{
> +   NvmeEffectsLog cmd_eff_log = {};
> +   uint32_t *iocs = cmd_eff_log.iocs;
> +
> +trace_pci_nvme_cmd_supp_and_effects_log_read();
> +
> +if (ofs != 0) {
> +trace_pci_nvme_err_invalid_effects_log_offset(ofs);
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +if (len != sizeof(cmd_eff_log)) {
> +trace_pci_nvme_err_invalid_effects_log_len(len);
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +iocs[NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_DELETE_CQ] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_CREATE_CQ] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_IDENTIFY] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFFECTS_CSUPP;
> +iocs[NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFFECTS_CSUPP;
> +
> +iocs[NVME_CMD_FLUSH] = NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC;
> +iocs[NVME_CMD_WRITE_ZEROS] = NVME_CMD_EFFECTS_CSUPP |
> + NVME_CMD_EFFECTS_LBCC;
> +iocs[NVME_CMD_WRITE] = NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC;
> +iocs[NVME_CMD_READ] = NVME_CMD_EFFECTS_CSUPP;
> +
> +return nvme_dma_read_prp(n, (uint8_t *)&cmd_eff_log, len, prp1, prp2);
> +}
> +
> +static uint16_t nvme_get_log_page(NvmeCtrl *n, NvmeCmd *cmd)
> +{
> +uint64_t prp1 = le64_to_cpu(cmd->prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->prp2);
> +uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> +uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> +uint64_t dw12 = le32_to_cpu(cmd->cdw12);
> +uint64_t dw13 = le32_to_cpu(cmd->cdw13);
> +uint64_t ofs = (dw13 << 32) | dw12;
> +uint32_t numdl, numdu, len;
> +uint16_t lid = dw10 & 0xff;
> +
> +numdl = dw10 >> 16;
> +numdu = dw11 & 0x;
> +len = (((numdu << 16) | numdl) + 1) << 2;
> +
> +switch (lid) {
> +case NVME_LOG_CMD_EFFECTS:
> +return nvme_handle_cmd_effects(n, cmd, prp1, prp2, ofs, len);
> +}
> +
> +trace_pci_nvme_unsupported_log_page(lid);
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
>  static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  {
>  switch (cmd->opcode) {
> @@ -888,6 +948,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, 
> NvmeRequest *req)
>  return nvme_set_feature(n, cmd, req);
>  case NVME_ADM_CMD_GET_FEATURES:
>  return nvme_get_feature(n, cmd, req);
> +case NVME_ADM_CMD_GET_LOG_PAGE:
> +return nvme_get_log_page(n, cmd);
>  default:
>  trace_pci_nvme_err_invalid_admin_opc(cmd->opcode);
>  return NVME_INVALID_OPCODE | NVME_DNR;
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 958fcc5508..423d491e27 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -58,6 +58,7 @@ pci_nvme_mmio_start_success(void) "setting controller 
> enable bit succeeded"
>  pci_nvme_mmio_stopped(void) "cleared controller enable bit"
>  pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
>  pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
> +pci_nvme_cmd_supp_and_effects_log_read(void) "commands supported and effects 
> log read"
>
>  # nvme traces for error conditions
>  pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
> @@ -69,6 +70,8 @@ pci_nvme_err_invalid_ns(uint32_t ns, uint32_t limit) 
> "invalid namespace %u not w
>  pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
>  pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
>  pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) 
> "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
> +pci_nvme_err_invalid_effects_log_offset(uint64_t ofs) "commands supported 
> and effects log offset must be 0, got %"PRIu64""
> +pci_nvme_err

Re: [PATCH v2 2/4] smbus: Fix spd_data_generate() error API violation


On Mon, 29 Jun 2020, Markus Armbruster wrote:

BALATON Zoltan  writes:

On Sat, 27 Jun 2020, Markus Armbruster wrote:

Quick reply without having thought through the issues at all: I'm not


Does that mean you'll reply later with more detail or this is all you
had to say about this? (Just to know if I should wait for another
reply.)


Not sure I can find the time before the soft freeze.  Best not to wait
for me.


OK I'm not sure my mac_oldworld patches can be finished or would be merged 
before the freeze anyway and this was already broken in 5.0 so it's not 
that urgent now but I'll need this in the future so eventually should 
find some way to come to an agreement.



opposed to you doing work to enable additional or even arbitrary memory
sizes where these actually work.  I'm first and foremost opposed to me
wasting time on "improving" code that is not used for anything.  That's
why I dumbed down spd_data_generate().


It was used by sam460ex until moving ram allocation to memdev broke it.


Secondly, I'm opposed to QEMU "correcting" user configuration.  I want
QEMU do exactly what it's told, and fail with a clear error message when
that is not possible.  The error message may include hints for the user
on how to correct the configuration.


I don't agree with that. It's already hard enough for non-expert users
to figure out the needed command line switches, making that even
harder by throwing back an error for everything that could work just
not exactly specified is needlessly annoying them further. To the
point of chasing them away from using QEMU. A lot of people prefer
VMWare or VirtualBox for this reason and only try QEMU if there's no
other way.


We don't have to agree on everything.  I'm not the QEMU CLI dictator.
The status quo is pretty clear, though:

   $ qemu-system-ppc64 -help
   [...]
   -m [size=]megs[,slots=n,maxmem=size]
   configure guest RAM
   size: initial amount of guest memory
   [...]

It says "Initial amount of guest memory", not "Approximate amount of
guest memory" or something like that.

If we decide we want to change it from "Initial amount of guest memory"
to some "do what I mean" behavior, then that behavior needs to be
documented.


This is sufficiently vague that it says "initial" which to me means it's 
not absolute and can change while the VM is running so a change due to fix 
up fits in that in my opinion :-)



Moreover, if DWIM is appropriate for one machine, it's probably
appropriate for all of them.  The CLI should be as consistent as we can
make it across machines.


That's the point. Running e.g. qemu-system-x86_64 -m 1000 does not abort 
but runs the VM with an odd RAM size even though that's not possible on 
real hardware. Other machines should behave the same, within their limits: 
for sam460ex that means we need to truncate memory size to largest valid 
value because of SoC limits, for mac_oldworld that means with OpenBIOS it 
will see all RAM and with firmware ROM somewhat less. I've implemented 
that originally both for consistency and user convenience but this was 
"cleaned up" afterwards and also made impossible to implement again without 
duplicating code in boards or reverting to some previous state and fixing 
the problems in a way that allows my use case as well.


Regards,
BALATON Zoltan

Re: [PATCH v2 03/18] hw/block/nvme: Clean up unused AER definitions

On Wed, Jun 17, 2020 at 2:48 PM Dmitry Fomichev  wrote:
>
> Removed unused struct NvmeAerResult and SMART-related async event
> codes. All other event codes are now categorized by their type.
> This avoids having to define the same values in a single enum,
> NvmeAsyncEventRequest, that is now removed.
>
> Later commits in this series will define additional values in some
> of these enums. No functional change.
>
> Signed-off-by: Dmitry Fomichev 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/block/nvme.h  |  1 -
>  include/block/nvme.h | 43 ++-
>  2 files changed, 22 insertions(+), 22 deletions(-)
>
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 0460cc0e62..4f0dac39ae 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -13,7 +13,6 @@ typedef struct NvmeParams {
>
>  typedef struct NvmeAsyncEvent {
>  QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry;
> -NvmeAerResult result;
>  } NvmeAsyncEvent;
>
>  enum NvmeRequestFlags {
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 9c3a04dcd7..3099df99eb 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -553,28 +553,30 @@ typedef struct NvmeDsmRange {
>  uint64_tslba;
>  } NvmeDsmRange;
>
> -enum NvmeAsyncEventRequest {
> -NVME_AER_TYPE_ERROR = 0,
> -NVME_AER_TYPE_SMART = 1,
> -NVME_AER_TYPE_IO_SPECIFIC   = 6,
> -NVME_AER_TYPE_VENDOR_SPECIFIC   = 7,
> -NVME_AER_INFO_ERR_INVALID_SQ= 0,
> -NVME_AER_INFO_ERR_INVALID_DB= 1,
> -NVME_AER_INFO_ERR_DIAG_FAIL = 2,
> -NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3,
> -NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR= 4,
> -NVME_AER_INFO_ERR_FW_IMG_LOAD_ERR   = 5,
> -NVME_AER_INFO_SMART_RELIABILITY = 0,
> -NVME_AER_INFO_SMART_TEMP_THRESH = 1,
> -NVME_AER_INFO_SMART_SPARE_THRESH= 2,
> +enum NvmeAsyncEventType {
> +NVME_AER_TYPE_ERROR = 0x00,
> +NVME_AER_TYPE_SMART = 0x01,
> +NVME_AER_TYPE_NOTICE= 0x02,
> +NVME_AER_TYPE_CMDSET_SPECIFIC   = 0x06,
> +NVME_AER_TYPE_VENDOR_SPECIFIC   = 0x07,
>  };
>
> -typedef struct NvmeAerResult {
> -uint8_t event_type;
> -uint8_t event_info;
> -uint8_t log_page;
> -uint8_t resv;
> -} NvmeAerResult;
> +enum NvmeAsyncErrorInfo {
> +NVME_AER_ERR_INVALID_SQ = 0x00,
> +NVME_AER_ERR_INVALID_DB = 0x01,
> +NVME_AER_ERR_DIAG_FAIL  = 0x02,
> +NVME_AER_ERR_PERS_INTERNAL_ERR  = 0x03,
> +NVME_AER_ERR_TRANS_INTERNAL_ERR = 0x04,
> +NVME_AER_ERR_FW_IMG_LOAD_ERR= 0x05,
> +};
> +
> +enum NvmeAsyncNoticeInfo {
> +NVME_AER_NOTICE_NS_CHANGED  = 0x00,
> +};
> +
> +enum NvmeAsyncEventCfg {
> +NVME_AEN_CFG_NS_ATTR= 1 << 8,
> +};
>
>  typedef struct NvmeCqe {
>  union {
> @@ -881,7 +883,6 @@ enum NvmeIdNsDps {
>
>  static inline void _nvme_check_size(void)
>  {
> -QEMU_BUILD_BUG_ON(sizeof(NvmeAerResult) != 4);
>  QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16);
>  QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16);
>  QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64);
> --
> 2.21.0
>
>

Re: [PATCH v2 02/18] hw/block/nvme: Define 64 bit cqe.result

On Wed, Jun 17, 2020 at 2:44 PM Dmitry Fomichev  wrote:
>
> From: Ajay Joshi 
>
> A new write command, Zone Append, is added as a part of Zoned
> Namespace Command Set. Upon successful completion of this command,
> the controller returns the start LBA of the performed write operation
> in cqe.result field. Therefore, the maximum size of this variable
> needs to be changed from 32 to 64 bit, consuming the reserved 32 bit
> field that follows the result in CQE struct. Since the existing
> commands are expected to return a 32 bit LE value, two separate
> variables, result32 and result64, are now kept in a union.
>
> Signed-off-by: Ajay Joshi 
> Signed-off-by: Dmitry Fomichev 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  block/nvme.c | 2 +-
>  block/trace-events   | 2 +-
>  hw/block/nvme.c  | 6 +++---
>  include/block/nvme.h | 6 --
>  4 files changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/block/nvme.c b/block/nvme.c
> index eb2f54dd9d..ca245ec574 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c
> @@ -287,7 +287,7 @@ static inline int nvme_translate_error(const NvmeCqe *c)
>  {
>  uint16_t status = (le16_to_cpu(c->status) >> 1) & 0xFF;
>  if (status) {
> -trace_nvme_error(le32_to_cpu(c->result),
> +trace_nvme_error(le64_to_cpu(c->result64),
>   le16_to_cpu(c->sq_head),
>   le16_to_cpu(c->sq_id),
>   le16_to_cpu(c->cid),
> diff --git a/block/trace-events b/block/trace-events
> index 29dff8881c..05c1393943 100644
> --- a/block/trace-events
> +++ b/block/trace-events
> @@ -156,7 +156,7 @@ vxhs_get_creds(const char *cacert, const char 
> *client_key, const char *client_ce
>  # nvme.c
>  nvme_kick(void *s, int queue) "s %p queue %d"
>  nvme_dma_flush_queue_wait(void *s) "s %p"
> -nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) 
> "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
> +nvme_error(uint64_t cmd_specific, int sq_head, int sqid, int cid, int 
> status) "cmd_specific %ld sq_head %d sqid %d cid %d status 0x%x"
>  nvme_process_completion(void *s, int index, int inflight) "s %p queue %d 
> inflight %d"
>  nvme_process_completion_queue_busy(void *s, int index) "s %p queue %d"
>  nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d"
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 3ed9f3d321..a1bbc9acde 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -823,7 +823,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
>
> -req->cqe.result = result;
> +req->cqe.result32 = result;
>  return NVME_SUCCESS;
>  }
>
> @@ -859,8 +859,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  ((dw11 >> 16) & 0x) + 1,
>  n->params.max_ioqpairs,
>  n->params.max_ioqpairs);
> -req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) |
> -  ((n->params.max_ioqpairs - 1) << 16));
> +req->cqe.result32 = cpu_to_le32((n->params.max_ioqpairs - 1) |
> +((n->params.max_ioqpairs - 1) << 
> 16));
>  break;
>  case NVME_TIMESTAMP:
>  return nvme_set_feature_timestamp(n, cmd);
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 1720ee1d51..9c3a04dcd7 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -577,8 +577,10 @@ typedef struct NvmeAerResult {
>  } NvmeAerResult;
>
>  typedef struct NvmeCqe {
> -uint32_tresult;
> -uint32_trsvd;
> +union {
> +uint64_t result64;
> +uint32_t result32;
> +};
>  uint16_tsq_head;
>  uint16_tsq_id;
>  uint16_tcid;
> --
> 2.21.0
>
>

Re: [PATCH v2 01/18] hw/block/nvme: Move NvmeRequest has_sg field to a bit flag

On Wed, Jun 17, 2020 at 2:43 PM Dmitry Fomichev  wrote:
>
> In addition to the existing has_sg flag, a few more Boolean
> NvmeRequest flags are going to be introduced in subsequent patches.
> Convert "has_sg" variable to "flags" and define NvmeRequestFlags
> enum for individual flag values.
>
> Signed-off-by: Dmitry Fomichev 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/block/nvme.c | 8 +++-
>  hw/block/nvme.h | 6 +-
>  2 files changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 1aee042d4c..3ed9f3d321 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -350,7 +350,7 @@ static void nvme_rw_cb(void *opaque, int ret)
>  block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
>  req->status = NVME_INTERNAL_DEV_ERROR;
>  }
> -if (req->has_sg) {
> +if (req->flags & NVME_REQ_FLG_HAS_SG) {
>  qemu_sglist_destroy(&req->qsg);
>  }
>  nvme_enqueue_req_completion(cq, req);
> @@ -359,7 +359,6 @@ static void nvme_rw_cb(void *opaque, int ret)
>  static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>  NvmeRequest *req)
>  {
> -req->has_sg = false;
>  block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
>   BLOCK_ACCT_FLUSH);
>  req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
> @@ -383,7 +382,6 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, 
> NvmeNamespace *ns, NvmeCmd *cmd,
>  return NVME_LBA_RANGE | NVME_DNR;
>  }
>
> -req->has_sg = false;
>  block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
>   BLOCK_ACCT_WRITE);
>  req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
> @@ -422,14 +420,13 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
> NvmeCmd *cmd,
>
>  dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
>  if (req->qsg.nsg > 0) {
> -req->has_sg = true;
> +req->flags |= NVME_REQ_FLG_HAS_SG;
>  req->aiocb = is_write ?
>  dma_blk_write(n->conf.blk, &req->qsg, data_offset, 
> BDRV_SECTOR_SIZE,
>nvme_rw_cb, req) :
>  dma_blk_read(n->conf.blk, &req->qsg, data_offset, 
> BDRV_SECTOR_SIZE,
>   nvme_rw_cb, req);
>  } else {
> -req->has_sg = false;
>  req->aiocb = is_write ?
>  blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, 
> nvme_rw_cb,
>  req) :
> @@ -917,6 +914,7 @@ static void nvme_process_sq(void *opaque)
>  QTAILQ_REMOVE(&sq->req_list, req, entry);
>  QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
>  memset(&req->cqe, 0, sizeof(req->cqe));
> +req->flags = 0;
>  req->cqe.cid = cmd.cid;
>
>  status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 1d30c0bca2..0460cc0e62 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -16,11 +16,15 @@ typedef struct NvmeAsyncEvent {
>  NvmeAerResult result;
>  } NvmeAsyncEvent;
>
> +enum NvmeRequestFlags {
> +NVME_REQ_FLG_HAS_SG   = 1 << 0,
> +};
> +
>  typedef struct NvmeRequest {
>  struct NvmeSQueue   *sq;
>  BlockAIOCB  *aiocb;
>  uint16_tstatus;
> -boolhas_sg;
> +uint16_tflags;
>  NvmeCqe cqe;
>  BlockAcctCookie acct;
>  QEMUSGList  qsg;
> --
> 2.21.0
>
>

Re: [PATCH v3 2/3] RISC-V: Copy the fdt in dram instead of ROM

2020-06-29 Thread Bin Meng

On Tue, Jun 30, 2020 at 2:50 AM Atish Patra  wrote:
>
> On Sat, Jun 27, 2020 at 2:55 AM Bin Meng  wrote:
> >
> > On Sat, Jun 27, 2020 at 12:37 PM Atish Patra  wrote:
> > >
> > > On Fri, Jun 26, 2020 at 7:54 PM Bin Meng  wrote:
> > > >
> > > > Hi Atish,
> > > >
> > > > On Sat, Jun 27, 2020 at 12:58 AM Atish Patra  
> > > > wrote:
> > > > >
> > > > > On Fri, Jun 26, 2020 at 4:50 AM Bin Meng  wrote:
> > > > > >
> > > > > > Hi Atish,
> > > > > >
> > > > > > On Fri, Jun 26, 2020 at 8:33 AM Atish Patra  
> > > > > > wrote:
> > > > > > >
> > > > > > > Currently, the fdt is copied to the ROM after the reset vector. 
> > > > > > > The firmware
> > > > > > > has to copy it to DRAM. Instead of this, directly copy the device 
> > > > > > > tree to a
> > > > > > > pre-computed dram address. The device tree load address should be 
> > > > > > > as far as
> > > > > > > possible from kernel and initrd images. That's why it is kept at 
> > > > > > > the end of
> > > > > > > the DRAM or 4GB whichever is lesser.
> > > > > > >
> > > > > > > Signed-off-by: Atish Patra 
> > > > > > > Reviewed-by: Alistair Francis 
> > > > > > > ---
> > > > > > >  hw/riscv/boot.c | 57 
> > > > > > > +
> > > > > > >  hw/riscv/sifive_u.c | 32 +++
> > > > > > >  hw/riscv/spike.c|  7 -
> > > > > > >  hw/riscv/virt.c |  7 -
> > > > > > >  include/hw/riscv/boot.h |  5 +++-
> > > > > > >  5 files changed, 71 insertions(+), 37 deletions(-)
> > > > > > >
> > > > > > > diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
> > > > > > > index 482b78147993..02c4018a8105 100644
> > > > > > > --- a/hw/riscv/boot.c
> > > > > > > +++ b/hw/riscv/boot.c
> > > > > > > @@ -159,44 +159,67 @@ hwaddr riscv_load_initrd(const char 
> > > > > > > *filename, uint64_t mem_size,
> > > > > > >  return *start + size;
> > > > > > >  }
> > > > > > >
> > > > > > > +hwaddr riscv_load_fdt(hwaddr dram_base, uint64_t mem_size, void 
> > > > > > > *fdt)
> > > > > >
> > > > > > I think we should use uint32_t for the return value type, since it 
> > > > > > is
> > > > > > always below 4GiB
> > > > > >
> > > > >
> > > > > You are correct. I will update it. Thanks.
> > > > > > > +{
> > > > > > > +hwaddr temp, fdt_addr;
> > > > > > > +hwaddr dram_end = dram_base + mem_size;
> > > > > > > +int fdtsize = fdt_totalsize(fdt);
> > > > > > > +
> > > > > > > +if (fdtsize <= 0) {
> > > > > > > +error_report("invalid device-tree");
> > > > > > > +exit(1);
> > > > > > > +}
> > > > > > > +
> > > > > > > +/*
> > > > > > > + * We should put fdt as far as possible to avoid 
> > > > > > > kernel/initrd overwriting
> > > > > > > + * its content. But it should be addressable by 32 bit 
> > > > > > > system as well.
> > > > > > > + * Thus, put it at an aligned address that less than fdt 
> > > > > > > size from end of
> > > > > > > + * dram or 4GB whichever is lesser.
> > > > > > > + */
> > > > > > > +temp = MIN(dram_end, 4096 * MiB);
> > > > > > > +fdt_addr = QEMU_ALIGN_DOWN(temp - fdtsize, 2 * MiB);
> > > > > > > +
> > > > > > > +fdt_pack(fdt);
> > > > > > > +/* copy in the device tree */
> > > > > > > +qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
> > > > > >
> > > > > > Use fdtsize
> > > > > >
> > > > >
> > > > > Sure.
> > > > > > > +
> > > > > > > +rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr,
> > > > > > > +  &address_space_memory);
> > > > > > > +
> > > > > > > +return fdt_addr;
> > > > > > > +}
> > > > > > > +
> > > > > > >  void riscv_setup_rom_reset_vec(hwaddr start_addr, hwaddr 
> > > > > > > rom_base,
> > > > > > > -   hwaddr rom_size, void *fdt)
> > > > > > > +   hwaddr rom_size,
> > > > > > > +   hwaddr fdt_load_addr, void *fdt)
> > > > > > >  {
> > > > > > >  int i;
> > > > > > >  /* reset vector */
> > > > > > > -uint32_t reset_vec[8] = {
> > > > > > > -0x0297,  /* 1:  auipc  t0, 
> > > > > > > %pcrel_hi(dtb) */
> > > > > > > -0x02028593,  /* addi   a1, t0, 
> > > > > > > %pcrel_lo(1b) */
> > > > > > > +uint32_t reset_vec[10] = {
> > > > > > > +0x0297,  /* 1:  auipc  t0, 
> > > > > > > %pcrel_hi(fw_dyn) */
> > > > > >
> > > > > > fw_dyn should be introduced in the next patch, so this line should 
> > > > > > be
> > > > > > kept unchanged in this patch
> > > > > >
> > > > > As we have fdt_laddr, keeping it unchanged may create confusion with
> > > > > another dtb label.
> > > > > I will change the label to "end" in the next version.
> > > > >
> > > > > > >  0xf1402573,  /* csrr   a0, mhartid  
> > > > > > > */
> > > > > > >  #if defined(TARGET_RISCV32)
> > > > > > > +0x0202a583,  /* lw a1, 32(t0) */
> > > > > > >  0x0182a283,

Re: [PATCH] hw/ppc/ppc4xx: Only accept (combination of) pow2 DDR sizes


On Mon, 29 Jun 2020, Philippe Mathieu-Daudé wrote:

Use popcount instruction to count the number of bits set in
the RAM size. Allow at most 1 bit for each bank. This avoids
using invalid hardware configurations.

Signed-off-by: Philippe Mathieu-Daudé 
---
hw/ppc/ppc4xx_devs.c | 9 +
1 file changed, 9 insertions(+)

diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c
index f1651e04d9..c2484a5695 100644
--- a/hw/ppc/ppc4xx_devs.c
+++ b/hw/ppc/ppc4xx_devs.c
@@ -687,6 +687,15 @@ void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks,
int i;
int j;

+if (ctpop64(size_left) > nr_banks) {
+if (nr_banks) {
+error_report("RAM size must be a power of 2");
+} else {
+error_report("RAM size must be the combination of %d powers of 2",
+ nr_banks);
+}
+exit(1);


What is this supposed to fix? Is it a good idea to exit() from a helper? I 
don't think so: in my opinion the board code should be in control and 
decide what it can work with and what it cannot handle and wants to abort 
on. So maybe it's better to return an error in some way and let the board 
code handle it. (We already exit from this function, but that was added in 
commit a0258e4afa1 when the size fix-up was removed due to memdev. That 
exit uses the EXIT_FAILURE constant.)


Regards,
BALATON Zoltan


+}
for (i = 0; i < nr_banks; i++) {
for (j = 0; sdram_bank_sizes[j] != 0; j++) {
bank_size = sdram_bank_sizes[j];

[PATCH 8/9] Skipping sys/syscall.h inclusion as only used in qemu_signalfd anyway

>From b821b7e9bbf1f327058ee858a92c7a7ee6740e63 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Mon, 29 Jun 2020 22:20:39 +
Subject: [PATCH 8/9] Skipping sys/syscall.h inclusion as only used in
 qemu_signalfd anyway.

Signed-off-by: David Carlier 
---
 util/compatfd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/util/compatfd.c b/util/compatfd.c
index c296f55d14..ee47dd8089 100644
--- a/util/compatfd.c
+++ b/util/compatfd.c
@@ -16,7 +16,9 @@
 #include "qemu/osdep.h"
 #include "qemu/thread.h"

+#if defined(CONFIG_SIGNALFD)
 #include <sys/syscall.h>
+#endif

 struct sigfd_compat_info
 {
--
2.26.0

[PATCH 5/9] Define SIGIO constant with SIGPOLL equivalence

>From 93c001e7da19c76a73a687dc6584bc31385a2693 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Mon, 29 Jun 2020 22:18:32 +
Subject: [PATCH 5/9] Define SIGIO constant with SIGPOLL equivalence,

Signed-off-by: David Carlier 
---
 include/qemu/osdep.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 6e0cf9132d..e090ead826 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -425,6 +425,10 @@ void qemu_anon_ram_free(void *ptr, size_t size);
 #define HAVE_CHARDEV_PARPORT 1
 #endif

+#if defined(__HAIKU__)
+#define SIGIO SIGPOLL
+#endif
+
 #if defined(CONFIG_LINUX)
 #ifndef BUS_MCEERR_AR
 #define BUS_MCEERR_AR 4
--
2.26.0

[PATCH 6/9] Including endian.h for bswap operations

>From 2f65cab41a8f4508532b3893815f222bf2293463 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Mon, 29 Jun 2020 22:19:34 +
Subject: [PATCH 6/9] Including endian.h for bswap operations.

Signed-off-by: David Carlier 
---
 include/qemu/bswap.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
index 2a9f3fe783..1d3e4c24e4 100644
--- a/include/qemu/bswap.h
+++ b/include/qemu/bswap.h
@@ -8,6 +8,8 @@
 # include 
 #elif defined(__FreeBSD__)
 # include 
+#elif defined(__HAIKU__)
+# include <endian.h>
 #elif defined(CONFIG_BYTESWAP_H)
 # include 

-- 
2.26.0

[PATCH 9/9] Implementing qemu_init_exec_dir.

>From fcdb25804af98329b52a04e7a4e5191135aac4f6 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Mon, 29 Jun 2020 22:36:03 +
Subject: [PATCH 9/9] Implementing qemu_init_exec_dir.

Signed-off-by: David Carlier 
---
 util/oslib-posix.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 7ad9195c44..72907d4d7f 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -60,6 +60,10 @@
 #include 
 #endif

+#ifdef __HAIKU__
+#include 
+#endif
+
 #include "qemu/mmap-alloc.h"

 #ifdef CONFIG_DEBUG_STACK_USAGE
@@ -389,6 +393,21 @@ void qemu_init_exec_dir(const char *argv0)
 }
 }
 }
+#elif defined(__HAIKU__)
+{
+image_info ii;
+int32_t c = 0;
+
+*buf = '\0';
+while (get_next_image_info(0, &c, &ii) == B_OK) {
+if (ii.type == B_APP_IMAGE) {
+strncpy(buf, ii.name, sizeof(buf));
+buf[sizeof(buf) - 1] = 0;
+p = buf;
+break;
+}
+}
+}
 #endif
 /* If we don't have any way of figuring out the actual executable
location then try argv[0].  */
--
2.26.0

[PATCH 2/9] Enabling *pty api

>From 8b205a027dacad08ce9187474d9490667904a4e2 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Mon, 29 Jun 2020 22:14:53 +
Subject: [PATCH 2/9] Enabling *pty api

Signed-off-by: David Carlier 
---
 configure   | 9 +
 util/qemu-openpty.c | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index d81931ba5e..50b559238f 100755
--- a/configure
+++ b/configure
@@ -2386,6 +2386,12 @@ else
   l2tpv3=no
 fi

+if check_include "pty.h" ; then
+  pty_h=yes
+else
+  pty_h=no
+fi
+
 #
 # vhost interdependencies and host support

@@ -7856,6 +7862,9 @@ fi
 if test "$sheepdog" = "yes" ; then
   echo "CONFIG_SHEEPDOG=y" >> $config_host_mak
 fi
+if test "$pty_h" = "yes" ; then
+  echo "CONFIG_PTY=y" >> $config_host_mak
+fi
 if test "$fuzzing" = "yes" ; then
   if test "$have_fuzzer" = "yes"; then
 FUZZ_LDFLAGS=" -fsanitize=address,fuzzer"
diff --git a/util/qemu-openpty.c b/util/qemu-openpty.c
index 2e8b43bdf5..9d8ad6905e 100644
--- a/util/qemu-openpty.c
+++ b/util/qemu-openpty.c
@@ -35,7 +35,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"

-#if defined(__GLIBC__)
+#if defined CONFIG_PTY
 # include <pty.h>
 #elif defined CONFIG_BSD
 # include 
--
2.26.0

[PATCH 4/9] Checking sys/signal.h presence.

>From 9d43c8cd1611d0347db9066b1df1dc34431b2028 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Mon, 29 Jun 2020 22:17:53 +
Subject: [PATCH 4/9] Checking sys/signal.h presence.

Signed-off-by: David Carlier 
---
 configure   | 8 
 hw/xen/xen-legacy-backend.c | 1 -
 include/qemu/osdep.h| 2 +-
 util/oslib-posix.c  | 1 -
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index 0b278f1646..83d8365d23 100755
--- a/configure
+++ b/configure
@@ -3212,6 +3212,11 @@ if ! check_include "ifaddrs.h" ; then
   have_ifaddrs_h=no
 fi

+have_sys_signal_h=no
+if check_include "sys/signal.h" ; then
+  have_sys_signal_h=yes
+fi
+
 ##
 # VTE probe

@@ -7398,6 +7403,9 @@ fi
 if test "$have_broken_size_max" = "yes" ; then
 echo "HAVE_BROKEN_SIZE_MAX=y" >> $config_host_mak
 fi
+if test "$have_sys_signal_h" = "yes" ; then
+echo "CONFIG_SYS_SIGNAL=y" >> $config_host_mak
+fi

 # Work around a system header bug with some kernel/XFS header
 # versions where they both try to define 'struct fsxattr':
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 2335ee2e65..92f08fca29 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -23,7 +23,6 @@
  */

 #include "qemu/osdep.h"
-#include <sys/signal.h>

 #include "hw/sysbus.h"
 #include "hw/boards.h"
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 0d26a1b9bd..6e0cf9132d 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -104,7 +104,7 @@ extern int daemon(int, int);
 #include 
 #include 

-#ifdef __OpenBSD__
+#ifdef CONFIG_SYS_SIGNAL
 #include <sys/signal.h>
 #endif

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 39ddc77c85..7ad9195c44 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -38,7 +38,6 @@
 #include "qemu/sockets.h"
 #include "qemu/thread.h"
 #include 
-#include <sys/signal.h>
 #include "qemu/cutils.h"

 #ifdef CONFIG_LINUX
--
2.26.0

PATCH [0/9]: haiku build fix

>From fcdb25804af98329b52a04e7a4e5191135aac4f6 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Mon, 29 Jun 2020 22:36:32 +
Subject: [PATCH 0/9] *** SUBJECT HERE ***

*** BLURB HERE ***

David Carlier (9):
  Enabling BSD symbols.
  Enabling *pty api
  Checking mlockall symbol presence.
  Checking sys/signal.h presence.
  Define SIGIO constant with SIGPOLL equivalence,
  Including endian.h for bswap operations.
  Skipping drm build, unsupported.
  Skipping sys/syscall.h inclusion as only used in qemu_signalfd anyway.
  Implementing qemu_init_exec_dir.

 configure   | 36 ++--
 hw/xen/xen-legacy-backend.c |  1 -
 include/qemu/bswap.h|  2 ++
 include/qemu/osdep.h|  6 +-
 os-posix.c  |  4 
 util/Makefile.objs  |  2 +-
 util/compatfd.c |  2 ++
 util/oslib-posix.c  | 20 +++-
 util/qemu-openpty.c |  2 +-
 9 files changed, 68 insertions(+), 7 deletions(-)

-- 
2.26.0

[PATCH 7/9] Skipping drm build, unsupported

>From 157a0374093371719de42e99364352d64190f52a Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Mon, 29 Jun 2020 22:20:06 +
Subject: [PATCH 7/9] Skipping drm build, unsupported.

Signed-off-by: David Carlier 
---
 util/Makefile.objs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/Makefile.objs b/util/Makefile.objs
index cc5e37177a..faebc13fac 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -39,7 +39,7 @@ util-obj-y += qsp.o
 util-obj-y += range.o
 util-obj-y += stats64.o
 util-obj-y += systemd.o
-util-obj-$(CONFIG_POSIX) += drm.o
+util-obj-$(CONFIG_LINUX) += drm.o
 util-obj-y += guest-random.o
 util-obj-$(CONFIG_GIO) += dbus.o
 dbus.o-cflags = $(GIO_CFLAGS)
--
2.26.0

[PATCH 1/9] Enabling BSD symbols

>From 5c6022f21289eb6e78e93d584c766db82165dced Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Mon, 29 Jun 2020 22:13:35 +
Subject: [PATCH 1/9] Enabling BSD symbols.

Signed-off-by: David Carlier 
---
 configure | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 4a22dcd563..d81931ba5e 100755
--- a/configure
+++ b/configure
@@ -903,8 +903,8 @@ SunOS)
 ;;
 Haiku)
   haiku="yes"
-  QEMU_CFLAGS="-DB_USE_POSITIVE_POSIX_ERRORS $QEMU_CFLAGS"
-  LIBS="-lposix_error_mapper -lnetwork $LIBS"
+  QEMU_CFLAGS="-DB_USE_POSITIVE_POSIX_ERRORS -DBSD_SOURCE $QEMU_CFLAGS"
+  LIBS="-lposix_error_mapper -lnetwork -lbsd $LIBS"
 ;;
 Linux)
   audio_drv_list="try-pa oss"
--
2.26.0

[PATCH 3/9] Checking mlockall symbol presence

>From ac450c4abe03e0e461fede18727500e616d9f7e2 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Mon, 29 Jun 2020 22:15:21 +
Subject: [PATCH 3/9] Checking mlockall symbol presence.

Signed-off-by: David Carlier 
---
 configure  | 15 +++
 os-posix.c |  4 
 2 files changed, 19 insertions(+)

diff --git a/configure b/configure
index 50b559238f..0b278f1646 100755
--- a/configure
+++ b/configure
@@ -2392,6 +2392,18 @@ else
   pty_h=no
 fi

+cat > $TMPC << EOF
+#include <sys/mman.h>
+int main(int argc, char *argv[]) {
+return mlockall(MCL_FUTURE);
+}
+EOF
+if compile_prog "" "" ; then
+  have_mlockall=yes
+else
+  have_mlockall=no
+fi
+
 #
 # vhost interdependencies and host support

@@ -7865,6 +7877,9 @@ fi
 if test "$pty_h" = "yes" ; then
   echo "CONFIG_PTY=y" >> $config_host_mak
 fi
+if test "$have_mlockall" = "yes" ; then
+  echo "CONFIG_MLOCKALL=y" >> $config_host_mak
+fi
 if test "$fuzzing" = "yes" ; then
   if test "$have_fuzzer" = "yes"; then
 FUZZ_LDFLAGS=" -fsanitize=address,fuzzer"
diff --git a/os-posix.c b/os-posix.c
index 3cd52e1e70..e02b566940 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -337,6 +337,7 @@ bool is_daemonized(void)

 int os_mlock(void)
 {
+#if defined CONFIG_MLOCKALL
 int ret = 0;

 ret = mlockall(MCL_CURRENT | MCL_FUTURE);
@@ -345,4 +346,7 @@ int os_mlock(void)
 }

 return ret;
+#else
+return -ENOSYS;
+#endif
 }
--
2.26.0

Re: [PATCH 04/10] spice: Move all the spice-related code in spice-app.so

  Hi,

>  common-obj-y = audio.o audio_legacy.o noaudio.o wavaudio.o mixeng.o
> -common-obj-$(CONFIG_SPICE) += spiceaudio.o
> +spice-app.mo-objs += ../audio/spiceaudio.o

Hmm.  audio/audio.c will try to load audio-${backend}.so when you run
qemu -audiodev ${backend}, so I suspect this is not going to work ...

I guess we had better try to tackle the modules-depending-on-modules
problem.  Given that g_module_open() doesn't support a custom shared
object search path I suspect we can't simply link audio-spice.so against
ui-spice-core.so and let the dynamic linker handle this for us.
Probably qemu needs a list of dependencies it'll check on module load
...

take care,
  Gerd

Re: [PATCH 05/10] build: Avoid build failure when building drivers as modules

  Hi,

> > +#
> > +# common-obj-m has some crap here, probably as side effect from
> > +# filling obj-y.  Clear it.  Fixes suspicious dependency errors when
> > +# building devices as modules.
> > +#
> > +common-obj-m :=
> 
> This comment doesn't fill me with confidence - makes it feel like there's
> some more important root cause that needs addressing instead.

It's my fault, see also
  
https://patchwork.ozlabs.org/project/qemu-devel/patch/20200624131045.14512-5-kra...@redhat.com/

I think the underlying problem is that the functions building *-m
variables do not properly handle per-target objects.  Which has no bad
side effects as long as we never recurse into hw/, but if we want to build
devices as modules we have to ...

Unless we want to build per-target code as modules the above should do the
trick I think.

Better suggestions are welcome of course.

take care,
  Gerd

Re: [PATCH 08/10] build: Add SPICE_CFLAGS and SPICE_LIBS to relevant files

  Hi,

>  obj-$(CONFIG_PC) += pc.o pc_sysfw.o
> +pc.o-cflags += $(SPICE_CFLAGS)

Hmm, looks strange.  Why does pc.c need spice?

> +qmp-cmds.o-cflags += $(SPICE_CFLAGS)
> +hmp-cmds.o-cflags += $(SPICE_CFLAGS)

spice monitor commands need this I guess?

> +misc.o-cflags += $(SPICE_CFLAGS)

Why this?

> +vl.o-cflags := $(GPROF_CFLAGS) $(SDL_CFLAGS) $(SPICE_CFLAGS)

spice init probably.

> -stub-obj-y += vmgenid.o
>  stub-obj-y += sysbus.o
>  stub-obj-y += tpm.o
>  stub-obj-y += trace-control.o
> +stub-obj-y += vmgenid.o

Huh?

> -spice-app.mo-cflags := $(GIO_CFLAGS)
> -spice-app.mo-libs := $(GIO_LIBS)
> +spice-app.mo-cflags := $(GIO_CFLAGS) $(SPICE_CFLAGS)
> +spice-app.mo-libs := $(GIO_LIBS) $(SPICE_LIBS)

Good.

> --- a/util/module.c
> +++ b/util/module.c
> @@ -22,11 +22,11 @@
>  #ifdef CONFIG_MODULE_UPGRADES
>  #include "qemu-version.h"
>  #endif
> -#ifdef CONFIG_TRACE_RECORDER
>  #include "trace/recorder.h"
> -#endif
>  
>  
> +RECORDER(modules, 16, "QEMU load modules");
> +
>  typedef struct ModuleEntry
>  {
>  void (*init)(void);
> @@ -85,6 +85,15 @@ void register_dso_module_init(void (*fn)(void), 
> module_init_type type)
>  {
>  ModuleEntry *e;
>  
> +#ifdef CONFIG_TRACE_RECORDER
> +static const char *name[] = {
> +"MIGRATION", "BLOCK", "OPTS", "QOM",
> +"TRACE", "XEN_BACKEND", "LIBQOS", "FUZZ_TARGET",
> +"MAX"
> +};
> +#endif
> +record(modules, "Register DSO module init %p type %u %+s",
> +   fn, type, name[type]);
>  init_lists();
>  
>  e = g_malloc0(sizeof(*e));

Unrelated change.

(the recorder stuff should probably integrate with qemu trace support,
so you can record any trace point qemu has, but that'll be another patch
series ...)

take care,
  Gerd

Re: [PATCH 09/10] spice: Put spice functions in a separate load module

  Hi,

> > If so the more normal approach would be to have a struct defining
> > a set of callbacks, that can be registered. Or if there's a natural
> > fit with QOM, then a QOM interface that can then have a QOM object
> > impl registered as a singleton.
> 
> That was my second attempt (after the weak symbols). I cleaned it up a bit
> and put it here: https://github.com/c3d/qemu/commits/spice-vtable.

I think this is the direction we should take.

> What made me switch to the approach in this series is the following
> considerations:
> 
> - A vtable is useful if there can be multiple values for a method, e.g. to
>   implement inheritance, or if you have multiple instances. This is not the
>   case here.

Well, we'll have two.  The normal functions.  And the stubs.

The stubs are inline functions right now, in include/ui/qemu-spice.h, in
the !CONFIG_SPICE section.  We can turn them into normal functions, move
them to some C file.  Let the QemuSpiceOps function pointers point to the
stubs initially.  When spice initializes (no matter whether modular or
not) it'll set QemuSpiceOps to the normal implementation.

That way we'll also remove some spice #ifdefs as part of the spice
modularization effort.

Things like the "using_spice" variable which don't depend on the spice
shared libraries can also be moved to the new C file with the spice
stubs.

I don't think we need to hide QemuSpiceOps with inline functions like
qemu_spice_migrate_info().  I would simply use ...

struct QemuSpiceOps {
[ ... ]
int (*migrate_info)(...);
[ ... ]
} qemu_spice;

... then change the ...

qemu_spice_migrate_info(...)

.. callsites into ...

qemu_spice.migrate_info(...)

> - Overloading QOM for that purpose looked more confusing than anything else.
>   It looked like I was mixing unrelated concepts. Maybe that's just me.

Hmm?  Not sure what you mean.  There is no need for QOM here (and I
can't see anything like that in your spice-vtable branch either).

> - The required change with a vtable ends up being more extensive. Instead of
>   changing a single line to put an entry point in a DSO, you need to create
>   the vtable, add functions to it, add a register function, etc. I was
>   looking for an easier and more scalable way.

IMHO it isn't too much overhead, and I find the code is more readable
that way.

> - In particular, with a vtable, you cannot take advantage of the syntactic
>   trick I used here, which is that foo(x) is a shortcut for (*foo)(x).
>   So for a vtable, you need to manually write wrappers.

See above, I don't think we need wrappers.

take care,
  Gerd

QEMU | Pipeline #161336114 has failed for master | fc1bff95

2020-06-29 Thread GitLab via



Your pipeline has failed.

Project: QEMU ( https://gitlab.com/qemu-project/qemu )
Branch: master ( https://gitlab.com/qemu-project/qemu/-/commits/master )

Commit: fc1bff95 ( 
https://gitlab.com/qemu-project/qemu/-/commit/fc1bff958998910ec8d25db86cd2f53ff125f7ab
 )
Commit Message: hw/misc/pca9552: Add missing TypeInfo::class_si...
Commit Author: philmd ( https://gitlab.com/philmd )
Committed by: Peter Maydell ( https://gitlab.com/pm215 )

Pipeline #161336114 ( 
https://gitlab.com/qemu-project/qemu/-/pipelines/161336114 ) triggered by Alex 
Bennée ( https://gitlab.com/stsquad )
had 2 failed builds.

Job #616493725 ( https://gitlab.com/qemu-project/qemu/-/jobs/616493725/raw )

Stage: test
Name: build-disabled
Trace: qemu-system-i386: falling back to tcg
Could not access KVM kernel module: No such file or directory
qemu-system-i386: -accel kvm: failed to initialize kvm: No such file or 
directory
qemu-system-i386: falling back to tcg
Could not access KVM kernel module: No such file or directory
qemu-system-i386: -accel kvm: failed to initialize kvm: No such file or 
directory
qemu-system-i386: falling back to tcg
  TESTcheck-qtest-i386: tests/qtest/device-introspect-test
  TESTcheck-qtest-i386: tests/qtest/machine-none-test
  TESTcheck-qtest-i386: tests/qtest/qmp-test
  TESTcheck-qtest-i386: tests/qtest/qmp-cmd-test
  TESTcheck-qtest-i386: tests/qtest/qom-test
  TESTcheck-qtest-i386: tests/qtest/test-hmp
  TESTcheck-qtest-i386: tests/qtest/qos-test
  TESTcheck-qtest-mips64: tests/qtest/endianness-test
  TESTcheck-qtest-mips64: tests/qtest/display-vga-test
  TESTcheck-qtest-mips64: tests/qtest/cdrom-test
  TESTcheck-qtest-mips64: tests/qtest/device-introspect-test
  TESTcheck-qtest-mips64: tests/qtest/machine-none-test
  TESTcheck-qtest-mips64: tests/qtest/qmp-test
  TESTcheck-qtest-mips64: tests/qtest/qmp-cmd-test
  TESTcheck-qtest-mips64: tests/qtest/qom-test
  TESTcheck-qtest-mips64: tests/qtest/test-hmp
  TESTcheck-qtest-mips64: tests/qtest/qos-test
  TESTcheck-qtest-ppc64: tests/qtest/machine-none-test
  TESTcheck-qtest-ppc64: tests/qtest/qmp-test
  TESTcheck-qtest-ppc64: tests/qtest/qmp-cmd-test
  TESTcheck-qtest-ppc64: tests/qtest/qom-test
section_end:1593469431:step_script
[0K[31;1mERROR: Job failed: execution took longer than 1h0m0s seconds
[0;m

Job #616493726 ( https://gitlab.com/qemu-project/qemu/-/jobs/616493726/raw )

Stage: test
Name: build-tcg-disabled
Trace: 208  ...[21:45:15] ...  
208  [32mpass  [0m [21:45:15] [21:45:15]   0s   
209  ...[21:45:15] ...  
209  [32mpass  [0m [21:45:15] [21:45:16]   1s   
215  ...[21:45:16] ...  
215  [32mpass  [0m [21:45:16] [21:46:36]  79s   
216  ...[21:46:36] ...  
216  [32mpass  [0m [21:46:36] [21:46:43]   7s   
218  ...[21:46:43] ...  
218  [32mpass  [0m [21:46:43] [21:46:44]   1s   
222  ...[21:46:44] ...  
222  [32mpass  [0m [21:46:44] [21:46:45]   1s   
227  ...[21:46:45] ...  
227  [32mpass  [0m [21:46:45] [21:46:46]   1s   
234  ...[21:46:46] ...  
234  [32mpass  [0m [21:46:46] [21:46:46]   0s   
246  ...[21:46:46] ...  
246  [32mpass  [0m [21:46:46] [21:46:47]   1s   
247  ...[21:46:47] ...  
247  [32mpass  [0m [21:46:47] [21:46:48]   1s   
248  ...[21:46:48] ...  
248  [32mpass  [0m [21:46:48] [21:46:48]   0s   
250  ...[21:46:48] ...  
250  [32mpass  [0m [21:46:48] [21:46:49]   1s   
254  ...[21:46:49] ...  
254  [32mpass  [0m [21:46:49] [21:46:49]   0s   
255  ...[21:46:49] ...  
255  [32mpass  [0m [21:46:49] [21:46:51]   2s   
257  ...[21:46:51] ...  
257  [32mpass  [0m [21:46:51] [21:47:02]  11s   
258  ...[21:47:02] ...  
258  [32mpass  [0m [21:47:02] [21:47:02]   0s   
260  ...[21:47:02] ...  
260  [32mpass  [0m [21:47:02] [21:47:03]   1s   
261  ...[21:47:03] ...  
261  [32mpass  [0m [21:47:03] [21:47:29]  26s   
262  ...[21:47:29] ...  
262  [32mpass  [0m [21:47:29] [21:47:29]   0s

[PATCH v2 2/4] hw/block/nvme: support multiple namespaces

From: Klaus Jensen 

This adds support for multiple namespaces by introducing a new 'nvme-ns'
device model. The nvme device creates a bus named from the device name
('id'). The nvme-ns devices then connect to this bus and register
themselves with the nvme device.

This changes how an nvme device is created. Example with two namespaces:

  -drive file=nvme0n1.img,if=none,id=disk1
  -drive file=nvme0n2.img,if=none,id=disk2
  -device nvme,serial=deadbeef,id=nvme0
  -device nvme-ns,drive=disk1,bus=nvme0,nsid=1
  -device nvme-ns,drive=disk2,bus=nvme0,nsid=2

The drive property is kept on the nvme device to keep the change
backward compatible, but the property is now optional. Specifying a
drive for the nvme device will always create the namespace with nsid 1.

Signed-off-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
---
 hw/block/Makefile.objs |   2 +-
 hw/block/nvme-ns.c | 172 +++
 hw/block/nvme-ns.h |  66 +++
 hw/block/nvme.c| 255 ++---
 hw/block/nvme.h|  44 +++
 hw/block/trace-events  |   8 +-
 6 files changed, 431 insertions(+), 116 deletions(-)
 create mode 100644 hw/block/nvme-ns.c
 create mode 100644 hw/block/nvme-ns.h

diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
index 8855c2265639..8c159bc56630 100644
--- a/hw/block/Makefile.objs
+++ b/hw/block/Makefile.objs
@@ -13,6 +13,6 @@ common-obj-$(CONFIG_SH4) += tc58128.o
 
 obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o
 obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o
-common-obj-$(CONFIG_NVME_PCI) += nvme.o
+common-obj-$(CONFIG_NVME_PCI) += nvme.o nvme-ns.o
 
 obj-y += dataplane/
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
new file mode 100644
index ..28ce5e011568
--- /dev/null
+++ b/hw/block/nvme-ns.c
@@ -0,0 +1,172 @@
+/*
+ * QEMU NVM Express Virtual Namespace
+ *
+ * Copyright (c) 2019 CNEX Labs
+ * Copyright (c) 2020 Samsung Electronics
+ *
+ * Authors:
+ *  Klaus Jensen  
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "hw/block/block.h"
+#include "hw/pci/pci.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/qdev-core.h"
+
+#include "nvme.h"
+#include "nvme-ns.h"
+
+static void nvme_ns_init(NvmeNamespace *ns)
+{
+NvmeIdNs *id_ns = &ns->id_ns;
+
+id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
+
+id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));
+
+/* no thin provisioning */
+id_ns->ncap = id_ns->nsze;
+id_ns->nuse = id_ns->ncap;
+}
+
+static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, NvmeIdCtrl *id,
+Error **errp)
+{
+uint64_t perm, shared_perm;
+
+Error *local_err = NULL;
+int ret;
+
+perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+BLK_PERM_GRAPH_MOD;
+
+ret = blk_set_perm(ns->blk, perm, shared_perm, &local_err);
+if (ret) {
+error_propagate_prepend(errp, local_err,
+"could not set block permissions: ");
+return ret;
+}
+
+ns->size = blk_getlength(ns->blk);
+if (ns->size < 0) {
+error_setg_errno(errp, -ns->size, "could not get blockdev size");
+return -1;
+}
+
+switch (n->conf.wce) {
+case ON_OFF_AUTO_ON:
+n->features.vwc = 1;
+break;
+case ON_OFF_AUTO_OFF:
+n->features.vwc = 0;
+break;
+case ON_OFF_AUTO_AUTO:
+n->features.vwc = blk_enable_write_cache(ns->blk);
+break;
+default:
+abort();
+}
+
+blk_set_enable_write_cache(ns->blk, n->features.vwc);
+
+return 0;
+}
+
+static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
+{
+if (!ns->blk) {
+error_setg(errp, "block backend not configured");
+return -1;
+}
+
+return 0;
+}
+
+int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+if (nvme_ns_check_constraints(ns, errp)) {
+return -1;
+}
+
+if (nvme_ns_init_blk(n, ns, &n->id_ctrl, errp)) {
+return -1;
+}
+
+nvme_ns_init(ns);
+if (nvme_register_namespace(n, ns, errp)) {
+return -1;
+}
+
+return 0;
+}
+
+static void nvme_ns_realize(DeviceState *dev, Error **errp)
+{
+NvmeNamespace *ns = NVME_NS(dev);
+BusState *s = qdev_get_parent_bus(dev);
+NvmeCtrl *n = NVME(s->parent);
+Error *local_err = NULL;
+
+if (nvme_ns_setup(n, ns, &local_err)) {
+error_propagate_prepend(errp, local_err,
+"could not setup namespace: ");
+return;
+}
+}
+
+static Property nvme_ns_props[] = {
+DEFINE_PROP_DRIVE("drive", NvmeNamespace, blk),
+DE

[PATCH v2 3/4] pci: allocate pci id for nvme

From: Klaus Jensen 

The emulated nvme device (hw/block/nvme.c) is currently using an
internal Intel device id.

Prepare to change that by allocating a device id under the 1b36 (Red
Hat, Inc.) vendor id.

Signed-off-by: Klaus Jensen 
Acked-by: Keith Busch 
Acked-by: Gerd Hoffmann 
Reviewed-by: Maxim Levitsky 
---
 MAINTAINERS|  1 +
 docs/specs/nvme.txt| 23 +++
 docs/specs/pci-ids.txt |  1 +
 include/hw/pci/pci.h   |  1 +
 4 files changed, 26 insertions(+)
 create mode 100644 docs/specs/nvme.txt

diff --git a/MAINTAINERS b/MAINTAINERS
index dec252f38b1d..7d190ae364e1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1796,6 +1796,7 @@ L: qemu-bl...@nongnu.org
 S: Supported
 F: hw/block/nvme*
 F: tests/qtest/nvme-test.c
+F: docs/specs/nvme.txt
 
 megasas
 M: Hannes Reinecke 
diff --git a/docs/specs/nvme.txt b/docs/specs/nvme.txt
new file mode 100644
index ..56d393884e7a
--- /dev/null
+++ b/docs/specs/nvme.txt
@@ -0,0 +1,23 @@
+NVM Express Controller
+==
+
+The nvme device (-device nvme) emulates an NVM Express Controller.
+
+
+Reference Specifications
+
+
+The device currently implements most mandatory features of NVMe v1.3d, see
+
+  https://nvmexpress.org/resources/specifications/
+
+for the specification.
+
+
+Known issues
+
+
+* The accounting numbers in the SMART/Health are reset across power cycles
+
+* Interrupt Coalescing is not supported and is disabled by default in violation
+  of the specification.
diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index 4d53e5c7d9d5..abbdbca6be38 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -63,6 +63,7 @@ PCI devices (other than virtio):
 1b36:000b  PCIe Expander Bridge (-device pxb-pcie)
 1b36:000d  PCI xhci usb host adapter
 1b36:000f  mdpy (mdev sample device), linux/samples/vfio-mdev/mdpy.c
+1b36:0010  PCIe NVMe device (-device nvme)
 
 All these devices are documented in docs/specs.
 
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 2347dc36bfb5..7e565ba03262 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -104,6 +104,7 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_REDHAT_XHCI0x000d
 #define PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE 0x000e
 #define PCI_DEVICE_ID_REDHAT_MDPY0x000f
+#define PCI_DEVICE_ID_REDHAT_NVME0x0010
 #define PCI_DEVICE_ID_REDHAT_QXL 0x0100
 
 #define FMT_PCIBUS  PRIx64
-- 
2.27.0

[PATCH v2 1/4] hw/block/nvme: refactor identify active namespace id list

From: Klaus Jensen 

Prepare to support inactive namespaces.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 4bcd114f76b1..eaee420219fd 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1573,16 +1573,16 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, 
NvmeRequest *req)
 uint32_t min_nsid = le32_to_cpu(c->nsid);
 uint32_t *list;
 uint16_t ret;
-int i, j = 0;
+int j = 0;
 
 trace_pci_nvme_identify_nslist(min_nsid);
 
 list = g_malloc0(data_len);
-for (i = 0; i < n->num_namespaces; i++) {
-if (i < min_nsid) {
+for (int i = 1; i <= n->num_namespaces; i++) {
+if (i <= min_nsid) {
 continue;
 }
-list[j++] = cpu_to_le32(i + 1);
+list[j++] = cpu_to_le32(i);
 if (j == data_len / sizeof(uint32_t)) {
 break;
 }
-- 
2.27.0

[PATCH v2 0/4] hw/block/nvme: support multiple namespaces

From: Klaus Jensen 

v2: bummer, v1 didn't apply cleanly to master (it applied to Kevin's
block tree); rebased to make patchew happy


This adds a new 'nvme-ns' device that attaches to the nvme device
through a bus. This decouples the nvme controller and nvme namespaces
such that multiple namespaces may be attached to the controller.

With this in place, we can allocate a fresh pci vendor/device id and get
rid of the Intel id pair that causes the Linux kernel to apply a bunch
of quirks that the device no longer has.

Based-on: <20200629203155.1236860-1-...@irrelevant.dk>
("[PATCH 0/3] hw/block/nvme: support scatter gather lists")

Klaus Jensen (4):
  hw/block/nvme: refactor identify active namespace id list
  hw/block/nvme: support multiple namespaces
  pci: allocate pci id for nvme
  hw/block/nvme: change controller pci id

 MAINTAINERS|   1 +
 docs/specs/nvme.txt|  23 
 docs/specs/pci-ids.txt |   1 +
 hw/block/Makefile.objs |   2 +-
 hw/block/nvme-ns.c | 172 ++
 hw/block/nvme-ns.h |  66 ++
 hw/block/nvme.c| 273 +++--
 hw/block/nvme.h|  45 +++
 hw/block/trace-events  |   8 +-
 hw/core/machine.c  |   1 +
 include/hw/pci/pci.h   |   1 +
 11 files changed, 472 insertions(+), 121 deletions(-)
 create mode 100644 docs/specs/nvme.txt
 create mode 100644 hw/block/nvme-ns.c
 create mode 100644 hw/block/nvme-ns.h

-- 
2.27.0

[PATCH v2 4/4] hw/block/nvme: change controller pci id

From: Klaus Jensen 

There are two reasons for changing this:

  1. The nvme device currently uses an internal Intel device id.

  2. Since commits "nvme: fix write zeroes offset and count" and "nvme:
 support multiple namespaces" the controller device no longer has
     the quirks that the Linux kernel thinks it has.

 As the quirks are applied based on pci vendor and device id, change
 them to get rid of the quirks.

To keep backward compatibility, add a new 'x-use-intel-id' parameter to
the nvme device to force use of the Intel vendor and device id. This is
off by default but add a compat property to set this for machines 5.0
and older.

Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c   | 12 ++--
 hw/block/nvme.h   |  1 +
 hw/core/machine.c |  1 +
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 9db8e4811433..164c5e294d06 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -2756,6 +2756,15 @@ static void nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 
 pci_conf[PCI_INTERRUPT_PIN] = 1;
 pci_config_set_prog_interface(pci_conf, 0x2);
+
+if (n->params.use_intel_id) {
+pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+pci_config_set_device_id(pci_conf, 0x5846);
+} else {
+pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT);
+pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME);
+}
+
 pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
 pcie_endpoint_cap_init(pci_dev, 0x80);
 
@@ -2908,6 +2917,7 @@ static Property nvme_props[] = {
 DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
 DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
 DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
+DEFINE_PROP_BOOL("x-use-intel-id", NvmeCtrl, params.use_intel_id, false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2924,8 +2934,6 @@ static void nvme_class_init(ObjectClass *oc, void *data)
 pc->realize = nvme_realize;
 pc->exit = nvme_exit;
 pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
-pc->vendor_id = PCI_VENDOR_ID_INTEL;
-pc->device_id = 0x5845;
 pc->revision = 2;
 
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 8d7610477e7d..8bf1a050497e 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -15,6 +15,7 @@ typedef struct NvmeParams {
 uint8_t  aerl;
 uint32_t aer_max_queued;
 uint8_t  mdts;
+bool use_intel_id;
 } NvmeParams;
 
 typedef struct NvmeAsyncEvent {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 211b4e077aca..827f054ac29d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -34,6 +34,7 @@ GlobalProperty hw_compat_5_0[] = {
 { "vmport", "x-signal-unsupported-cmd", "off" },
 { "vmport", "x-report-vmx-type", "off" },
 { "vmport", "x-cmds-v2", "off" },
+{ "nvme", "x-use-intel-id", "on"},
 };
 const size_t hw_compat_5_0_len = G_N_ELEMENTS(hw_compat_5_0);
 
-- 
2.27.0

Re: [PATCH 2/5] hw/i2c: Rename i2c_try_create_slave() as i2c_slave_new()


On Mon, 29 Jun 2020, Philippe Mathieu-Daudé wrote:

We use "new" names for functions that allocate and initialize
device objects: pci_new(), isa_new(), usb_new().
Let's call this one i2c_slave_new(). Since we have to update
all the callers, also let it return a I2CSlave object.


All the callers now need a cast due to the change to I2CSlave * instead of 
what they expect. Is that really worth it? Also this introduces an 
inconsistency between i2c_create_slave and i2c_slave_new so not sure about 
that part but I don't really mind either way. Maybe return what most callers 
expect so the calls are simple and don't need an additional cast in most 
of the cases?


Regards,
BALATON Zoltan


Suggested-by: Markus Armbruster 
Signed-off-by: Philippe Mathieu-Daudé 
---
include/hw/i2c/i2c.h | 2 +-
hw/arm/aspeed.c  | 4 ++--
hw/i2c/core.c| 9 -
3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
index d6e3d85faf..18efc668f1 100644
--- a/include/hw/i2c/i2c.h
+++ b/include/hw/i2c/i2c.h
@@ -79,8 +79,8 @@ int i2c_send_recv(I2CBus *bus, uint8_t *data, bool send);
int i2c_send(I2CBus *bus, uint8_t data);
uint8_t i2c_recv(I2CBus *bus);

+I2CSlave *i2c_slave_new(const char *name, uint8_t addr);
DeviceState *i2c_create_slave(I2CBus *bus, const char *name, uint8_t addr);
-DeviceState *i2c_try_create_slave(const char *name, uint8_t addr);
bool i2c_realize_and_unref(DeviceState *dev, I2CBus *bus, Error **errp);

/* lm832x.c */
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 1285bf82c0..54ca36e0b6 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -513,7 +513,7 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState 
*bmc)
/* Bus 3: TODO bmp280@77 */
/* Bus 3: TODO max31785@52 */
/* Bus 3: TODO dps310@76 */
-dev = i2c_try_create_slave(TYPE_PCA9552, 0x60);
+dev = DEVICE(i2c_slave_new(TYPE_PCA9552, 0x60));
qdev_prop_set_string(dev, "description", "pca1");
i2c_realize_and_unref(dev, aspeed_i2c_get_bus(&soc->i2c, 3),
  &error_fatal);
@@ -531,7 +531,7 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState 
*bmc)

smbus_eeprom_init_one(aspeed_i2c_get_bus(&soc->i2c, 11), 0x51,
  eeprom_buf);
-dev = i2c_try_create_slave(TYPE_PCA9552, 0x60);
+dev = DEVICE(i2c_slave_new(TYPE_PCA9552, 0x60));
qdev_prop_set_string(dev, "description", "pca0");
i2c_realize_and_unref(dev, aspeed_i2c_get_bus(&soc->i2c, 11),
  &error_fatal);
diff --git a/hw/i2c/core.c b/hw/i2c/core.c
index acf34a12d6..6eacb4a463 100644
--- a/hw/i2c/core.c
+++ b/hw/i2c/core.c
@@ -267,13 +267,13 @@ const VMStateDescription vmstate_i2c_slave = {
}
};

-DeviceState *i2c_try_create_slave(const char *name, uint8_t addr)
+I2CSlave *i2c_slave_new(const char *name, uint8_t addr)
{
DeviceState *dev;

dev = qdev_new(name);
qdev_prop_set_uint8(dev, "address", addr);
-return dev;
+return I2C_SLAVE(dev);
}

bool i2c_realize_and_unref(DeviceState *dev, I2CBus *bus, Error **errp)
@@ -283,10 +283,9 @@ bool i2c_realize_and_unref(DeviceState *dev, I2CBus *bus, 
Error **errp)

DeviceState *i2c_create_slave(I2CBus *bus, const char *name, uint8_t addr)
{
-DeviceState *dev;
+DeviceState *dev = DEVICE(i2c_slave_new(name, addr));

-dev = i2c_try_create_slave(name, addr);
-i2c_realize_and_unref(dev, bus, &error_fatal);
+i2c_realize_and_unref(I2C_SLAVE(dev), bus, &error_fatal);

return dev;
}

Re: [PATCH 0/2] hw/block/nvme: handle transient dma errors

On Jun 29 14:07, no-re...@patchew.org wrote:
> Patchew URL: 
> https://patchew.org/QEMU/20200629202053.1223342-1-...@irrelevant.dk/
> 
> 
> 
> Hi,
> 
> This series failed the docker-quick@centos7 build test. Please find the 
> testing commands and
> their output below. If you have Docker installed, you can probably reproduce 
> it
> locally.
> 
> === TEST SCRIPT BEGIN ===
> #!/bin/bash
> make docker-image-centos7 V=1 NETWORK=1
> time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
> === TEST SCRIPT END ===
> 
> --- /tmp/qemu-test/src/tests/qemu-iotests/040.out   2020-06-29 
> 20:12:10.0 +
> +++ /tmp/qemu-test/build/tests/qemu-iotests/040.out.bad 2020-06-29 
> 20:58:48.288790818 +
> @@ -1,3 +1,5 @@
> +WARNING:qemu.machine:qemu received signal 9: 
> /tmp/qemu-test/build/tests/qemu-iotests/../../x86_64-softmmu/qemu-system-x86_64
>  -display none -vga none -chardev 
> socket,id=mon,path=/tmp/tmp.Jdol0fPScQ/qemu-21749-monitor.sock -mon 
> chardev=mon,mode=control -qtest 
> unix:path=/tmp/tmp.Jdol0fPScQ/qemu-21749-qtest.sock -accel qtest -nodefaults 
> -display none -accel qtest
> +WARNING:qemu.machine:qemu received signal 9: 
> /tmp/qemu-test/build/tests/qemu-iotests/../../x86_64-softmmu/qemu-system-x86_64
>  -display none -vga none -chardev 
> socket,id=mon,path=/tmp/tmp.Jdol0fPScQ/qemu-21749-monitor.sock -mon 
> chardev=mon,mode=control -qtest 
> unix:path=/tmp/tmp.Jdol0fPScQ/qemu-21749-qtest.sock -accel qtest -nodefaults 
> -display none -accel qtest


Hmm, I can't seem to reproduce this locally and the test succeeded on
the next series[1] that is based on this.

Is this a flaky test? Or a bad test runner? I'm of course worried when
a qcow2 test fails and I touch something else than the nvme device ;)


  [1]: https://patchew.org/QEMU/20200629203155.1236860-1-...@irrelevant.dk/

Re: [PATCH v2 2/4] smbus: Fix spd_data_generate() error API violation

On Mon, 29 Jun 2020, Philippe Mathieu-Daudé wrote:

On 6/27/20 9:17 AM, Markus Armbruster wrote:

BALATON Zoltan writes:

On Wed, 22 Apr 2020, BALATON Zoltan wrote:

On Wed, 22 Apr 2020, Philippe Mathieu-Daudé wrote:

On 4/22/20 4:27 PM, BALATON Zoltan wrote:

On Wed, 22 Apr 2020, Markus Armbruster wrote:

The Error ** argument must be NULL, &error_abort, &error_fatal, or a
pointer to a variable containing NULL.  Passing an argument of the
latter kind twice without clearing it in between is wrong: if the
first call sets an error, it no longer points to NULL for the second
call.

spd_data_generate() can pass @errp to error_setg() more than once when
it adjusts both memory size and type.  Harmless, because no caller
passes anything that needs adjusting.  Until the previous commit,
sam460ex passed types that needed adjusting, but not sizes.

spd_data_generate()'s contract is rather awkward:

    If everything's fine, return non-null and don't set an error.

    Else, if memory size or type need adjusting, return non-null and
    set an error describing the adjustment.

    Else, return null and set an error reporting why no data can be
    generated.

Its callers treat the error as a warning even when null is returned.
They don't create the "smbus-eeprom" device then.  Suspicious.

Since the previous commit, only "everything's fine" can actually
happen.  Drop the unused code and simplify the callers.  This gets rid
of the error API violation.

This leaves board code no chance to recover from values given by
user that won't fit without duplicating checks that this function
does. Also this will abort without giving meaningful errors if an
invalid value does get through and result in a crash which is not
user friendly. So I don't like this but if others think this is
acceptable maybe at least unit test should be adjusted to make
sure aborts cannot be triggered by user for values that are not
usually tested during development.

Agreed. Do you have an example (or more) to better show Markus this
code use? So we can add tests.

After Markus's patches probably nothing uses it any more but this
comes with the result that previously giving some random value such
as -m 100 did produce a working sam460ex machine after some warnings
but now it just throws back some errors to the user which may or may
not be helpful to them.

Personally I'd use a script to generate a dumb static array of all
possible sizes...

Maybe testing with the biggest valid value such as -m 2048 (that's
commonly used probably) and an invalid value such as -m 100 might be
enough. Testing all possible values might take too long and would
not test what happens with invalid values. Ideally those invalid
values should also work like before a0258e4afa but should at least
give a meaningful warning so the user can fix the command line
without too much head scratching. Actually that commit was from Igor
not from Markus so sorry for attributing that to Markus too, I
remembered wrong.

By the way you could argue that on real machine you cannot plug
certain combinations of memory modules so it's enough to model that
but I think QEMU does not have to be that strict and also support
configs that cannot happen on real hardware but would work. This
might be useful for example if you have some amount of memory to
set aside for a VM on a host but that's not a size that exists in
memory modules on real hardware. This also works on pc machine in
qemu-system-i386 for example: it accepts -m 100 and does its best to
create a machine with such unrealistic size. The sam460ex did the
same (within SoC's limits) and before a0258e4afa -m 100 was fixed up
to 96 MB which is now not possible due to change in QEMU internal
APIs. This probably isn't important enough to worth the extra effort
to support but would have been nice to preserve.

Besides the above here's another use case of the fix ups that I wanted
to keep:

https://patchew.org/QEMU/cover.1592315226.git.bala...@eik.bme.hu/b5f4598529a77f15f554c593e9be2d0ff9e5fab3.1592315226.git.bala...@eik.bme.hu/

This board normally uses OpenBIOS which gets RAM size from fw_cfg and
so works with whatever amount of RAM (also Linux booted with -kernel
probably does not care) so any -memory value is valid. However some
may want to also use original firmware ROM for compatibility which
detects RAM reading SPD eeproms (the i2c emulation needed for that is
not working yet but once that's fixed this will be the case). I want
to add smbus_eeproms for this but do not want to just abort for cases
where -memory given by user cannot be covered with SPD data. Instead a
warning and covering as much RAM as possible should be enough (the ROM
will detect less RAM than given with -m
but that's OK and better than just bailing out without a message
tripping an assert). But I don't want to replicate in board code the
calculation and checks the spd_data_generate() function does anyway
(that would just puzzle reviewers for every use of this

Re: [PATCH 5/5] hw/i2c: Document the I2C qdev helpers

On Mon, Jun 29, 2020 at 07:38:21PM +0200, Philippe Mathieu-Daudé wrote:
> In commit d88c42ff2c we added new prototype but neglected to
> add their documentation. Fix that.

Reviewed-by: Corey Minyard 

> 
> Reported-by: Peter Maydell 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  include/hw/i2c/i2c.h | 48 
>  1 file changed, 48 insertions(+)
> 
> diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
> index c533058998..fcc61e509b 100644
> --- a/include/hw/i2c/i2c.h
> +++ b/include/hw/i2c/i2c.h
> @@ -79,8 +79,56 @@ int i2c_send_recv(I2CBus *bus, uint8_t *data, bool send);
>  int i2c_send(I2CBus *bus, uint8_t data);
>  uint8_t i2c_recv(I2CBus *bus);
>  
> +/**
> + * Create an I2C slave device on the heap.
> + * @name: a device type name
> + * @addr: I2C address of the slave when put on a bus
> + *
> + * This only initializes the device state structure and allows
> + * properties to be set. Type @name must exist. The device still
> + * needs to be realized. See qdev-core.h.
> + */
>  I2CSlave *i2c_slave_new(const char *name, uint8_t addr);
> +
> +/**
> + * Create an I2C slave device on the heap.
> + * @bus: I2C bus to put it on
> + * @name: I2C slave device type name
> + * @addr: I2C address of the slave when put on a bus
> + *
> + * Create the device state structure, initialize it, put it on the
> + * specified @bus, and drop the reference to it (the device is realized).
> + * Any error aborts the process.
> + */
>  I2CSlave *i2c_slave_create_simple(I2CBus *bus, const char *name, uint8_t 
> addr);
> +
> +/**
> + * i2c_slave_realize_and_unref: realize and unref an I2C slave device
> + * @dev: I2C slave device to realize
> + * @bus: I2C bus to put it on
> + * @addr: I2C address of the slave on the bus
> + * @errp: error pointer
> + *
> + * Call 'realize' on @dev, put it on the specified @bus, and drop the
> + * reference to it. Errors are reported via @errp and by returning
> + * false.
> + *
> + * This function is useful if you have created @dev via qdev_new(),
> + * i2c_slave_new() or i2c_slave_try_new() (which take a reference to
> + * the device it returns to you), so that you can set properties on it
> + * before realizing it. If you don't need to set properties then
> + * i2c_slave_create_simple() is probably better (as it does the create,
> + * init and realize in one step).
> + *
> + * If you are embedding the I2C slave into another QOM device and
> + * initialized it via some variant on object_initialize_child() then
> + * do not use this function, because that family of functions arrange
> + * for the only reference to the child device to be held by the parent
> + * via the child<> property, and so the reference-count-drop done here
> + * would be incorrect.  (Instead you would want i2c_slave_realize(),
> + * which doesn't currently exist but would be trivial to create if we
> + * had any code that wanted it.)
> + */
>  bool i2c_slave_realize_and_unref(I2CSlave *dev, I2CBus *bus, Error **errp);
>  
>  /* lm832x.c */
> -- 
> 2.21.3
> 
>

Re: [PATCH 4/5] hw/i2c: Rename i2c_create_slave() as i2c_slave_create_simple()

On Mon, Jun 29, 2020 at 07:38:20PM +0200, Philippe Mathieu-Daudé wrote:
> We use "create_simple" names for functions that allocate, initialize,
> configure and realize device objects: pci_create_simple(),
> isa_create_simple(), usb_create_simple(). For consistency, rename
> i2c_create_slave() as i2c_slave_create_simple(). Since we have
> to update all the callers, also let it return a I2CSlave object.

Reviewed-by: Corey Minyard 

> 
> Suggested-by: Markus Armbruster 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  include/hw/i2c/i2c.h |  2 +-
>  hw/arm/aspeed.c  | 58 ++--
>  hw/arm/musicpal.c|  4 +--
>  hw/arm/nseries.c |  8 +++---
>  hw/arm/pxa2xx.c  |  5 ++--
>  hw/arm/realview.c|  2 +-
>  hw/arm/spitz.c   |  4 +--
>  hw/arm/stellaris.c   |  2 +-
>  hw/arm/tosa.c|  2 +-
>  hw/arm/versatilepb.c |  2 +-
>  hw/arm/vexpress.c|  2 +-
>  hw/arm/z2.c  |  4 +--
>  hw/display/sii9022.c |  2 +-
>  hw/i2c/core.c|  6 ++---
>  hw/ppc/e500.c|  2 +-
>  hw/ppc/sam460ex.c|  2 +-
>  16 files changed, 54 insertions(+), 53 deletions(-)
> 
> diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
> index cb7211f027..c533058998 100644
> --- a/include/hw/i2c/i2c.h
> +++ b/include/hw/i2c/i2c.h
> @@ -80,7 +80,7 @@ int i2c_send(I2CBus *bus, uint8_t data);
>  uint8_t i2c_recv(I2CBus *bus);
>  
>  I2CSlave *i2c_slave_new(const char *name, uint8_t addr);
> -DeviceState *i2c_create_slave(I2CBus *bus, const char *name, uint8_t addr);
> +I2CSlave *i2c_slave_create_simple(I2CBus *bus, const char *name, uint8_t 
> addr);
>  bool i2c_slave_realize_and_unref(I2CSlave *dev, I2CBus *bus, Error **errp);
>  
>  /* lm832x.c */
> diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
> index ed14e79f57..5fa95f0f02 100644
> --- a/hw/arm/aspeed.c
> +++ b/hw/arm/aspeed.c
> @@ -385,14 +385,14 @@ static void palmetto_bmc_i2c_init(AspeedMachineState 
> *bmc)
>  
>  /* The palmetto platform expects a ds3231 RTC but a ds1338 is
>   * enough to provide basic RTC features. Alarms will be missing */
> -i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 0), "ds1338", 0x68);
> +i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 0), "ds1338", 
> 0x68);
>  
>  smbus_eeprom_init_one(aspeed_i2c_get_bus(&soc->i2c, 0), 0x50,
>eeprom_buf);
>  
>  /* add a TMP423 temperature sensor */
> -dev = i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 2),
> -   "tmp423", 0x4c);
> +dev = DEVICE(i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2),
> + "tmp423", 0x4c));
>  object_property_set_int(OBJECT(dev), 31000, "temperature0", 
> &error_abort);
>  object_property_set_int(OBJECT(dev), 28000, "temperature1", 
> &error_abort);
>  object_property_set_int(OBJECT(dev), 2, "temperature2", 
> &error_abort);
> @@ -408,12 +408,12 @@ static void ast2500_evb_i2c_init(AspeedMachineState 
> *bmc)
>eeprom_buf);
>  
>  /* The AST2500 EVB expects a LM75 but a TMP105 is compatible */
> -i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 7),
> +i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7),
>   TYPE_TMP105, 0x4d);
>  
>  /* The AST2500 EVB does not have an RTC. Let's pretend that one is
>   * plugged on the I2C bus header */
> -i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 11), "ds1338", 0x32);
> +i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), "ds1338", 
> 0x32);
>  }
>  
>  static void ast2600_evb_i2c_init(AspeedMachineState *bmc)
> @@ -428,36 +428,36 @@ static void romulus_bmc_i2c_init(AspeedMachineState 
> *bmc)
>  
>  /* The romulus board expects Epson RX8900 I2C RTC but a ds1338 is
>   * good enough */
> -i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 11), "ds1338", 0x32);
> +i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), "ds1338", 
> 0x32);
>  }
>  
>  static void swift_bmc_i2c_init(AspeedMachineState *bmc)
>  {
>  AspeedSoCState *soc = &bmc->soc;
>  
> -i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 3), "pca9552", 0x60);
> +i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 3), "pca9552", 
> 0x60);
>  
>  /* The swift board expects a TMP275 but a TMP105 is compatible */
> -i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 7), "tmp105", 0x48);
> +i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), "tmp105", 
> 0x48);
>  /* The swift board expects a pca9551 but a pca9552 is compatible */
> -i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 7), "pca9552", 0x60);
> +i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), "pca9552", 
> 0x60);
>  
>  /* The swift board expects an Epson RX8900 RTC but a ds1338 is 
> compatible */
> -i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 8), "ds1338", 0x32);
> -i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 8), "pca9552", 0x60);
> +i2c_

Re: [PATCH 3/5] hw/i2c: Rename i2c_realize_and_unref() as i2c_slave_realize_and_unref()

On Mon, Jun 29, 2020 at 07:38:19PM +0200, Philippe Mathieu-Daudé wrote:
> The other i2c functions are called i2c_slave_FOO(). Rename as
> i2c_slave_realize_and_unref() to be consistent.

Reviewed-by: Corey Minyard 

> 
> Suggested-by: Markus Armbruster 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  include/hw/i2c/i2c.h |  2 +-
>  hw/arm/aspeed.c  | 10 ++
>  hw/i2c/core.c|  6 +++---
>  3 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
> index 18efc668f1..cb7211f027 100644
> --- a/include/hw/i2c/i2c.h
> +++ b/include/hw/i2c/i2c.h
> @@ -81,7 +81,7 @@ uint8_t i2c_recv(I2CBus *bus);
>  
>  I2CSlave *i2c_slave_new(const char *name, uint8_t addr);
>  DeviceState *i2c_create_slave(I2CBus *bus, const char *name, uint8_t addr);
> -bool i2c_realize_and_unref(DeviceState *dev, I2CBus *bus, Error **errp);
> +bool i2c_slave_realize_and_unref(I2CSlave *dev, I2CBus *bus, Error **errp);
>  
>  /* lm832x.c */
>  void lm832x_key_event(DeviceState *dev, int key, int state);
> diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
> index 54ca36e0b6..ed14e79f57 100644
> --- a/hw/arm/aspeed.c
> +++ b/hw/arm/aspeed.c
> @@ -515,8 +515,9 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState 
> *bmc)
>  /* Bus 3: TODO dps310@76 */
>  dev = DEVICE(i2c_slave_new(TYPE_PCA9552, 0x60));
>  qdev_prop_set_string(dev, "description", "pca1");
> -i2c_realize_and_unref(dev, aspeed_i2c_get_bus(&soc->i2c, 3),
> -  &error_fatal);
> +i2c_slave_realize_and_unref(I2C_SLAVE(dev),
> +aspeed_i2c_get_bus(&soc->i2c, 3),
> +&error_fatal);
>  
>  i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 4), "tmp423", 0x4c);
>  i2c_create_slave(aspeed_i2c_get_bus(&soc->i2c, 5), "tmp423", 0x4c);
> @@ -533,8 +534,9 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState 
> *bmc)
>eeprom_buf);
>  dev = DEVICE(i2c_slave_new(TYPE_PCA9552, 0x60));
>  qdev_prop_set_string(dev, "description", "pca0");
> -i2c_realize_and_unref(dev, aspeed_i2c_get_bus(&soc->i2c, 11),
> -  &error_fatal);
> +i2c_slave_realize_and_unref(I2C_SLAVE(dev),
> +aspeed_i2c_get_bus(&soc->i2c, 11),
> +&error_fatal);
>  /* Bus 11: TODO ucd90160@64 */
>  }
>  
> diff --git a/hw/i2c/core.c b/hw/i2c/core.c
> index 6eacb4a463..135ea56036 100644
> --- a/hw/i2c/core.c
> +++ b/hw/i2c/core.c
> @@ -276,16 +276,16 @@ I2CSlave *i2c_slave_new(const char *name, uint8_t addr)
>  return I2C_SLAVE(dev);
>  }
>  
> -bool i2c_realize_and_unref(DeviceState *dev, I2CBus *bus, Error **errp)
> +bool i2c_slave_realize_and_unref(I2CSlave *dev, I2CBus *bus, Error **errp)
>  {
> -return qdev_realize_and_unref(dev, &bus->qbus, errp);
> +return qdev_realize_and_unref(&dev->qdev, &bus->qbus, errp);
>  }
>  
>  DeviceState *i2c_create_slave(I2CBus *bus, const char *name, uint8_t addr)
>  {
>  DeviceState *dev = DEVICE(i2c_slave_new(name, addr));
>  
> -i2c_realize_and_unref(I2C_SLAVE(dev), bus, &error_fatal);
> +i2c_slave_realize_and_unref(I2C_SLAVE(dev), bus, &error_fatal);
>  
>  return dev;
>  }
> -- 
> 2.21.3
> 
>

Re: [PATCH 2/5] hw/i2c: Rename i2c_try_create_slave() as i2c_slave_new()

On Mon, Jun 29, 2020 at 07:38:18PM +0200, Philippe Mathieu-Daudé wrote:
> We use "new" names for functions that allocate and initialize
> device objects: pci_new(), isa_new(), usb_new().
> Let's call this one i2c_slave_new(). Since we have to update
> all the callers, also let it return a I2CSlave object.

Reviewed-by: Corey Minyard 


> 
> Suggested-by: Markus Armbruster 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  include/hw/i2c/i2c.h | 2 +-
>  hw/arm/aspeed.c  | 4 ++--
>  hw/i2c/core.c| 9 -
>  3 files changed, 7 insertions(+), 8 deletions(-)
> 
> diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
> index d6e3d85faf..18efc668f1 100644
> --- a/include/hw/i2c/i2c.h
> +++ b/include/hw/i2c/i2c.h
> @@ -79,8 +79,8 @@ int i2c_send_recv(I2CBus *bus, uint8_t *data, bool send);
>  int i2c_send(I2CBus *bus, uint8_t data);
>  uint8_t i2c_recv(I2CBus *bus);
>  
> +I2CSlave *i2c_slave_new(const char *name, uint8_t addr);
>  DeviceState *i2c_create_slave(I2CBus *bus, const char *name, uint8_t addr);
> -DeviceState *i2c_try_create_slave(const char *name, uint8_t addr);
>  bool i2c_realize_and_unref(DeviceState *dev, I2CBus *bus, Error **errp);
>  
>  /* lm832x.c */
> diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
> index 1285bf82c0..54ca36e0b6 100644
> --- a/hw/arm/aspeed.c
> +++ b/hw/arm/aspeed.c
> @@ -513,7 +513,7 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState 
> *bmc)
>  /* Bus 3: TODO bmp280@77 */
>  /* Bus 3: TODO max31785@52 */
>  /* Bus 3: TODO dps310@76 */
> -dev = i2c_try_create_slave(TYPE_PCA9552, 0x60);
> +dev = DEVICE(i2c_slave_new(TYPE_PCA9552, 0x60));
>  qdev_prop_set_string(dev, "description", "pca1");
>  i2c_realize_and_unref(dev, aspeed_i2c_get_bus(&soc->i2c, 3),
>&error_fatal);
> @@ -531,7 +531,7 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState 
> *bmc)
>  
>  smbus_eeprom_init_one(aspeed_i2c_get_bus(&soc->i2c, 11), 0x51,
>eeprom_buf);
> -dev = i2c_try_create_slave(TYPE_PCA9552, 0x60);
> +dev = DEVICE(i2c_slave_new(TYPE_PCA9552, 0x60));
>  qdev_prop_set_string(dev, "description", "pca0");
>  i2c_realize_and_unref(dev, aspeed_i2c_get_bus(&soc->i2c, 11),
>&error_fatal);
> diff --git a/hw/i2c/core.c b/hw/i2c/core.c
> index acf34a12d6..6eacb4a463 100644
> --- a/hw/i2c/core.c
> +++ b/hw/i2c/core.c
> @@ -267,13 +267,13 @@ const VMStateDescription vmstate_i2c_slave = {
>  }
>  };
>  
> -DeviceState *i2c_try_create_slave(const char *name, uint8_t addr)
> +I2CSlave *i2c_slave_new(const char *name, uint8_t addr)
>  {
>  DeviceState *dev;
>  
>  dev = qdev_new(name);
>  qdev_prop_set_uint8(dev, "address", addr);
> -return dev;
> +return I2C_SLAVE(dev);
>  }
>  
>  bool i2c_realize_and_unref(DeviceState *dev, I2CBus *bus, Error **errp)
> @@ -283,10 +283,9 @@ bool i2c_realize_and_unref(DeviceState *dev, I2CBus 
> *bus, Error **errp)
>  
>  DeviceState *i2c_create_slave(I2CBus *bus, const char *name, uint8_t addr)
>  {
> -DeviceState *dev;
> +DeviceState *dev = DEVICE(i2c_slave_new(name, addr));
>  
> -dev = i2c_try_create_slave(name, addr);
> -i2c_realize_and_unref(dev, bus, &error_fatal);
> +i2c_realize_and_unref(I2C_SLAVE(dev), bus, &error_fatal);
>  
>  return dev;
>  }
> -- 
> 2.21.3
> 
>

Re: [PATCH 0/5] hw/i2c: Rename method names for consistency and add documentation

On Mon, Jun 29, 2020 at 07:38:16PM +0200, Philippe Mathieu-Daudé wrote:
> In commit d88c42ff2c we added 2 methods: i2c_try_create_slave()
> and i2c_realize_and_unref().
> Markus noted their name could be improved for consistency [1],
> and Peter reported the lack of documentation [2]. Fix that now.

Looking over these, I don't see an issue.  I didn't review the aspeed
device changes (patch 1); that's probably better for the aspeed
maintainer to review.

But I do like the improvement in consistency.

-corey

> 
> [1] https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg07060.html
> [2] https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg08997.html
> 
> Philippe Mathieu-Daudé (5):
>   hw/i2c/aspeed_i2c: Simplify aspeed_i2c_get_bus()
>   hw/i2c: Rename i2c_try_create_slave() as i2c_slave_new()
>   hw/i2c: Rename i2c_realize_and_unref() as
> i2c_slave_realize_and_unref()
>   hw/i2c: Rename i2c_create_slave() as i2c_slave_create_simple()
>   hw/i2c: Document the I2C qdev helpers
> 
>  include/hw/i2c/aspeed_i2c.h |  2 +-
>  include/hw/i2c/i2c.h| 54 ++--
>  hw/arm/aspeed.c | 82 +++--
>  hw/arm/musicpal.c   |  4 +-
>  hw/arm/nseries.c|  8 ++--
>  hw/arm/pxa2xx.c |  5 ++-
>  hw/arm/realview.c   |  2 +-
>  hw/arm/spitz.c  |  4 +-
>  hw/arm/stellaris.c  |  2 +-
>  hw/arm/tosa.c   |  2 +-
>  hw/arm/versatilepb.c|  2 +-
>  hw/arm/vexpress.c   |  2 +-
>  hw/arm/z2.c |  4 +-
>  hw/display/sii9022.c|  2 +-
>  hw/i2c/aspeed_i2c.c |  3 +-
>  hw/i2c/core.c   | 15 ---
>  hw/ppc/e500.c   |  2 +-
>  hw/ppc/sam460ex.c   |  2 +-
>  18 files changed, 123 insertions(+), 74 deletions(-)
> 
> -- 
> 2.21.3
> 
>

Re: [REPORT] [GSoC - TCG Continuous Benchmarking] [#2] Dissecting QEMU Into Three Main Parts

2020-06-29 Thread Ahmed Karaman

On Mon, Jun 29, 2020 at 6:03 PM Alex Bennée  wrote:
>
>
> Ahmed Karaman  writes:
>
> > Hi,
> >
> > The second report of the TCG Continuous Benchmarking series builds
> > upon the QEMU performance metrics calculated in the previous report.
> > This report presents a method to dissect the number of instructions
> > executed by a QEMU invocation into three main phases:
> > - Code Generation
> > - JIT Execution
> > - Helpers Execution
> > It devises a Python script that automates this process.
> >
> > After that, the report presents an experiment for comparing the
> > output of running the script on 17 different targets. Many conclusions
> > can be drawn from the results and two of them are discussed in the
> > analysis section.
>
> A couple of comments. One thing I think is missing from your analysis is
> the total number of guest instructions being emulated. As you point out
> each guest will have different code efficiency in terms of its
> generated code.
>
> Assuming your test case is constant execution (i.e. runs the same each
> time)
Yes indeed, the report utilizes Callgrind in the measurements so the
results are very stable.
>you could run it through a plugins build to extract the number of
> guest instructions, e.g.:
>
>   ./aarch64-linux-user/qemu-aarch64 -plugin tests/plugin/libinsn.so -d plugin 
> ./tests/tcg/aarch64-linux-user/sha1
>   SHA1=15dd99a1991e0b3826fede3deffc1feba42278e6
>   insns: 158603512
>
That's a very nice suggestion. Maybe this will be the idea of a whole
new report. I'll try to execute the provided command and will let you
know if I have any questions.
> I should have also pointed out in your last report that running FP heavy
> code will always be biased towards helper/softfloat code to the
> detriment of everything else. I think you need more of a mix of
> benchmarks to get a better view.
>
> When Emilio did the last set of analysis he used a suite he built out of
> nbench and a perl benchmark:
>
>   https://github.com/cota/dbt-bench
>
> As he quoted in his README:
>
>   NBench programs are small, with execution time dominated by small code
>   loops. Thus, when run under a DBT engine, the resulting performance
>   depends almost entirely on the quality of the output code.
>
>   The Perl benchmarks compile Perl code. As is common for compilation
>   workloads, they execute large amounts of code and show no particular
>   code execution hotspots. Thus, the resulting DBT performance depends
>   largely on code translation speed.
>
> by only having one benchmark you are going to miss out on the envelope
> of use cases.
>
Future reports will introduce a variety of benchmarks. This report -
and the previous one - are introductory reports. The benchmark used
was to only demonstrate the report ideas. It was not used as a strict
benchmarking program.
> >
> > Report link:
> >https://ahmedkrmn.github.io/TCG-Continuous-Benchmarking/Dissecting-QEMU-Into-Three-Main-Parts/
> >
> > Previous reports:
> > Report 1 - Measuring Basic Performance Metrics of QEMU:
> > https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg06692.html
> >
> > Best regards,
> > Ahmed Karaman
>
>
> --
> Alex Bennée

Re: [PATCH] hw/misc/pca9552: Add missing TypeInfo::class_size field

2020-06-29 Thread Peter Maydell

On Mon, 29 Jun 2020 at 08:47, Philippe Mathieu-Daudé  wrote:
>
> When adding the generic PCA955xClass in commit 736132e455, we
> forgot to set the class_size field. Fill it now to avoid:

Thanks; I've applied this to master since it fixes a memory
corruption that affects all arm targets and I'm not otherwise
planning an arm pullreq for a bit.

-- PMM

Re: [PATCH 0/2] hw/block/nvme: handle transient dma errors

2020-06-29 Thread no-reply

Patchew URL: 
https://patchew.org/QEMU/20200629202053.1223342-1-...@irrelevant.dk/



Hi,

This series failed the docker-quick@centos7 build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
make docker-image-centos7 V=1 NETWORK=1
time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
=== TEST SCRIPT END ===

--- /tmp/qemu-test/src/tests/qemu-iotests/040.out   2020-06-29 
20:12:10.0 +
+++ /tmp/qemu-test/build/tests/qemu-iotests/040.out.bad 2020-06-29 
20:58:48.288790818 +
@@ -1,3 +1,5 @@
+WARNING:qemu.machine:qemu received signal 9: 
/tmp/qemu-test/build/tests/qemu-iotests/../../x86_64-softmmu/qemu-system-x86_64 
-display none -vga none -chardev 
socket,id=mon,path=/tmp/tmp.Jdol0fPScQ/qemu-21749-monitor.sock -mon 
chardev=mon,mode=control -qtest 
unix:path=/tmp/tmp.Jdol0fPScQ/qemu-21749-qtest.sock -accel qtest -nodefaults 
-display none -accel qtest
+WARNING:qemu.machine:qemu received signal 9: 
/tmp/qemu-test/build/tests/qemu-iotests/../../x86_64-softmmu/qemu-system-x86_64 
-display none -vga none -chardev 
socket,id=mon,path=/tmp/tmp.Jdol0fPScQ/qemu-21749-monitor.sock -mon 
chardev=mon,mode=control -qtest 
unix:path=/tmp/tmp.Jdol0fPScQ/qemu-21749-qtest.sock -accel qtest -nodefaults 
-display none -accel qtest
 ...
 --
 Ran 59 tests
---
Not run: 259
Failures: 040
Failed 1 of 119 iotests
make: *** [check-tests/check-block.sh] Error 1
make: *** Waiting for unfinished jobs
  TESTcheck-qtest-aarch64: tests/qtest/qos-test
Traceback (most recent call last):
---
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', 
'--label', 'com.qemu.instance.uuid=da25eaa8bdd04cb783e2c427c6a5aa94', '-u', 
'1001', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', 
'-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 
'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', 
'/home/patchew/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', 
'/var/tmp/patchew-tester-tmp-98l7koy2/src/docker-src.2020-06-29-16.51.46.20742:/var/tmp/qemu:z,ro',
 'qemu:centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit 
status 2.
filter=--filter=label=com.qemu.instance.uuid=da25eaa8bdd04cb783e2c427c6a5aa94
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-98l7koy2/src'
make: *** [docker-run-test-quick@centos7] Error 2

real15m57.590s
user0m9.240s


The full log is available at
http://patchew.org/logs/20200629202053.1223342-1-...@irrelevant.dk/testing.docker-quick@centos7/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PATCH 2/4] hw/block/nvme: support multiple namespaces

From: Klaus Jensen 

This adds support for multiple namespaces by introducing a new 'nvme-ns'
device model. The nvme device creates a bus named from the device name
('id'). The nvme-ns devices then connect to this and register
themselves with the nvme device.

This changes how an nvme device is created. Example with two namespaces:

  -drive file=nvme0n1.img,if=none,id=disk1
  -drive file=nvme0n2.img,if=none,id=disk2
  -device nvme,serial=deadbeef,id=nvme0
  -device nvme-ns,drive=disk1,bus=nvme0,nsid=1
  -device nvme-ns,drive=disk2,bus=nvme0,nsid=2

The drive property is kept on the nvme device to keep the change
backward compatible, but the property is now optional. Specifying a
drive for the nvme device will always create the namespace with nsid 1.

Signed-off-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
---
 hw/block/Makefile.objs |   2 +-
 hw/block/nvme-ns.c | 172 +++
 hw/block/nvme-ns.h |  66 +++
 hw/block/nvme.c| 255 ++---
 hw/block/nvme.h|  44 +++
 hw/block/trace-events  |   8 +-
 6 files changed, 431 insertions(+), 116 deletions(-)
 create mode 100644 hw/block/nvme-ns.c
 create mode 100644 hw/block/nvme-ns.h

diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
index 8855c2265639..8c159bc56630 100644
--- a/hw/block/Makefile.objs
+++ b/hw/block/Makefile.objs
@@ -13,6 +13,6 @@ common-obj-$(CONFIG_SH4) += tc58128.o
 
 obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o
 obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o
-common-obj-$(CONFIG_NVME_PCI) += nvme.o
+common-obj-$(CONFIG_NVME_PCI) += nvme.o nvme-ns.o
 
 obj-y += dataplane/
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
new file mode 100644
index ..28ce5e011568
--- /dev/null
+++ b/hw/block/nvme-ns.c
@@ -0,0 +1,172 @@
+/*
+ * QEMU NVM Express Virtual Namespace
+ *
+ * Copyright (c) 2019 CNEX Labs
+ * Copyright (c) 2020 Samsung Electronics
+ *
+ * Authors:
+ *  Klaus Jensen  
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "hw/block/block.h"
+#include "hw/pci/pci.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/qdev-core.h"
+
+#include "nvme.h"
+#include "nvme-ns.h"
+
+static void nvme_ns_init(NvmeNamespace *ns)
+{
+NvmeIdNs *id_ns = &ns->id_ns;
+
+id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
+
+id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));
+
+/* no thin provisioning */
+id_ns->ncap = id_ns->nsze;
+id_ns->nuse = id_ns->ncap;
+}
+
+static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, NvmeIdCtrl *id,
+Error **errp)
+{
+uint64_t perm, shared_perm;
+
+Error *local_err = NULL;
+int ret;
+
+perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+BLK_PERM_GRAPH_MOD;
+
+ret = blk_set_perm(ns->blk, perm, shared_perm, &local_err);
+if (ret) {
+error_propagate_prepend(errp, local_err,
+"could not set block permissions: ");
+return ret;
+}
+
+ns->size = blk_getlength(ns->blk);
+if (ns->size < 0) {
+error_setg_errno(errp, -ns->size, "could not get blockdev size");
+return -1;
+}
+
+switch (n->conf.wce) {
+case ON_OFF_AUTO_ON:
+n->features.vwc = 1;
+break;
+case ON_OFF_AUTO_OFF:
+n->features.vwc = 0;
+break;
+case ON_OFF_AUTO_AUTO:
+n->features.vwc = blk_enable_write_cache(ns->blk);
+break;
+default:
+abort();
+}
+
+blk_set_enable_write_cache(ns->blk, n->features.vwc);
+
+return 0;
+}
+
+static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
+{
+if (!ns->blk) {
+error_setg(errp, "block backend not configured");
+return -1;
+}
+
+return 0;
+}
+
+int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+if (nvme_ns_check_constraints(ns, errp)) {
+return -1;
+}
+
+if (nvme_ns_init_blk(n, ns, &n->id_ctrl, errp)) {
+return -1;
+}
+
+nvme_ns_init(ns);
+if (nvme_register_namespace(n, ns, errp)) {
+return -1;
+}
+
+return 0;
+}
+
+static void nvme_ns_realize(DeviceState *dev, Error **errp)
+{
+NvmeNamespace *ns = NVME_NS(dev);
+BusState *s = qdev_get_parent_bus(dev);
+NvmeCtrl *n = NVME(s->parent);
+Error *local_err = NULL;
+
+if (nvme_ns_setup(n, ns, &local_err)) {
+error_propagate_prepend(errp, local_err,
+"could not setup namespace: ");
+return;
+}
+}
+
+static Property nvme_ns_props[] = {
+DEFINE_PROP_DRIVE("drive", NvmeNamespace, blk),
+DE

[PATCH 4/4] hw/block/nvme: change controller pci id

From: Klaus Jensen 

There are two reasons for changing this:

  1. The nvme device currently uses an internal Intel device id.

  2. Since commits "nvme: fix write zeroes offset and count" and "nvme:
 support multiple namespaces" the controller device no longer has
     the quirks that the Linux kernel thinks it has.

 As the quirks are applied based on pci vendor and device id, change
 them to get rid of the quirks.

To keep backward compatibility, add a new 'x-use-intel-id' parameter to
the nvme device to force use of the Intel vendor and device id. This is
off by default but add a compat property to set this for machines 5.0
and older.

Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c   | 12 ++--
 hw/block/nvme.h   |  1 +
 hw/core/machine.c |  1 +
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 9db8e4811433..164c5e294d06 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -2756,6 +2756,15 @@ static void nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 
 pci_conf[PCI_INTERRUPT_PIN] = 1;
 pci_config_set_prog_interface(pci_conf, 0x2);
+
+if (n->params.use_intel_id) {
+pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+pci_config_set_device_id(pci_conf, 0x5846);
+} else {
+pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT);
+pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME);
+}
+
 pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
 pcie_endpoint_cap_init(pci_dev, 0x80);
 
@@ -2908,6 +2917,7 @@ static Property nvme_props[] = {
 DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
 DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
 DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
+DEFINE_PROP_BOOL("x-use-intel-id", NvmeCtrl, params.use_intel_id, false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2924,8 +2934,6 @@ static void nvme_class_init(ObjectClass *oc, void *data)
 pc->realize = nvme_realize;
 pc->exit = nvme_exit;
 pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
-pc->vendor_id = PCI_VENDOR_ID_INTEL;
-pc->device_id = 0x5845;
 pc->revision = 2;
 
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 8d7610477e7d..8bf1a050497e 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -15,6 +15,7 @@ typedef struct NvmeParams {
 uint8_t  aerl;
 uint32_t aer_max_queued;
 uint8_t  mdts;
+bool use_intel_id;
 } NvmeParams;
 
 typedef struct NvmeAsyncEvent {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1d80ab0e1daf..aedf5fd48631 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -30,6 +30,7 @@
 
 GlobalProperty hw_compat_5_0[] = {
 { "virtio-balloon-device", "page-poison", "false" },
+{ "nvme", "x-use-intel-id", "on"},
 };
 const size_t hw_compat_5_0_len = G_N_ELEMENTS(hw_compat_5_0);
 
-- 
2.27.0

[PATCH 0/4] hw/block/nvme: support multiple namespaces

From: Klaus Jensen 

This adds a new 'nvme-ns' device that attaches to the nvme device
through a bus. This decouples the nvme controller and nvme namespaces
such that multiple namespaces may be attached to the controller.

With this in place, we can allocate a fresh pci vendor/device id and get
rid of the Intel id pair that causes the Linux kernel to apply a bunch
of quirks that the device no longer has.

Based-on: <20200629203155.1236860-1-...@irrelevant.dk>
("[PATCH 0/3] hw/block/nvme: support scatter gather lists")

Klaus Jensen (4):
  hw/block/nvme: refactor identify active namespace id list
  hw/block/nvme: support multiple namespaces
  pci: allocate pci id for nvme
  hw/block/nvme: change controller pci id

 MAINTAINERS|   1 +
 docs/specs/nvme.txt|  23 
 docs/specs/pci-ids.txt |   1 +
 hw/block/Makefile.objs |   2 +-
 hw/block/nvme-ns.c | 172 ++
 hw/block/nvme-ns.h |  66 ++
 hw/block/nvme.c| 273 +++--
 hw/block/nvme.h|  45 +++
 hw/block/trace-events  |   8 +-
 hw/core/machine.c  |   1 +
 include/hw/pci/pci.h   |   1 +
 11 files changed, 472 insertions(+), 121 deletions(-)
 create mode 100644 docs/specs/nvme.txt
 create mode 100644 hw/block/nvme-ns.c
 create mode 100644 hw/block/nvme-ns.h

-- 
2.27.0

[PATCH 1/4] hw/block/nvme: refactor identify active namespace id list

From: Klaus Jensen 

Prepare to support inactive namespaces.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 4bcd114f76b1..eaee420219fd 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1573,16 +1573,16 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, 
NvmeRequest *req)
 uint32_t min_nsid = le32_to_cpu(c->nsid);
 uint32_t *list;
 uint16_t ret;
-int i, j = 0;
+int j = 0;
 
 trace_pci_nvme_identify_nslist(min_nsid);
 
 list = g_malloc0(data_len);
-for (i = 0; i < n->num_namespaces; i++) {
-if (i < min_nsid) {
+for (int i = 1; i <= n->num_namespaces; i++) {
+if (i <= min_nsid) {
 continue;
 }
-list[j++] = cpu_to_le32(i + 1);
+list[j++] = cpu_to_le32(i);
 if (j == data_len / sizeof(uint32_t)) {
 break;
 }
-- 
2.27.0

[PATCH 3/4] pci: allocate pci id for nvme

From: Klaus Jensen 

The emulated nvme device (hw/block/nvme.c) is currently using an
internal Intel device id.

Prepare to change that by allocating a device id under the 1b36 (Red
Hat, Inc.) vendor id.

Signed-off-by: Klaus Jensen 
Acked-by: Keith Busch 
Acked-by: Gerd Hoffmann 
Reviewed-by: Maxim Levitsky 
---
 MAINTAINERS|  1 +
 docs/specs/nvme.txt| 23 +++
 docs/specs/pci-ids.txt |  1 +
 include/hw/pci/pci.h   |  1 +
 4 files changed, 26 insertions(+)
 create mode 100644 docs/specs/nvme.txt

diff --git a/MAINTAINERS b/MAINTAINERS
index 955cc8dd5cd0..790faab64188 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1774,6 +1774,7 @@ L: qemu-bl...@nongnu.org
 S: Supported
 F: hw/block/nvme*
 F: tests/qtest/nvme-test.c
+F: docs/specs/nvme.txt
 
 megasas
 M: Hannes Reinecke 
diff --git a/docs/specs/nvme.txt b/docs/specs/nvme.txt
new file mode 100644
index ..56d393884e7a
--- /dev/null
+++ b/docs/specs/nvme.txt
@@ -0,0 +1,23 @@
+NVM Express Controller
+==
+
+The nvme device (-device nvme) emulates an NVM Express Controller.
+
+
+Reference Specifications
+
+
+The device currently implements most mandatory features of NVMe v1.3d, see
+
+  https://nvmexpress.org/resources/specifications/
+
+for the specification.
+
+
+Known issues
+
+
+* The accounting numbers in the SMART/Health are reset across power cycles
+
+* Interrupt Coalescing is not supported and is disabled by default in violation
+  of the specification.
diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index 4d53e5c7d9d5..abbdbca6be38 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -63,6 +63,7 @@ PCI devices (other than virtio):
 1b36:000b  PCIe Expander Bridge (-device pxb-pcie)
 1b36:000d  PCI xhci usb host adapter
 1b36:000f  mdpy (mdev sample device), linux/samples/vfio-mdev/mdpy.c
+1b36:0010  PCIe NVMe device (-device nvme)
 
 All these devices are documented in docs/specs.
 
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 2347dc36bfb5..7e565ba03262 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -104,6 +104,7 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_REDHAT_XHCI0x000d
 #define PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE 0x000e
 #define PCI_DEVICE_ID_REDHAT_MDPY0x000f
+#define PCI_DEVICE_ID_REDHAT_NVME0x0010
 #define PCI_DEVICE_ID_REDHAT_QXL 0x0100
 
 #define FMT_PCIBUS  PRIx64
-- 
2.27.0

[PATCH 1/3] hw/block/nvme: harden cmb access

From: Klaus Jensen 

Since the controller has only supported PRPs so far it has not been
required to check the ending address (addr + len - 1) of the CMB access
for validity since it has been guaranteed to be in range of the CMB.

This changes when the controller adds support for SGLs (next patch), so
add that check.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 94f5bf2a815f..191732692248 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -91,7 +91,12 @@ static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 
 static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
-if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) {
+hwaddr hi = addr + size - 1;
+if (hi < addr) {
+return 1;
+}
+
+if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) {
 memcpy(buf, nvme_addr_to_cmb(n, addr), size);
 return 0;
 }
-- 
2.27.0

[PATCH 2/3] hw/block/nvme: add support for scatter gather lists

From: Klaus Jensen 

For now, support the Data Block, Segment and Last Segment descriptor
types.

See NVM Express 1.3d, Section 4.4 ("Scatter Gather List (SGL)").

Signed-off-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
Acked-by: Keith Busch 
---
 hw/block/nvme.c   | 331 ++
 hw/block/trace-events |   4 +
 include/block/nvme.h  |   6 +-
 3 files changed, 281 insertions(+), 60 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 191732692248..a9b0406d873f 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -360,13 +360,263 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 return NVME_SUCCESS;
 }
 
-static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
- uint64_t prp1, uint64_t prp2, DMADirection dir,
+/*
+ * Map 'nsgld' data descriptors from 'segment'. The function will subtract the
+ * number of bytes mapped in len.
+ */
+static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
+  QEMUIOVector *iov,
+  NvmeSglDescriptor *segment, uint64_t nsgld,
+  size_t *len, NvmeRequest *req)
+{
+dma_addr_t addr, trans_len;
+uint32_t dlen;
+uint16_t status;
+
+for (int i = 0; i < nsgld; i++) {
+uint8_t type = NVME_SGL_TYPE(segment[i].type);
+
+switch (type) {
+case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
+break;
+case NVME_SGL_DESCR_TYPE_SEGMENT:
+case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
+return NVME_INVALID_NUM_SGL_DESCRS | NVME_DNR;
+default:
+return NVME_SGL_DESCR_TYPE_INVALID | NVME_DNR;
+}
+
+dlen = le32_to_cpu(segment[i].len);
+if (!dlen) {
+continue;
+}
+
+if (*len == 0) {
+/*
+ * All data has been mapped, but the SGL contains additional
+ * segments and/or descriptors. The controller might accept
+ * ignoring the rest of the SGL.
+ */
+uint16_t sgls = le16_to_cpu(n->id_ctrl.sgls);
+if (sgls & NVME_CTRL_SGLS_EXCESS_LENGTH) {
+break;
+}
+
+trace_pci_nvme_err_invalid_sgl_excess_length(nvme_cid(req));
+return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
+}
+
+trans_len = MIN(*len, dlen);
+addr = le64_to_cpu(segment[i].addr);
+
+if (UINT64_MAX - addr < dlen) {
+return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
+}
+
+status = nvme_map_addr(n, qsg, iov, addr, trans_len);
+if (status) {
+return status;
+}
+
+*len -= trans_len;
+}
+
+return NVME_SUCCESS;
+}
+
+static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
+ NvmeSglDescriptor sgl, size_t len,
  NvmeRequest *req)
+{
+/*
+ * Read the segment in chunks of 256 descriptors (one 4k page) to avoid
+ * dynamically allocating a potentially huge SGL. The spec allows the SGL
+ * to be larger (as in number of bytes required to describe the SGL
+ * descriptors and segment chain) than the command transfer size, so it is
+ * not bounded by MDTS.
+ */
+const int SEG_CHUNK_SIZE = 256;
+
+NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
+uint64_t nsgld;
+uint32_t seg_len;
+uint16_t status;
+bool sgl_in_cmb = false;
+hwaddr addr;
+int ret;
+
+sgld = &sgl;
+addr = le64_to_cpu(sgl.addr);
+
+trace_pci_nvme_map_sgl(nvme_cid(req), NVME_SGL_TYPE(sgl.type), req->nlb,
+   len);
+
+/*
+ * If the entire transfer can be described with a single data block it can
+ * be mapped directly.
+ */
+if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
+status = nvme_map_sgl_data(n, qsg, iov, sgld, 1, &len, req);
+if (status) {
+goto unmap;
+}
+
+goto out;
+}
+
+/*
+ * If the segment is located in the CMB, the submission queue of the
+ * request must also reside there.
+ */
+if (nvme_addr_is_cmb(n, addr)) {
+if (!nvme_addr_is_cmb(n, req->sq->dma_addr)) {
+return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+}
+
+sgl_in_cmb = true;
+}
+
+for (;;) {
+switch (NVME_SGL_TYPE(sgld->type)) {
+case NVME_SGL_DESCR_TYPE_SEGMENT:
+case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
+break;
+default:
+return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
+}
+
+seg_len = le32_to_cpu(sgld->len);
+
+/* check the length of the (Last) Segment descriptor */
+if (!seg_len || seg_len & 0xf) {
+return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
+}
+
+if (UINT64_MAX - addr < seg_len) {
+return NVME_DATA_

[PATCH 0/3] hw/block/nvme: support scatter gather lists

From: Klaus Jensen 

This adds support for scatter gather lists (SGLs). The full flexibility
of SGLs requires the device to be a bit more strict about CMB access,
hence the "hw/block/nvme: harden cmb access" patch.

Based-on: <20200629202053.1223342-1-...@irrelevant.dk>
("[PATCH 0/2] hw/block/nvme: handle transient dma errors")

Gollu Appalanaidu (1):
  hw/block/nvme: add support for sgl bit bucket descriptor

Klaus Jensen (2):
  hw/block/nvme: harden cmb access
  hw/block/nvme: add support for scatter gather lists

 hw/block/nvme.c   | 359 +++---
 hw/block/trace-events |   4 +
 include/block/nvme.h  |   6 +-
 3 files changed, 308 insertions(+), 61 deletions(-)

-- 
2.27.0

[PATCH 3/3] hw/block/nvme: add support for sgl bit bucket descriptor

From: Gollu Appalanaidu 

This adds support for SGL descriptor type 0x1 (bit bucket descriptor).
See the NVM Express v1.3d specification, Section 4.4 ("Scatter Gather
List (SGL)").

Signed-off-by: Gollu Appalanaidu 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 33 +++--
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index a9b0406d873f..4bcd114f76b1 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -377,6 +377,10 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList 
*qsg,
 uint8_t type = NVME_SGL_TYPE(segment[i].type);
 
 switch (type) {
+case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
+if (nvme_req_is_write(req)) {
+continue;
+}
 case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
 break;
 case NVME_SGL_DESCR_TYPE_SEGMENT:
@@ -387,6 +391,7 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList 
*qsg,
 }
 
 dlen = le32_to_cpu(segment[i].len);
+
 if (!dlen) {
 continue;
 }
@@ -407,6 +412,11 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList 
*qsg,
 }
 
 trans_len = MIN(*len, dlen);
+
+if (type == NVME_SGL_DESCR_TYPE_BIT_BUCKET) {
+goto next;
+}
+
 addr = le64_to_cpu(segment[i].addr);
 
 if (UINT64_MAX - addr < dlen) {
@@ -418,6 +428,7 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList 
*qsg,
 return status;
 }
 
+next:
 *len -= trans_len;
 }
 
@@ -488,7 +499,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 seg_len = le32_to_cpu(sgld->len);
 
 /* check the length of the (Last) Segment descriptor */
-if (!seg_len || seg_len & 0xf) {
+if ((!seg_len || seg_len & 0xf) &&
+(NVME_SGL_TYPE(sgld->type) != NVME_SGL_DESCR_TYPE_BIT_BUCKET)) {
 return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
 }
 
@@ -525,19 +537,27 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 
 last_sgld = &segment[nsgld - 1];
 
-/* if the segment ends with a Data Block, then we are done */
-if (NVME_SGL_TYPE(last_sgld->type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
+/*
+ * If the segment ends with a Data Block or Bit Bucket Descriptor Type,
+ * then we are done.
+ */
+switch (NVME_SGL_TYPE(last_sgld->type)) {
+case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
+case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
 status = nvme_map_sgl_data(n, qsg, iov, segment, nsgld, &len, req);
 if (status) {
 goto unmap;
 }
 
 goto out;
+
+default:
+break;
 }
 
 /*
- * If the last descriptor was not a Data Block, then the current
- * segment must not be a Last Segment.
+ * If the last descriptor was not a Data Block or Bit Bucket, then the
+ * current segment must not be a Last Segment.
  */
 if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) {
 status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
@@ -2729,7 +2749,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
NVME_ONCS_FEATURES);
 
-id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORTED_NO_ALIGNMENT);
+id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORTED_NO_ALIGNMENT |
+   NVME_CTRL_SGLS_BITBUCKET);
 
 pstrcpy((char *) id->subnqn, sizeof(id->subnqn), "nqn.2019-08.org.qemu:");
 pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial);
-- 
2.27.0

RE: [PATCH v2 00/18] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2020-06-29 Thread Dmitry Fomichev

Bump... Any feedback on this series?

> -Original Message-
> From: Dmitry Fomichev 
> Sent: Wednesday, June 17, 2020 5:34 PM
> To: Kevin Wolf ; Keith Busch ;
> Philippe Mathieu-Daudé ; Maxim Levitsky
> 
> Cc: qemu-bl...@nongnu.org; qemu-devel@nongnu.org; Matias Bjorling
> ; Damien Le Moal ;
> Niklas Cassel ; Dmitry Fomichev
> 
> Subject: [PATCH v2 00/18] hw/block/nvme: Support Namespace Types and
> Zoned Namespace Command Set
> 
> v2: rebased on top of block-next/block branch
> 
> Zoned Namespace (ZNS) Command Set is a newly introduced command set
> published by the NVM Express, Inc. organization as TP 4053. The main
> design goals of ZNS are to provide hardware designers the means to
> reduce NVMe controller complexity and to allow achieving a better I/O
> latency and throughput. SSDs that implement this interface are
> commonly known as ZNS SSDs.
> 
> This command set is implementing a zoned storage model, similarly to
> ZAC/ZBC. As such, there is already support in Linux, allowing one to
> perform the majority of tasks needed for managing ZNS SSDs.
> 
> The Zoned Namespace Command Set relies on another TP, known as
> Namespace Types (NVMe TP 4056), which introduces support for having
> multiple command sets per namespace.
> 
> Both ZNS and Namespace Types specifications can be downloaded by
> visiting the following link -
> 
> https://nvmexpress.org/wp-content/uploads/NVM-Express-1.4-Ratified-
> TPs.zip
> 
> This patch series adds Namespace Types support and zoned namespace
> emulation capability to the existing NVMe PCI driver.
> 
> The patchset is organized as follows -
> 
> The first several patches are preparatory and are added to allow for
> an easier review of the subsequent commits. The group of patches that
> follows adds NS Types support with only NVM Command Set being
> available. Finally, the last group of commits makes definitions and
> adds new code to support Zoned Namespace Command Set.
> 
> Based-on: <20200609205944.3549240-1-ebl...@redhat.com>
> 
> Ajay Joshi (1):
>   hw/block/nvme: Define 64 bit cqe.result
> 
> Dmitry Fomichev (15):
>   hw/block/nvme: Move NvmeRequest has_sg field to a bit flag
>   hw/block/nvme: Clean up unused AER definitions
>   hw/block/nvme: Add Commands Supported and Effects log
>   hw/block/nvme: Define trace events related to NS Types
>   hw/block/nvme: Make Zoned NS Command Set definitions
>   hw/block/nvme: Define Zoned NS Command Set trace events
>   hw/block/nvme: Support Zoned Namespace Command Set
>   hw/block/nvme: Introduce max active and open zone limits
>   hw/block/nvme: Simulate Zone Active excursions
>   hw/block/nvme: Set Finish/Reset Zone Recommended attributes
>   hw/block/nvme: Generate zone AENs
>   hw/block/nvme: Support Zone Descriptor Extensions
>   hw/block/nvme: Add injection of Offline/Read-Only zones
>   hw/block/nvme: Use zone metadata file for persistence
>   hw/block/nvme: Document zoned parameters in usage text
> 
> Niklas Cassel (2):
>   hw/block/nvme: Introduce the Namespace Types definitions
>   hw/block/nvme: Add support for Namespace Types
> 
>  block/nvme.c  |2 +-
>  block/trace-events|2 +-
>  hw/block/nvme.c   | 2316
> -
>  hw/block/nvme.h   |  228 +++-
>  hw/block/trace-events |   56 +
>  include/block/nvme.h  |  282 -
>  6 files changed, 2820 insertions(+), 66 deletions(-)
> 
> --
> 2.21.0

Re: Building in Solaris 11.4

2020-06-29 Thread Michele Denber


On 06-29-2020 8:12 AM, Thomas Huth wrote:

...
It's not the same bug as last year, but a new one: Seems like newer
versions of Solaris now have these functions in their libraries!

Yes - I just checked. Solaris 10 does not have openpty, but Solaris 11.4
indeed does have it

So what you want is something like this (completely untested):


So just to make sure I have this right - I save that code into a patch
file and then apply it where?  At the qemu-5.0.0 level?  Then run
configure again?

Right now I'm looking at that "struct dirent" problem in gmake I
mentioned a few days ago.  I will try the Haiku patch offered by PMM.

- MIchele

[PATCH 2/2] hw/block/nvme: handle dma errors

From: Klaus Jensen 

Handling DMA errors gracefully is required for the device to pass the
block/011 test ("disable PCI device while doing I/O") in the blktests
suite.

With this patch the device passes the test by retrying "critical"
transfers (posting of completion entries and processing of submission
queue entries).

If DMA errors occur at any other point in the execution of the command
(say, while mapping the PRPs), the command is aborted with a Data
Transfer Error status code.

Signed-off-by: Klaus Jensen 
Acked-by: Keith Busch 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c   | 43 ---
 hw/block/trace-events |  2 ++
 include/block/nvme.h  |  2 +-
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index fa0f8e802d9b..94f5bf2a815f 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -89,14 +89,14 @@ static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 return addr >= low && addr < hi;
 }
 
-static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
 if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) {
 memcpy(buf, nvme_addr_to_cmb(n, addr), size);
-return;
+return 0;
 }
 
-pci_dma_read(&n->parent_obj, addr, buf, size);
+return pci_dma_read(&n->parent_obj, addr, buf, size);
 }
 
 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
@@ -202,7 +202,7 @@ static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector 
*iov, hwaddr addr,
 trace_pci_nvme_map_addr_cmb(addr, len);
 
 if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
-return NVME_DATA_TRAS_ERROR;
+return NVME_DATA_TRANSFER_ERROR;
 }
 
 qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
@@ -257,6 +257,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 int num_prps = (len >> n->page_bits) + 1;
 uint16_t status;
 bool prp_list_in_cmb = false;
+int ret;
 
 trace_pci_nvme_map_prp(nvme_cid(req), trans_len, len, prp1, prp2,
num_prps);
@@ -295,7 +296,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
+ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
+if (ret) {
+trace_pci_nvme_err_addr_read(prp2);
+return NVME_DATA_TRANSFER_ERROR;
+}
 while (len != 0) {
 uint64_t prp_ent = le64_to_cpu(prp_list[i]);
 
@@ -312,8 +317,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 i = 0;
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-nvme_addr_read(n, prp_ent, (void *)prp_list,
-prp_trans);
+ret = nvme_addr_read(n, prp_ent, (void *)prp_list,
+ prp_trans);
+if (ret) {
+trace_pci_nvme_err_addr_read(prp_ent);
+return NVME_DATA_TRANSFER_ERROR;
+}
 prp_ent = le64_to_cpu(prp_list[i]);
 }
 
@@ -487,6 +496,7 @@ static void nvme_post_cqes(void *opaque)
 NvmeCQueue *cq = opaque;
 NvmeCtrl *n = cq->ctrl;
 NvmeRequest *req, *next;
+int ret;
 
 QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
 NvmeSQueue *sq;
@@ -496,15 +506,21 @@ static void nvme_post_cqes(void *opaque)
 break;
 }
 
-QTAILQ_REMOVE(&cq->req_list, req, entry);
 sq = req->sq;
 req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
 req->cqe.sq_id = cpu_to_le16(sq->sqid);
 req->cqe.sq_head = cpu_to_le16(sq->head);
 addr = cq->dma_addr + cq->tail * n->cqe_size;
+ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
+sizeof(req->cqe));
+if (ret) {
+trace_pci_nvme_err_addr_write(addr);
+timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+  500 * SCALE_MS);
+break;
+}
+QTAILQ_REMOVE(&cq->req_list, req, entry);
 nvme_inc_cq_tail(cq);
-pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
-sizeof(req->cqe));
 nvme_req_clear(req);
 QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
 }
@@ -1753,7 +1769,12 @@ static void nvme_process_sq(void *opaque)
 
 while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
 addr = sq->dma_addr + sq->head * n-

[PATCH 1/2] pci: pass along the return value of dma_memory_rw

From: Klaus Jensen 

Some devices might want to know the return value of dma_memory_rw, so
pass it along instead of ignoring it.

There are no existing users of the return value, so this patch should be
safe.

Signed-off-by: Klaus Jensen 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Acked-by: Keith Busch 
---
 include/hw/pci/pci.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index a4e9c3341615..2347dc36bfb5 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -786,8 +786,7 @@ static inline AddressSpace *pci_get_address_space(PCIDevice 
*dev)
 static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
  void *buf, dma_addr_t len, DMADirection dir)
 {
-dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir);
-return 0;
+return dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir);
 }
 
 static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr,
-- 
2.27.0

[PATCH 0/2] hw/block/nvme: handle transient dma errors

From: Klaus Jensen 

QEMU actually respects that Bus Master Enabling for a PCI device gets
flipped, so in order to successfully pass the block/011 test ("disable
PCI device while doing I/O") the nvme device needs to know if a dma
transfer was successful or not.

Based-on: <20200629195017.1217056-1-...@irrelevant.dk>
("[PATCH 00/17] hw/block/nvme: AIO and address mapping refactoring")

Klaus Jensen (2):
  pci: pass along the return value of dma_memory_rw
  hw/block/nvme: handle dma errors

 hw/block/nvme.c   | 43 ---
 hw/block/trace-events |  2 ++
 include/block/nvme.h  |  2 +-
 include/hw/pci/pci.h  |  3 +--
 4 files changed, 36 insertions(+), 14 deletions(-)

-- 
2.27.0

[Bug 1884990] Re: Cirrus graphics results in monochrome colour depth at 640x480 resolution

Yes, the maintainer will likely get the submodule updated before the
next release.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1884990

Title:
  Cirrus graphics results in monochrome colour depth at 640x480
  resolution

Status in QEMU:
  New

Bug description:
  Recently we upgraded to a distribution that bundled QEMU 4.2.0.  We
  were previously running on QEMU 3.0.0.  When booting Windows 10 VMs on
  x86_64, users experienced slow, monochrome graphics and the resolution
  was restricted to 640x480.  Reverting to the prior vgabios-cirrus.bin
  from the prior source tarball remediated the issue.

  An example QEMU command line is below, if needed:
  /bin/qemu-system-x86_64 -vnc 0.0.0.0:100 -device 
cirrus-vga,id=video0,bus=pci.0,addr=0x2  -machine 
pc-i440fx-4.2,accel=kvm,usb=off,dump-guest-core=off -cpu qemu64 -m 2048 
-overcommit mem-lock=off -smp 1,sockets=1,cores=1,threads=1 -no-user-config 
-nodefaults -hda test.raw &

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1884990/+subscriptions

Re: [Bug 1878645] Re: null-ptr dereference in tcg_handle_interrupt

2020-06-29 Thread Alexander Bulekov

On 200629 2000, Alex Bennée wrote:
> 
> Alexander Bulekov <1878...@bugs.launchpad.net> writes:
> 
> > I don't think this is a qtest-specific error: 
> > cat << EOF| qemu-system-i386 -M q35 -nographic -serial none -monitor stdio
> > o/4 0xcf8 0x8400f841
> > o/4 0xcfc 0xaa215d6d
> > o/4 0x6d30 0x2ef8ffbe
> > o/1 0xb2 0x20
> > EOF
> >
> > ...
> > Segmentation fault
> 
> Both this and the qtest have the same problem of depending on
> current_cpu which is a TLS variable which will never be correct from the
> qtest or monitor context. There are only a few other cases.

Ah that makes sense. It probably isn't a real issue, but I'll send
patches with the changes you suggested below.
Thank you

> sun4m:cpu_halt_signal does:
> 
> if (level && current_cpu) {
> cpu_interrupt(current_cpu, CPU_INTERRUPT_HALT);
> }
> 
> pxa2xx:pxa2xx_pwrmode_write does a bare:
> 
> /* Suspend */
> cpu_interrupt(current_cpu, CPU_INTERRUPT_HALT);
> 
> but given the context has a CPUARMState *env it could arguably use that
> to derive current_cpu but as it's only triggered by a system register
> write you can't actually trigger from a monitor/qtest command.
> 
> I would suggest either:
> 
> } else if (current_cpu) {
> cpu_interrupt(current_cpu, CPU_INTERRUPT_SMI);
> }
> 
> or possibly:
> 
> } else {
> cpu_interrupt(current_cpu ? current_cpu : first_cpu, 
> CPU_INTERRUPT_SMI);
> }
> 
> if you really care about triggering a real IRQ from outside the CPU context.
> 
> -- 
> Alex Bennée
>

[PATCH] hw/ppc/ppc4xx: Only accept (combination of) pow2 DDR sizes

Use the popcount instruction to count the number of bits set in
the RAM size. Allow at most 1 bit for each bank. This avoids
using invalid hardware configurations.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/ppc/ppc4xx_devs.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c
index f1651e04d9..c2484a5695 100644
--- a/hw/ppc/ppc4xx_devs.c
+++ b/hw/ppc/ppc4xx_devs.c
@@ -687,6 +687,15 @@ void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks,
 int i;
 int j;
 
+if (ctpop64(size_left) > nr_banks) {
+if (nr_banks) {
+error_report("RAM size must be a power of 2");
+} else {
+error_report("RAM size must be the combination of %d powers of 2",
+ nr_banks);
+}
+exit(1);
+}
 for (i = 0; i < nr_banks; i++) {
 for (j = 0; sdram_bank_sizes[j] != 0; j++) {
 bank_size = sdram_bank_sizes[j];
-- 
2.21.3

[PATCH 12/17] hw/block/nvme: refactor NvmeRequest clearing

From: Klaus Jensen 

Move clearing of the structure from "clear before use" to "clear after
use". Also, carry a reference to the namespace the (I/O) command is
acting on instead of passing it around explicitly.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 28 
 hw/block/nvme.h |  1 +
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 12f1b6331c43..3d38f61b61e5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -174,6 +174,12 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
 }
 }
 
+static void nvme_req_clear(NvmeRequest *req)
+{
+req->ns = NULL;
+memset(&req->cqe, 0x0, sizeof(req->cqe));
+}
+
 static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
   size_t len)
 {
@@ -423,6 +429,7 @@ static void nvme_post_cqes(void *opaque)
 nvme_inc_cq_tail(cq);
 pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
 sizeof(req->cqe));
+nvme_req_clear(req);
 QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
 }
 if (cq->tail != cq->head) {
@@ -569,8 +576,7 @@ static void nvme_rw_cb(void *opaque, int ret)
 nvme_enqueue_req_completion(cq, req);
 }
 
-static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
-NvmeRequest *req)
+static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
 block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
  BLOCK_ACCT_FLUSH);
@@ -579,10 +585,10 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace 
*ns, NvmeCmd *cmd,
 return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
-NvmeRequest *req)
+static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
 NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+NvmeNamespace *ns = req->ns;
 const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
 const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
 uint64_t slba = le64_to_cpu(rw->slba);
@@ -606,10 +612,10 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, 
NvmeNamespace *ns, NvmeCmd *cmd,
 return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
-NvmeRequest *req)
+static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
 NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+NvmeNamespace *ns = req->ns;
 uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
 uint64_t slba = le64_to_cpu(rw->slba);
 
@@ -665,7 +671,6 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 
 static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
-NvmeNamespace *ns;
 uint32_t nsid = le32_to_cpu(cmd->nsid);
 
 trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), cmd->opcode);
@@ -675,15 +680,15 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, 
NvmeRequest *req)
 return NVME_INVALID_NSID | NVME_DNR;
 }
 
-ns = &n->namespaces[nsid - 1];
+req->ns = &n->namespaces[nsid - 1];
 switch (cmd->opcode) {
 case NVME_CMD_FLUSH:
-return nvme_flush(n, ns, cmd, req);
+return nvme_flush(n, cmd, req);
 case NVME_CMD_WRITE_ZEROES:
-return nvme_write_zeroes(n, ns, cmd, req);
+return nvme_write_zeroes(n, cmd, req);
 case NVME_CMD_WRITE:
 case NVME_CMD_READ:
-return nvme_rw(n, ns, cmd, req);
+return nvme_rw(n, cmd, req);
 default:
 trace_pci_nvme_err_invalid_opc(cmd->opcode);
 return NVME_INVALID_OPCODE | NVME_DNR;
@@ -1554,7 +1559,6 @@ static void nvme_process_sq(void *opaque)
 req = QTAILQ_FIRST(&sq->req_list);
 QTAILQ_REMOVE(&sq->req_list, req, entry);
 QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
-memset(&req->cqe, 0, sizeof(req->cqe));
 req->cqe.cid = cmd.cid;
 
 status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index b5ce51c6e84b..6eaafd2e35f5 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -21,6 +21,7 @@ typedef struct NvmeAsyncEvent {
 
 typedef struct NvmeRequest {
 struct NvmeSQueue   *sq;
+struct NvmeNamespace*ns;
 BlockAIOCB  *aiocb;
 uint16_tstatus;
 NvmeCqe cqe;
-- 
2.27.0

[PATCH 15/17] hw/block/nvme: allow multiple aios per command

From: Klaus Jensen 

This refactors how the device issues asynchronous block backend
requests. The NvmeRequest now holds a queue of NvmeAIOs that are
associated with the command. This allows multiple aios to be issued for
a command. Only when all requests have been completed will the device
post a completion queue entry.

Because the device is currently guaranteed to only issue a single aio
request per command, the benefit is not immediately obvious. But this
functionality is required to support metadata, the dataset management
command as well as zoned namespaces and other features that require
additional persistent state.

Signed-off-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
Acked-by: Keith Busch 
---
 hw/block/nvme.c   | 330 +-
 hw/block/nvme.h   | 104 +++--
 hw/block/trace-events |   3 +
 3 files changed, 360 insertions(+), 77 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 3309f8e0eac1..d836319f068a 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -74,6 +74,7 @@
 } while (0)
 
 static void nvme_process_sq(void *opaque);
+static void nvme_aio_cb(void *opaque, int ret);
 
 static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
 {
@@ -178,6 +179,9 @@ static void nvme_req_clear(NvmeRequest *req)
 {
 req->ns = NULL;
 memset(&req->cqe, 0x0, sizeof(req->cqe));
+req->status = NVME_SUCCESS;
+req->slba = req->nlb = 0x0;
+req->cb = req->cb_arg = NULL;
 
 if (req->qsg.sg) {
 qemu_sglist_destroy(&req->qsg);
@@ -399,6 +403,91 @@ static uint16_t nvme_map(NvmeCtrl *n, size_t len, 
NvmeRequest *req)
 return nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, len, req);
 }
 
+static void nvme_aio_destroy(NvmeAIO *aio)
+{
+g_free(aio);
+}
+
+/*
+ * Submit an asynchronous I/O operation as described by the given NvmeAIO. This
+ * function takes care of accounting and special handling of reads and writes
+ * going to the Controller Memory Buffer.
+ */
+static void nvme_submit_aio(NvmeAIO *aio)
+{
+BlockBackend *blk = aio->blk;
+BlockAcctCookie *acct = &aio->acct;
+BlockAcctStats *stats = blk_get_stats(blk);
+
+bool is_write;
+
+switch (aio->opc) {
+case NVME_AIO_OPC_NONE:
+break;
+
+case NVME_AIO_OPC_FLUSH:
+block_acct_start(stats, acct, 0, BLOCK_ACCT_FLUSH);
+aio->aiocb = blk_aio_flush(blk, nvme_aio_cb, aio);
+break;
+
+case NVME_AIO_OPC_WRITE_ZEROES:
+block_acct_start(stats, acct, aio->len, BLOCK_ACCT_WRITE);
+aio->aiocb = blk_aio_pwrite_zeroes(blk, aio->offset, aio->len,
+   BDRV_REQ_MAY_UNMAP, nvme_aio_cb,
+   aio);
+break;
+
+case NVME_AIO_OPC_READ:
+case NVME_AIO_OPC_WRITE:
+is_write = (aio->opc == NVME_AIO_OPC_WRITE);
+
+block_acct_start(stats, acct, aio->len,
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
+
+if (aio->flags & NVME_AIO_DMA) {
+QEMUSGList *qsg = (QEMUSGList *)aio->payload;
+
+if (is_write) {
+aio->aiocb = dma_blk_write(blk, qsg, aio->offset,
+   BDRV_SECTOR_SIZE, nvme_aio_cb, aio);
+} else {
+aio->aiocb = dma_blk_read(blk, qsg, aio->offset,
+  BDRV_SECTOR_SIZE, nvme_aio_cb, aio);
+}
+} else {
+QEMUIOVector *iov = (QEMUIOVector *)aio->payload;
+
+if (is_write) {
+aio->aiocb = blk_aio_pwritev(blk, aio->offset, iov, 0,
+ nvme_aio_cb, aio);
+} else {
+aio->aiocb = blk_aio_preadv(blk, aio->offset, iov, 0,
+nvme_aio_cb, aio);
+}
+}
+
+break;
+}
+}
+
+/*
+ * Register an asynchronous I/O operation with the NvmeRequest. The NvmeRequest
+ * will not complete until all registered AIO's have completed and the
+ * aio_tailq goes empty.
+ */
+static inline void nvme_req_add_aio(NvmeRequest *req, NvmeAIO *aio)
+{
+assert(req);
+
+trace_pci_nvme_req_add_aio(nvme_cid(req), aio, blk_name(aio->blk),
+   aio->offset, aio->len,
+   nvme_aio_opc_str(aio), req);
+
+QTAILQ_INSERT_TAIL(&req->aio_tailq, aio, tailq_entry);
+
+nvme_submit_aio(aio);
+}
+
 static void nvme_post_cqes(void *opaque)
 {
 NvmeCQueue *cq = opaque;
@@ -435,6 +524,7 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, 
NvmeRequest *req)
 assert(cq->cqid == req->sq->cqid);
 trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid,
   req->status);
+
 QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
 QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
 timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +

[PATCH 16/17] hw/block/nvme: add nvme_check_rw helper

From: Klaus Jensen 

Move various request checks to a separate function.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 32 +++-
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index d836319f068a..ec08841f74b6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -632,6 +632,28 @@ static inline uint16_t nvme_check_bounds(NvmeCtrl *n, 
NvmeNamespace *ns,
 return NVME_SUCCESS;
 }
 
+static uint16_t nvme_check_rw(NvmeCtrl *n, NvmeRequest *req)
+{
+NvmeNamespace *ns = req->ns;
+size_t len = req->nlb << nvme_ns_lbads(ns);
+uint16_t status;
+
+status = nvme_check_mdts(n, len);
+if (status) {
+trace_pci_nvme_err_mdts(nvme_cid(req), len);
+return status;
+}
+
+status = nvme_check_bounds(n, ns, req->slba, req->nlb);
+if (status) {
+trace_pci_nvme_err_invalid_lba_range(req->slba, req->nlb,
+ ns->id_ns.nsze);
+return status;
+}
+
+return NVME_SUCCESS;
+}
+
 static void nvme_rw_cb(NvmeRequest *req, void *opaque)
 {
 NvmeSQueue *sq = req->sq;
@@ -822,16 +844,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 trace_pci_nvme_rw(nvme_req_is_write(req) ? "write" : "read", req->nlb,
   len, req->slba);
 
-status = nvme_check_mdts(n, len);
+status = nvme_check_rw(n, req);
 if (status) {
-trace_pci_nvme_err_mdts(nvme_cid(req), len);
-goto invalid;
-}
-
-status = nvme_check_bounds(n, ns, req->slba, req->nlb);
-if (status) {
-trace_pci_nvme_err_invalid_lba_range(req->slba, req->nlb,
- ns->id_ns.nsze);
 goto invalid;
 }
 
-- 
2.27.0

[PATCH 17/17] hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp

From: Klaus Jensen 

Since clean up of the request qsg/iov is now always done post-use, there
is no need to use a stack-allocated qsg/iov in nvme_dma_prp.

Signed-off-by: Klaus Jensen 
Acked-by: Keith Busch 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c | 18 ++
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index ec08841f74b6..fa0f8e802d9b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -350,45 +350,39 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, 
uint32_t len,
  uint64_t prp1, uint64_t prp2, DMADirection dir,
  NvmeRequest *req)
 {
-QEMUSGList qsg;
-QEMUIOVector iov;
 uint16_t status = NVME_SUCCESS;
 
-status = nvme_map_prp(n, &qsg, &iov, prp1, prp2, len, req);
+status = nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, len, req);
 if (status) {
 return status;
 }
 
-if (qsg.nsg > 0) {
+if (req->qsg.nsg > 0) {
 uint64_t residual;
 
 if (dir == DMA_DIRECTION_TO_DEVICE) {
-residual = dma_buf_write(ptr, len, &qsg);
+residual = dma_buf_write(ptr, len, &req->qsg);
 } else {
-residual = dma_buf_read(ptr, len, &qsg);
+residual = dma_buf_read(ptr, len, &req->qsg);
 }
 
 if (unlikely(residual)) {
 trace_pci_nvme_err_invalid_dma();
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
-
-qemu_sglist_destroy(&qsg);
 } else {
 size_t bytes;
 
 if (dir == DMA_DIRECTION_TO_DEVICE) {
-bytes = qemu_iovec_to_buf(&iov, 0, ptr, len);
+bytes = qemu_iovec_to_buf(&req->iov, 0, ptr, len);
 } else {
-bytes = qemu_iovec_from_buf(&iov, 0, ptr, len);
+bytes = qemu_iovec_from_buf(&req->iov, 0, ptr, len);
 }
 
 if (unlikely(bytes != len)) {
 trace_pci_nvme_err_invalid_dma();
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
-
-qemu_iovec_destroy(&iov);
 }
 
 return status;
-- 
2.27.0

Re: [PATCH] target/m68k: fix physical address translation in m68k_cpu_get_phys_page_debug()

2020-06-29 Thread Laurent Vivier

Le 29/06/2020 à 19:56, Philippe Mathieu-Daudé a écrit :
> On 6/29/20 6:26 PM, Mark Cave-Ayland wrote:
>> The result of the get_physical_address() function should be combined with the
>> offset of the original page access before being returned. Otherwise the
>> m68k_cpu_get_phys_page_debug() function can round to the wrong page causing
>> incorrect lookups in gdbstub and various "Disassembler disagrees with
>> translator over instruction decoding" warnings to appear at translation time.
>>
>> Fixes: 88b2fef6c3 ("target/m68k: add MC68040 MMU")
>> Signed-off-by: Mark Cave-Ayland 
>> ---
>>  target/m68k/helper.c | 4 
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/target/m68k/helper.c b/target/m68k/helper.c
>> index 79b0b10ea9..631eab7774 100644
>> --- a/target/m68k/helper.c
>> +++ b/target/m68k/helper.c
>> @@ -820,10 +820,14 @@ hwaddr m68k_cpu_get_phys_page_debug(CPUState *cs, 
>> vaddr addr)
>>  if (env->sr & SR_S) {
>>  access_type |= ACCESS_SUPER;
>>  }
>> +
>>  if (get_physical_address(env, &phys_addr, &prot,
>>   addr, access_type, &page_size) != 0) {
>>  return -1;
>>  }
>> +
>> +addr &= TARGET_PAGE_MASK;
>> +phys_addr += addr & (page_size - 1);
> 
> Correct but all the callers do the same, maybe this can somehow
> be moved to the callee? Anyway:

I agree.

Anyway:
Reviewed-by: Laurent Vivier 

Thanks,
Laurent

[PATCH 14/17] hw/block/nvme: remove NvmeCmd parameter

From: Klaus Jensen 

Keep a copy of the raw nvme command in the NvmeRequest and remove the
now redundant NvmeCmd parameter.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 177 +---
 hw/block/nvme.h |   1 +
 2 files changed, 93 insertions(+), 85 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index c6c2c4670f7d..3309f8e0eac1 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -390,9 +390,9 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, 
uint32_t len,
 return status;
 }
 
-static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, size_t len,
- NvmeRequest *req)
+static uint16_t nvme_map(NvmeCtrl *n, size_t len, NvmeRequest *req)
 {
+NvmeCmd *cmd = &req->cmd;
 uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
 uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
 
@@ -562,7 +562,7 @@ static void nvme_rw_cb(void *opaque, int ret)
 nvme_enqueue_req_completion(cq, req);
 }
 
-static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
 {
 block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
  BLOCK_ACCT_FLUSH);
@@ -571,9 +571,9 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, 
NvmeRequest *req)
 return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
 {
-NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
 NvmeNamespace *ns = req->ns;
 const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
 const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
@@ -598,9 +598,9 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeCmd 
*cmd, NvmeRequest *req)
 return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 {
-NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
 NvmeNamespace *ns = req->ns;
 uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
 uint64_t slba = le64_to_cpu(rw->slba);
@@ -629,7 +629,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, 
NvmeRequest *req)
 return status;
 }
 
-if (nvme_map(n, cmd, data_size, req)) {
+if (nvme_map(n, data_size, req)) {
 block_acct_invalid(blk_get_stats(n->conf.blk), acct);
 return NVME_INVALID_FIELD | NVME_DNR;
 }
@@ -655,11 +655,12 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, 
NvmeRequest *req)
 return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
-uint32_t nsid = le32_to_cpu(cmd->nsid);
+uint32_t nsid = le32_to_cpu(req->cmd.nsid);
 
-trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), cmd->opcode);
+trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
+  req->cmd.opcode);
 
 if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
 trace_pci_nvme_err_invalid_ns(nsid, n->num_namespaces);
@@ -667,16 +668,16 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, 
NvmeRequest *req)
 }
 
 req->ns = &n->namespaces[nsid - 1];
-switch (cmd->opcode) {
+switch (req->cmd.opcode) {
 case NVME_CMD_FLUSH:
-return nvme_flush(n, cmd, req);
+return nvme_flush(n, req);
 case NVME_CMD_WRITE_ZEROES:
-return nvme_write_zeroes(n, cmd, req);
+return nvme_write_zeroes(n, req);
 case NVME_CMD_WRITE:
 case NVME_CMD_READ:
-return nvme_rw(n, cmd, req);
+return nvme_rw(n, req);
 default:
-trace_pci_nvme_err_invalid_opc(cmd->opcode);
+trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
 return NVME_INVALID_OPCODE | NVME_DNR;
 }
 }
@@ -692,10 +693,10 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
 }
 }
 
-static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
+static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
 {
-NvmeDeleteQ *c = (NvmeDeleteQ *)cmd;
-NvmeRequest *req, *next;
+NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
+NvmeRequest *r, *next;
 NvmeSQueue *sq;
 NvmeCQueue *cq;
 uint16_t qid = le16_to_cpu(c->qid);
@@ -709,19 +710,19 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
 
 sq = n->sq[qid];
 while (!QTAILQ_EMPTY(&sq->out_req_list)) {
-req = QTAILQ_FIRST(&sq->out_req_list);
-assert(req->aiocb);
-blk_aio_cancel(req->aiocb);
+r = QTAILQ_FIRST(&sq->out_req_list);
+assert(r->aiocb);
+blk_aio_cancel(r->aiocb);
 }
 if (!nvme_check_cqid(n, sq->cqid)) {
 cq = n->cq[sq->cqid];
 QTAILQ_REMOVE(&cq->sq_list, sq, entry);
 
 nvme_post_cqes(cq);
-QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
-i

[PATCH 09/17] hw/block/nvme: refactor request bounds checking

From: Klaus Jensen 

Hoist bounds checking into its own function.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c | 26 +-
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 55b305458152..7cd37ec91823 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -518,6 +518,18 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t 
event_type)
 }
 }
 
+static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
+ uint64_t slba, uint32_t nlb)
+{
+uint64_t nsze = le64_to_cpu(ns->id_ns.nsze);
+
+if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) {
+return NVME_LBA_RANGE | NVME_DNR;
+}
+
+return NVME_SUCCESS;
+}
+
 static void nvme_rw_cb(void *opaque, int ret)
 {
 NvmeRequest *req = opaque;
@@ -565,12 +577,14 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, 
NvmeNamespace *ns, NvmeCmd *cmd,
 uint32_t nlb  = le16_to_cpu(rw->nlb) + 1;
 uint64_t offset = slba << data_shift;
 uint32_t count = nlb << data_shift;
+uint16_t status;
 
 trace_pci_nvme_write_zeroes(nvme_cid(req), slba, nlb);
 
-if (unlikely(slba + nlb > ns->id_ns.nsze)) {
+status = nvme_check_bounds(n, ns, slba, nlb);
+if (status) {
 trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
-return NVME_LBA_RANGE | NVME_DNR;
+return status;
 }
 
 block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
@@ -593,13 +607,15 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 uint64_t data_offset = slba << data_shift;
 int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
 enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
+uint16_t status;
 
 trace_pci_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
 
-if (unlikely((slba + nlb) > ns->id_ns.nsze)) {
-block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+status = nvme_check_bounds(n, ns, slba, nlb);
+if (status) {
 trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
-return NVME_LBA_RANGE | NVME_DNR;
+block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+return status;
 }
 
 if (nvme_map(n, cmd, data_size, req)) {
-- 
2.27.0

[PATCH 13/17] hw/block/nvme: consolidate qsg/iov clearing

From: Klaus Jensen 

Always destroy the request qsg/iov at the end of request use.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 48 +---
 1 file changed, 17 insertions(+), 31 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 3d38f61b61e5..c6c2c4670f7d 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -178,6 +178,14 @@ static void nvme_req_clear(NvmeRequest *req)
 {
 req->ns = NULL;
 memset(&req->cqe, 0x0, sizeof(req->cqe));
+
+if (req->qsg.sg) {
+qemu_sglist_destroy(&req->qsg);
+}
+
+if (req->iov.iov) {
+qemu_iovec_destroy(&req->iov);
+}
 }
 
 static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
@@ -262,15 +270,14 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 
 status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
 if (status) {
-goto unmap;
+return status;
 }
 
 len -= trans_len;
 if (len) {
 if (unlikely(!prp2)) {
 trace_pci_nvme_err_invalid_prp2_missing();
-status = NVME_INVALID_FIELD | NVME_DNR;
-goto unmap;
+return NVME_INVALID_FIELD | NVME_DNR;
 }
 
 if (len > n->page_size) {
@@ -291,13 +298,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 if (i == n->max_prp_ents - 1 && len > n->page_size) {
 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
 trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
-status = NVME_INVALID_FIELD | NVME_DNR;
-goto unmap;
+return NVME_INVALID_FIELD | NVME_DNR;
 }
 
 if (prp_list_in_cmb != nvme_addr_is_cmb(n, prp_ent)) {
-status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
-goto unmap;
+return NVME_INVALID_USE_OF_CMB | NVME_DNR;
 }
 
 i = 0;
@@ -310,14 +315,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 
 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
 trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
-status = NVME_INVALID_FIELD | NVME_DNR;
-goto unmap;
+return NVME_INVALID_FIELD | NVME_DNR;
 }
 
 trans_len = MIN(len, n->page_size);
 status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
 if (status) {
-goto unmap;
+return status;
 }
 
 len -= trans_len;
@@ -326,27 +330,16 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 } else {
 if (unlikely(prp2 & (n->page_size - 1))) {
 trace_pci_nvme_err_invalid_prp2_align(prp2);
-status = NVME_INVALID_FIELD | NVME_DNR;
-goto unmap;
+return NVME_INVALID_FIELD | NVME_DNR;
 }
 status = nvme_map_addr(n, qsg, iov, prp2, len);
 if (status) {
-goto unmap;
+return status;
 }
 }
 }
+
 return NVME_SUCCESS;
-
-unmap:
-if (iov && iov->iov) {
-qemu_iovec_destroy(iov);
-}
-
-if (qsg && qsg->sg) {
-qemu_sglist_destroy(qsg);
-}
-
-return status;
 }
 
 static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
@@ -566,13 +559,6 @@ static void nvme_rw_cb(void *opaque, int ret)
 req->status = NVME_INTERNAL_DEV_ERROR;
 }
 
-if (req->qsg.nalloc) {
-qemu_sglist_destroy(&req->qsg);
-}
-if (req->iov.nalloc) {
-qemu_iovec_destroy(&req->iov);
-}
-
 nvme_enqueue_req_completion(cq, req);
 }
 
-- 
2.27.0

[PATCH 11/17] hw/block/nvme: be consistent about zeros vs zeroes

From: Klaus Jensen 

The NVM Express specification generally uses 'zeroes' and not 'zeros'.
It might very well be wrong, but let us align with it.

Signed-off-by: Klaus Jensen 
---
 block/nvme.c | 4 ++--
 hw/block/nvme.c  | 8 
 include/block/nvme.h | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 29e90557c428..bee0878dec71 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -465,7 +465,7 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
   s->page_size / sizeof(uint64_t) * s->page_size);
 
 oncs = le16_to_cpu(idctrl->oncs);
-s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROS);
+s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROES);
 s->supports_discard = !!(oncs & NVME_ONCS_DSM);
 
 memset(resp, 0, 4096);
@@ -1117,7 +1117,7 @@ static coroutine_fn int 
nvme_co_pwrite_zeroes(BlockDriverState *bs,
 }
 
 NvmeCmd cmd = {
-.opcode = NVME_CMD_WRITE_ZEROS,
+.opcode = NVME_CMD_WRITE_ZEROES,
 .nsid = cpu_to_le32(s->nsid),
 .cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF),
 .cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF),
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index d5dff6869b69..12f1b6331c43 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -579,7 +579,7 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
+static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
 NvmeRequest *req)
 {
 NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
@@ -679,8 +679,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, 
NvmeRequest *req)
 switch (cmd->opcode) {
 case NVME_CMD_FLUSH:
 return nvme_flush(n, ns, cmd, req);
-case NVME_CMD_WRITE_ZEROS:
-return nvme_write_zeros(n, ns, cmd, req);
+case NVME_CMD_WRITE_ZEROES:
+return nvme_write_zeroes(n, ns, cmd, req);
 case NVME_CMD_WRITE:
 case NVME_CMD_READ:
 return nvme_rw(n, ns, cmd, req);
@@ -2280,7 +2280,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->sqes = (0x6 << 4) | 0x6;
 id->cqes = (0x4 << 4) | 0x4;
 id->nn = cpu_to_le32(n->num_namespaces);
-id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP |
+id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
NVME_ONCS_FEATURES);
 
 pstrcpy((char *) id->subnqn, sizeof(id->subnqn), "nqn.2019-08.org.qemu:");
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 60833039a6c5..91456255ffa7 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -459,7 +459,7 @@ enum NvmeIoCommands {
 NVME_CMD_READ   = 0x02,
 NVME_CMD_WRITE_UNCOR= 0x04,
 NVME_CMD_COMPARE= 0x05,
-NVME_CMD_WRITE_ZEROS= 0x08,
+NVME_CMD_WRITE_ZEROES   = 0x08,
 NVME_CMD_DSM= 0x09,
 };
 
@@ -837,7 +837,7 @@ enum NvmeIdCtrlOncs {
 NVME_ONCS_COMPARE   = 1 << 0,
 NVME_ONCS_WRITE_UNCORR  = 1 << 1,
 NVME_ONCS_DSM   = 1 << 2,
-NVME_ONCS_WRITE_ZEROS   = 1 << 3,
+NVME_ONCS_WRITE_ZEROES  = 1 << 3,
 NVME_ONCS_FEATURES  = 1 << 4,
 NVME_ONCS_RESRVATIONS   = 1 << 5,
 NVME_ONCS_TIMESTAMP = 1 << 6,
-- 
2.27.0

[PATCH 05/17] hw/block/nvme: refactor dma read/write

From: Klaus Jensen 

Refactor the nvme_dma_{read,write}_prp functions into a common function
taking a DMADirection parameter.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c | 88 -
 1 file changed, 43 insertions(+), 45 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index ded78a2301a6..86d35547b752 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -326,55 +326,50 @@ unmap:
 return status;
 }
 
-static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
-   uint64_t prp1, uint64_t prp2)
+static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+ uint64_t prp1, uint64_t prp2, DMADirection dir)
 {
 QEMUSGList qsg;
 QEMUIOVector iov;
 uint16_t status = NVME_SUCCESS;
 
-if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
-return NVME_INVALID_FIELD | NVME_DNR;
+status = nvme_map_prp(&qsg, &iov, prp1, prp2, len, n);
+if (status) {
+return status;
 }
+
 if (qsg.nsg > 0) {
-if (dma_buf_write(ptr, len, &qsg)) {
-status = NVME_INVALID_FIELD | NVME_DNR;
+uint64_t residual;
+
+if (dir == DMA_DIRECTION_TO_DEVICE) {
+residual = dma_buf_write(ptr, len, &qsg);
+} else {
+residual = dma_buf_read(ptr, len, &qsg);
 }
-qemu_sglist_destroy(&qsg);
-} else {
-if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
-status = NVME_INVALID_FIELD | NVME_DNR;
-}
-qemu_iovec_destroy(&iov);
-}
-return status;
-}
 
-static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
-uint64_t prp1, uint64_t prp2)
-{
-QEMUSGList qsg;
-QEMUIOVector iov;
-uint16_t status = NVME_SUCCESS;
-
-trace_pci_nvme_dma_read(prp1, prp2);
-
-if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
-return NVME_INVALID_FIELD | NVME_DNR;
-}
-if (qsg.nsg > 0) {
-if (unlikely(dma_buf_read(ptr, len, &qsg))) {
+if (unlikely(residual)) {
 trace_pci_nvme_err_invalid_dma();
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
+
 qemu_sglist_destroy(&qsg);
 } else {
-if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) {
+size_t bytes;
+
+if (dir == DMA_DIRECTION_TO_DEVICE) {
+bytes = qemu_iovec_to_buf(&iov, 0, ptr, len);
+} else {
+bytes = qemu_iovec_from_buf(&iov, 0, ptr, len);
+}
+
+if (unlikely(bytes != len)) {
 trace_pci_nvme_err_invalid_dma();
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
+
 qemu_iovec_destroy(&iov);
 }
+
 return status;
 }
 
@@ -806,8 +801,8 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, 
uint8_t rae,
 nvme_clear_events(n, NVME_AER_TYPE_SMART);
 }
 
-return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
- prp2);
+return nvme_dma_prp(n, (uint8_t *) &smart + off, trans_len, prp1, prp2,
+DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
@@ -828,8 +823,8 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, 
uint32_t buf_len,
 
 trans_len = MIN(sizeof(fw_log) - off, buf_len);
 
-return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
- prp2);
+return nvme_dma_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, prp2,
+DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
@@ -853,7 +848,8 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, 
uint8_t rae,
 
 trans_len = MIN(sizeof(errlog) - off, buf_len);
 
-return nvme_dma_read_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2);
+return nvme_dma_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2,
+DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
@@ -1008,8 +1004,8 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, 
NvmeIdentify *c)
 
 trace_pci_nvme_identify_ctrl();
 
-return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
-prp1, prp2);
+return nvme_dma_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), prp1,
+prp2, DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
@@ -1028,8 +1024,8 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
NvmeIdentify *c)
 
 ns = &n->namespaces[nsid - 1];
 
-return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
-prp1, prp2);
+return nvme_dma_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), prp1,
+prp2, DMA_DIRECTION_FROM_DEVICE);

[PATCH 08/17] hw/block/nvme: verify validity of prp lists in the cmb

From: Klaus Jensen 

Before this patch the device already supported PRP lists in the CMB, but
it neither checked their validity nor announced the support in the LISTS
field of the CMBSZ register.

If some of the PRPs in a PRP list are in the CMB, then ALL entries must
be there. This patch verifies that requirement and properly announces
support for PRP lists in the CMB.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index d236a3cdee54..55b305458152 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -237,6 +237,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 trans_len = MIN(len, trans_len);
 int num_prps = (len >> n->page_bits) + 1;
 uint16_t status;
+bool prp_list_in_cmb = false;
 
 trace_pci_nvme_map_prp(nvme_cid(req), trans_len, len, prp1, prp2,
num_prps);
@@ -264,11 +265,16 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 status = NVME_INVALID_FIELD | NVME_DNR;
 goto unmap;
 }
+
 if (len > n->page_size) {
 uint64_t prp_list[n->max_prp_ents];
 uint32_t nents, prp_trans;
 int i = 0;
 
+if (nvme_addr_is_cmb(n, prp2)) {
+prp_list_in_cmb = true;
+}
+
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
 nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
@@ -282,6 +288,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 goto unmap;
 }
 
+if (prp_list_in_cmb != nvme_addr_is_cmb(n, prp_ent)) {
+status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
+goto unmap;
+}
+
 i = 0;
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
@@ -301,6 +312,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 if (status) {
 goto unmap;
 }
+
 len -= trans_len;
 i++;
 }
@@ -2097,7 +2109,7 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
 
 NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
 NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
-NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
+NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1);
 NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
 NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
 NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
-- 
2.27.0

[PATCH 10/17] hw/block/nvme: add check for mdts

From: Klaus Jensen 

Add 'mdts' device parameter to control the Maximum Data Transfer Size of
the controller and check that it is respected.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c   | 32 ++--
 hw/block/nvme.h   |  1 +
 hw/block/trace-events |  1 +
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 7cd37ec91823..d5dff6869b69 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -18,9 +18,10 @@
  * Usage: add options:
  *  -drive file=,if=none,id=
  *  -device nvme,drive=,serial=,id=, \
- *  cmb_size_mb=, \
+ *  [cmb_size_mb=,] \
  *  [pmrdev=,] \
- *  max_ioqpairs=
+ *  [max_ioqpairs=,] \
+ *  [mdts=]
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
  * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
@@ -518,6 +519,17 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t 
event_type)
 }
 }
 
+static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len)
+{
+uint8_t mdts = n->params.mdts;
+
+if (mdts && len > n->page_size << mdts) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
+return NVME_SUCCESS;
+}
+
 static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
  uint64_t slba, uint32_t nlb)
 {
@@ -611,6 +623,13 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 
 trace_pci_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
 
+status = nvme_check_mdts(n, data_size);
+if (status) {
+trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
+block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+return status;
+}
+
 status = nvme_check_bounds(n, ns, slba, nlb);
 if (status) {
 trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
@@ -904,6 +923,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, 
NvmeRequest *req)
 uint32_t numdl, numdu;
 uint64_t off, lpol, lpou;
 size_t   len;
+uint16_t status;
 
 numdl = (dw10 >> 16);
 numdu = (dw11 & 0xffff);
@@ -919,6 +939,12 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, 
NvmeRequest *req)
 
 trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off);
 
+status = nvme_check_mdts(n, len);
+if (status) {
+trace_pci_nvme_err_mdts(nvme_cid(req), len);
+return status;
+}
+
 switch (lid) {
 case NVME_LOG_ERROR_INFO:
 return nvme_error_info(n, cmd, rae, len, off, req);
@@ -2227,6 +2253,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->ieee[0] = 0x00;
 id->ieee[1] = 0x02;
 id->ieee[2] = 0xb3;
+id->mdts = n->params.mdts;
 id->ver = cpu_to_le32(NVME_SPEC_VER);
 id->oacs = cpu_to_le16(0);
 
@@ -2345,6 +2372,7 @@ static Property nvme_props[] = {
 DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65),
 DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
 DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
+DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 0169e1736f0c..b5ce51c6e84b 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -11,6 +11,7 @@ typedef struct NvmeParams {
 uint32_t cmb_size_mb;
 uint8_t  aerl;
 uint32_t aer_max_queued;
+uint8_t  mdts;
 } NvmeParams;
 
 typedef struct NvmeAsyncEvent {
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 6d0cd588c786..5d7d4679650b 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -85,6 +85,7 @@ pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
 pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
 
 # nvme traces for error conditions
+pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %"PRIu64""
 pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
 pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or 
not page aligned: 0x%"PRIx64""
 pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 
0x%"PRIx64""
-- 
2.27.0

[PATCH 07/17] hw/block/nvme: add request mapping helper

From: Klaus Jensen 

Introduce the nvme_map helper to remove some noise in the main nvme_rw
function.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index e7b7a1900b0b..d236a3cdee54 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -378,6 +378,15 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, 
uint32_t len,
 return status;
 }
 
+static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, size_t len,
+ NvmeRequest *req)
+{
+uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
+uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
+
+return nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, len, req);
+}
+
 static void nvme_post_cqes(void *opaque)
 {
 NvmeCQueue *cq = opaque;
@@ -565,8 +574,6 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
 uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
 uint64_t slba = le64_to_cpu(rw->slba);
-uint64_t prp1 = le64_to_cpu(rw->dptr.prp1);
-uint64_t prp2 = le64_to_cpu(rw->dptr.prp2);
 
 uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
 uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
@@ -583,7 +590,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 return NVME_LBA_RANGE | NVME_DNR;
 }
 
-if (nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, data_size, req)) {
+if (nvme_map(n, cmd, data_size, req)) {
 block_acct_invalid(blk_get_stats(n->conf.blk), acct);
 return NVME_INVALID_FIELD | NVME_DNR;
 }
-- 
2.27.0

[PATCH 03/17] hw/block/nvme: replace dma_acct with blk_acct equivalent

From: Klaus Jensen 

The QSG isn't always initialized, so accounting could be wrong. Issue a
call to block_acct_start instead with the size taken from the QSG or IOV
depending on the kind of I/O.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index aaf4651eab4c..54f31e7429c6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -585,9 +585,10 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 
-dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
 if (req->qsg.nsg > 0) {
 req->has_sg = true;
+block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->qsg.size,
+ acct);
 req->aiocb = is_write ?
 dma_blk_write(n->conf.blk, &req->qsg, data_offset, 
BDRV_SECTOR_SIZE,
   nvme_rw_cb, req) :
@@ -595,6 +596,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
  nvme_rw_cb, req);
 } else {
 req->has_sg = false;
+block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->iov.size,
+ acct);
 req->aiocb = is_write ?
 blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
 req) :
-- 
2.27.0

[PATCH 02/17] hw/block/nvme: add mapping helpers

From: Klaus Jensen 

Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
use them in nvme_map_prp.

This fixes a bug where in the case of a CMB transfer, the device would
map to the buffer with a wrong length.

Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c   | 109 +++---
 hw/block/trace-events |   2 +
 2 files changed, 94 insertions(+), 17 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 3dbce536456c..aaf4651eab4c 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -74,6 +74,11 @@
 
 static void nvme_process_sq(void *opaque);
 
+static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
+{
+return &n->cmbuf[addr - n->ctrl_mem.addr];
+}
+
 static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 {
 hwaddr low = n->ctrl_mem.addr;
@@ -85,7 +90,7 @@ static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
 if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) {
-memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size);
+memcpy(buf, nvme_addr_to_cmb(n, addr), size);
 return;
 }
 
@@ -168,29 +173,91 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
 }
 }
 
+static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
+  size_t len)
+{
+if (!len) {
+return NVME_SUCCESS;
+}
+
+trace_pci_nvme_map_addr_cmb(addr, len);
+
+if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
+return NVME_DATA_TRAS_ERROR;
+}
+
+qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
+
+return NVME_SUCCESS;
+}
+
+static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
+  hwaddr addr, size_t len)
+{
+if (!len) {
+return NVME_SUCCESS;
+}
+
+trace_pci_nvme_map_addr(addr, len);
+
+if (nvme_addr_is_cmb(n, addr)) {
+if (qsg && qsg->sg) {
+return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+}
+
+assert(iov);
+
+if (!iov->iov) {
+qemu_iovec_init(iov, 1);
+}
+
+return nvme_map_addr_cmb(n, iov, addr, len);
+}
+
+if (iov && iov->iov) {
+return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+}
+
+assert(qsg);
+
+if (!qsg->sg) {
+pci_dma_sglist_init(qsg, &n->parent_obj, 1);
+}
+
+qemu_sglist_add(qsg, addr, len);
+
+return NVME_SUCCESS;
+}
+
 static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
  uint64_t prp2, uint32_t len, NvmeCtrl *n)
 {
 hwaddr trans_len = n->page_size - (prp1 % n->page_size);
 trans_len = MIN(len, trans_len);
 int num_prps = (len >> n->page_bits) + 1;
+uint16_t status;
 
 if (unlikely(!prp1)) {
 trace_pci_nvme_err_invalid_prp();
 return NVME_INVALID_FIELD | NVME_DNR;
-} else if (n->bar.cmbsz && prp1 >= n->ctrl_mem.addr &&
-   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
-qsg->nsg = 0;
+}
+
+if (nvme_addr_is_cmb(n, prp1)) {
 qemu_iovec_init(iov, num_prps);
-qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], 
trans_len);
 } else {
 pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
-qemu_sglist_add(qsg, prp1, trans_len);
 }
+
+status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
+if (status) {
+goto unmap;
+}
+
 len -= trans_len;
 if (len) {
 if (unlikely(!prp2)) {
 trace_pci_nvme_err_invalid_prp2_missing();
+status = NVME_INVALID_FIELD | NVME_DNR;
 goto unmap;
 }
 if (len > n->page_size) {
@@ -207,6 +274,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector 
*iov, uint64_t prp1,
 if (i == n->max_prp_ents - 1 && len > n->page_size) {
 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
 trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
+status = NVME_INVALID_FIELD | NVME_DNR;
 goto unmap;
 }
 
@@ -220,14 +288,14 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
QEMUIOVector *iov, uint64_t prp1,
 
 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
 trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
+status = NVME_INVALID_FIELD | NVME_DNR;
 goto unmap;
 }
 
 trans_len = MIN(len, n->page_size);
-if (qsg->nsg){
-qemu_sglist_add(qsg, prp_ent, trans_len);
-} else {
-qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - 
n->ctrl_mem.addr], trans_len);
+stat

[PATCH 04/17] hw/block/nvme: remove redundant has_sg member

From: Klaus Jensen 

Remove the has_sg member from NvmeRequest since it's redundant.

Also, make sure the request iov is destroyed at completion time.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c | 11 ++-
 hw/block/nvme.h |  1 -
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 54f31e7429c6..ded78a2301a6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -513,16 +513,20 @@ static void nvme_rw_cb(void *opaque, int ret)
 block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
 req->status = NVME_INTERNAL_DEV_ERROR;
 }
-if (req->has_sg) {
+
+if (req->qsg.nalloc) {
 qemu_sglist_destroy(&req->qsg);
 }
+if (req->iov.nalloc) {
+qemu_iovec_destroy(&req->iov);
+}
+
 nvme_enqueue_req_completion(cq, req);
 }
 
 static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
 NvmeRequest *req)
 {
-req->has_sg = false;
 block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
  BLOCK_ACCT_FLUSH);
 req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
@@ -548,7 +552,6 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace 
*ns, NvmeCmd *cmd,
 return NVME_LBA_RANGE | NVME_DNR;
 }
 
-req->has_sg = false;
 block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
  BLOCK_ACCT_WRITE);
 req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
@@ -586,7 +589,6 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 }
 
 if (req->qsg.nsg > 0) {
-req->has_sg = true;
 block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->qsg.size,
  acct);
 req->aiocb = is_write ?
@@ -595,7 +597,6 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
  nvme_rw_cb, req);
 } else {
-req->has_sg = false;
 block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->iov.size,
  acct);
 req->aiocb = is_write ?
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 54ec54f491bf..0169e1736f0c 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -22,7 +22,6 @@ typedef struct NvmeRequest {
 struct NvmeSQueue   *sq;
 BlockAIOCB  *aiocb;
 uint16_tstatus;
-boolhas_sg;
 NvmeCqe cqe;
 BlockAcctCookie acct;
 QEMUSGList  qsg;
-- 
2.27.0

[PATCH 06/17] hw/block/nvme: pass request along for tracing

From: Klaus Jensen 

Pass along the NvmeRequest in various functions since it is very useful
for tracing.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c   | 67 +--
 hw/block/trace-events |  1 +
 2 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 86d35547b752..e7b7a1900b0b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -229,14 +229,18 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 return NVME_SUCCESS;
 }
 
-static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
- uint64_t prp2, uint32_t len, NvmeCtrl *n)
+static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
+ uint64_t prp1, uint64_t prp2, uint32_t len,
+ NvmeRequest *req)
 {
 hwaddr trans_len = n->page_size - (prp1 % n->page_size);
 trans_len = MIN(len, trans_len);
 int num_prps = (len >> n->page_bits) + 1;
 uint16_t status;
 
+trace_pci_nvme_map_prp(nvme_cid(req), trans_len, len, prp1, prp2,
+   num_prps);
+
 if (unlikely(!prp1)) {
 trace_pci_nvme_err_invalid_prp();
 return NVME_INVALID_FIELD | NVME_DNR;
@@ -327,13 +331,14 @@ unmap:
 }
 
 static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
- uint64_t prp1, uint64_t prp2, DMADirection dir)
+ uint64_t prp1, uint64_t prp2, DMADirection dir,
+ NvmeRequest *req)
 {
 QEMUSGList qsg;
 QEMUIOVector iov;
 uint16_t status = NVME_SUCCESS;
 
-status = nvme_map_prp(&qsg, &iov, prp1, prp2, len, n);
+status = nvme_map_prp(n, &qsg, &iov, prp1, prp2, len, req);
 if (status) {
 return status;
 }
@@ -578,7 +583,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
NvmeCmd *cmd,
 return NVME_LBA_RANGE | NVME_DNR;
 }
 
-if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) {
+if (nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, data_size, req)) {
 block_acct_invalid(blk_get_stats(n->conf.blk), acct);
 return NVME_INVALID_FIELD | NVME_DNR;
 }
@@ -802,7 +807,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, 
uint8_t rae,
 }
 
 return nvme_dma_prp(n, (uint8_t *) &smart + off, trans_len, prp1, prp2,
-DMA_DIRECTION_FROM_DEVICE);
+DMA_DIRECTION_FROM_DEVICE, req);
 }
 
 static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
@@ -824,7 +829,7 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, 
uint32_t buf_len,
 trans_len = MIN(sizeof(fw_log) - off, buf_len);
 
 return nvme_dma_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, prp2,
-DMA_DIRECTION_FROM_DEVICE);
+DMA_DIRECTION_FROM_DEVICE, req);
 }
 
 static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
@@ -849,7 +854,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, 
uint8_t rae,
 trans_len = MIN(sizeof(errlog) - off, buf_len);
 
 return nvme_dma_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2,
-DMA_DIRECTION_FROM_DEVICE);
+DMA_DIRECTION_FROM_DEVICE, req);
 }
 
 static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
@@ -997,7 +1002,8 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
 return NVME_SUCCESS;
 }
 
-static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c,
+   NvmeRequest *req)
 {
 uint64_t prp1 = le64_to_cpu(c->prp1);
 uint64_t prp2 = le64_to_cpu(c->prp2);
@@ -1005,10 +1011,11 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, 
NvmeIdentify *c)
 trace_pci_nvme_identify_ctrl();
 
 return nvme_dma_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), prp1,
-prp2, DMA_DIRECTION_FROM_DEVICE);
+prp2, DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c,
+ NvmeRequest *req)
 {
 NvmeNamespace *ns;
 uint32_t nsid = le32_to_cpu(c->nsid);
@@ -1025,10 +1032,11 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
NvmeIdentify *c)
 ns = &n->namespaces[nsid - 1];
 
 return nvme_dma_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), prp1,
-prp2, DMA_DIRECTION_FROM_DEVICE);
+prp2, DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c,
+

[PATCH 00/17] hw/block/nvme: AIO and address mapping refactoring