date:20210118

Re: [RFC PATCH V2 00/11] hw/block/nvme: support multi-path for ctrl/ns

2021-01-18 Thread Minwoo Im

On 21-01-19 07:04:04, Klaus Jensen wrote:
> On Jan 19 12:21, Minwoo Im wrote:
> > On 21-01-18 22:14:45, Klaus Jensen wrote:
> > > On Jan 17 23:53, Minwoo Im wrote:
> > > > Hello,
> > > > 
> > > > This patch series introduces NVMe subsystem device to support multi-path
> > > > I/O in NVMe device model.  Two use-cases are supported along with this
> > > > patch: Multi-controller, Namespace Sharing.
> > > > 
> > > > V1 RFC has been discussed with Klaus and Keith, I really appreciate them
> > > > for this patch series to have proper direction [1].
> > > > 
> > > > This patch series contains few start-up refactoring pathces from the
> > > > first to fifth patches to make nvme-ns device not to rely on the nvme
> > > > controller always.  Because nvme-ns shall be able to be mapped to the
> > > > subsystem level, not a single controller level so that it should provide
> > > > generic initialization code: nvme_ns_setup() with NvmeCtrl.  To do that,
> > > > the first five patches are to remove the NvmeCtrl * instance argument
> > > > from the nvme_ns_setup().  I'd be happy if they are picked!
> > > > 
> > > > For controller and namespace devices, 'subsys' property has been
> > > > introduced to map them to a subsystem.  If multi-controller needed, we
> > > > can specify 'subsys' to controllers the same.
> > > > 
> > > > For namespace deivice, if 'subsys' is not given just like it was, it
> > > > will have to be provided with 'bus' parameter to specify a nvme
> > > > controller device to attach, it means, they are mutual-exlusive.  To
> > > > share a namespace between or among controllers, then nvme-ns should have
> > > > 'subsys' property to a single nvme subsystem instance.  To make a
> > > > namespace private one, then we need to specify 'bus' property rather
> > > > than the 'subsys'.
> > > > 
> > > > Of course, this series does not require any updates for the run command
> > > > for the previos users.
> > > > 
> > > > Plase refer the following example with nvme-cli output:
> > > > 
> > > > QEMU Run:
> > > >   -device nvme-subsys,id=subsys0 \
> > > >   -device nvme,serial=foo,id=nvme0,subsys=subsys0 \
> > > >   -device nvme,serial=bar,id=nvme1,subsys=subsys0 \
> > > >   -device nvme,serial=baz,id=nvme2,subsys=subsys0 \
> > > >   -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0 \
> > > >   -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2 \
> > > >   \
> > > >   -device nvme,serial=qux,id=nvme3 \
> > > >   -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3
> > > > 
> > > > nvme-cli:
> > > >   root@vm:~/work# nvme list -v
> > > >   NVM Express Subsystems
> > > > 
> > > >   SubsystemSubsystem-NQN
> > > > Controllers
> > > >    
> > > > 
> > > >  
> > > >   nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
> > > > nvme0, nvme1, nvme2
> > > >   nvme-subsys3 nqn.2019-08.org.qemu:qux 
> > > > nvme3
> > > > 
> > > >   NVM Express Controllers
> > > > 
> > > >   Device   SN   MN  
> > > >  FR   TxPort AddressSubsystemNamespaces
> > > >     
> > > >   -- -- 
> > > >  
> > > >   nvme0foo  QEMU NVMe Ctrl  
> > > >  1.0  pcie   :00:06.0   nvme-subsys1 nvme1n1
> > > >   nvme1bar  QEMU NVMe Ctrl  
> > > >  1.0  pcie   :00:07.0   nvme-subsys1 nvme1n1
> > > >   nvme2baz  QEMU NVMe Ctrl  
> > > >  1.0  pcie   :00:08.0   nvme-subsys1 nvme1n1, nvme1n2
> > > >   nvme3qux  QEMU NVMe Ctrl  
> > > >  1.0  pcie   :00:09.0   nvme-subsys3
> > > > 
> > > >   NVM Express Namespaces
> > > > 
> > > >   Device   NSID Usage  Format   
> > > > Controllers
> > > >     --  
> > > > 
> > > >   nvme1n1  1134.22  MB / 134.22  MB512   B +  0 B   
> > > > nvme0, nvme1, nvme2
> > > >   nvme1n2  2268.44  MB / 268.44  MB512   B +  0 B   
> > > > nvme2
> > > >   nvme3n1  3268.44  MB / 268.44  MB512   B +  0 B   
> > > > nvme3
> > > > 
> > > > Summary:
> > > >   - Refactored nvme-ns device not to rely on controller during the
> > > > setup.  [1/11 - 5/11]
> > > >   - Introduced a nvme-subsys device model. [6/11]
> > > >   - Create subsystem NQN based on subsystem. [7/11]
> > > >   - Introduced multi-controller model. [8/11 - 9/11]
> > > >   -

Re: [RFC PATCH V2 02/11] hw/block/nvme: open code for volatile write cache

2021-01-18 Thread Klaus Jensen

On Jan 17 23:53, Minwoo Im wrote:
> Volatile Write Cache(VWC) feature is set in nvme_ns_setup() in the
> initial time.  This feature is related to block device backed,  but this
> feature is controlled in controller level via Set/Get Features command.
> 
> This patch removed dependency between nvme and nvme-ns to manage the VWC
> flag value.  Also, it open coded the Get Features for VWC to check all
> namespaces attached to the controller, and if false detected, return
> directly false.
> 
> Signed-off-by: Minwoo Im 
> ---
>  hw/block/nvme-ns.c |  4 
>  hw/block/nvme.c| 15 ---
>  hw/block/nvme.h|  1 -
>  3 files changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
> index 32662230130b..c403cd36b6bd 100644
> --- a/hw/block/nvme-ns.c
> +++ b/hw/block/nvme-ns.c
> @@ -89,10 +89,6 @@ static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace 
> *ns, Error **errp)
>  return -1;
>  }
>  
> -if (blk_enable_write_cache(ns->blkconf.blk)) {
> -n->features.vwc = 0x1;
> -}
> -
>  return 0;
>  }
>  
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index cf0fe28fe6eb..b2a9c48a9d81 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -3033,6 +3033,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, 
> NvmeRequest *req)
>  NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10);
>  uint16_t iv;
>  NvmeNamespace *ns;
> +int i;
>  
>  static const uint32_t nvme_feature_default[NVME_FID_MAX] = {
>  [NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT,
> @@ -3108,7 +3109,17 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, 
> NvmeRequest *req)
>  result = ns->features.err_rec;
>  goto out;
>  case NVME_VOLATILE_WRITE_CACHE:
> -result = n->features.vwc;
> +for (i = 1; i <= n->num_namespaces; i++) {
> +ns = nvme_ns(n, i);
> +if (!ns) {
> +continue;
> +}
> +
> +result = blk_enable_write_cache(ns->blkconf.blk);
> +if (!result) {
> +break;
> +}
> +}

I did a small tweak here and changed `if (!result)` to `if (result)`. We
want to report that a volatile write cache is present if ANY of the
namespace backing devices have a write cache.


signature.asc
Description: PGP signature

Re: [RFC PATCH V2 00/11] hw/block/nvme: support multi-path for ctrl/ns

2021-01-18 Thread Klaus Jensen

On Jan 19 12:21, Minwoo Im wrote:
> On 21-01-18 22:14:45, Klaus Jensen wrote:
> > On Jan 17 23:53, Minwoo Im wrote:
> > > Hello,
> > > 
> > > This patch series introduces NVMe subsystem device to support multi-path
> > > I/O in NVMe device model.  Two use-cases are supported along with this
> > > patch: Multi-controller, Namespace Sharing.
> > > 
> > > V1 RFC has been discussed with Klaus and Keith, I really appreciate them
> > > for this patch series to have proper direction [1].
> > > 
> > > This patch series contains few start-up refactoring pathces from the
> > > first to fifth patches to make nvme-ns device not to rely on the nvme
> > > controller always.  Because nvme-ns shall be able to be mapped to the
> > > subsystem level, not a single controller level so that it should provide
> > > generic initialization code: nvme_ns_setup() with NvmeCtrl.  To do that,
> > > the first five patches are to remove the NvmeCtrl * instance argument
> > > from the nvme_ns_setup().  I'd be happy if they are picked!
> > > 
> > > For controller and namespace devices, 'subsys' property has been
> > > introduced to map them to a subsystem.  If multi-controller needed, we
> > > can specify 'subsys' to controllers the same.
> > > 
> > > For namespace deivice, if 'subsys' is not given just like it was, it
> > > will have to be provided with 'bus' parameter to specify a nvme
> > > controller device to attach, it means, they are mutual-exlusive.  To
> > > share a namespace between or among controllers, then nvme-ns should have
> > > 'subsys' property to a single nvme subsystem instance.  To make a
> > > namespace private one, then we need to specify 'bus' property rather
> > > than the 'subsys'.
> > > 
> > > Of course, this series does not require any updates for the run command
> > > for the previos users.
> > > 
> > > Plase refer the following example with nvme-cli output:
> > > 
> > > QEMU Run:
> > >   -device nvme-subsys,id=subsys0 \
> > >   -device nvme,serial=foo,id=nvme0,subsys=subsys0 \
> > >   -device nvme,serial=bar,id=nvme1,subsys=subsys0 \
> > >   -device nvme,serial=baz,id=nvme2,subsys=subsys0 \
> > >   -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0 \
> > >   -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2 \
> > >   \
> > >   -device nvme,serial=qux,id=nvme3 \
> > >   -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3
> > > 
> > > nvme-cli:
> > >   root@vm:~/work# nvme list -v
> > >   NVM Express Subsystems
> > > 
> > >   SubsystemSubsystem-NQN  
> > >   Controllers
> > >    
> > > 
> > >  
> > >   nvme-subsys1 nqn.2019-08.org.qemu:subsys0   
> > >   nvme0, nvme1, nvme2
> > >   nvme-subsys3 nqn.2019-08.org.qemu:qux   
> > >   nvme3
> > > 
> > >   NVM Express Controllers
> > > 
> > >   Device   SN   MN   
> > > FR   TxPort AddressSubsystemNamespaces
> > >      
> > >  -- --  
> > >   nvme0foo  QEMU NVMe Ctrl   
> > > 1.0  pcie   :00:06.0   nvme-subsys1 nvme1n1
> > >   nvme1bar  QEMU NVMe Ctrl   
> > > 1.0  pcie   :00:07.0   nvme-subsys1 nvme1n1
> > >   nvme2baz  QEMU NVMe Ctrl   
> > > 1.0  pcie   :00:08.0   nvme-subsys1 nvme1n1, nvme1n2
> > >   nvme3qux  QEMU NVMe Ctrl   
> > > 1.0  pcie   :00:09.0   nvme-subsys3
> > > 
> > >   NVM Express Namespaces
> > > 
> > >   Device   NSID Usage  Format   
> > > Controllers
> > >     --  
> > > 
> > >   nvme1n1  1134.22  MB / 134.22  MB512   B +  0 B   
> > > nvme0, nvme1, nvme2
> > >   nvme1n2  2268.44  MB / 268.44  MB512   B +  0 B   nvme2
> > >   nvme3n1  3268.44  MB / 268.44  MB512   B +  0 B   nvme3
> > > 
> > > Summary:
> > >   - Refactored nvme-ns device not to rely on controller during the
> > > setup.  [1/11 - 5/11]
> > >   - Introduced a nvme-subsys device model. [6/11]
> > >   - Create subsystem NQN based on subsystem. [7/11]
> > >   - Introduced multi-controller model. [8/11 - 9/11]
> > >   - Updated namespace sharing scheme to be based on nvme-subsys
> > > hierarchy. [10/11 - 11/11]
> > > 
> > > Since RFC V1:
> > >   - Updated namespace sharing scheme to be based on nvme-subsys
> > > hierarchy.
> > > 
> > 
> > Great stuff Minwoo. Thanks!
>

Re: Re: [PATCH v4 0/3] support NVMe smart critial warning injection

2021-01-18 Thread Klaus Jensen

On Jan 19 10:05, zhenwei pi wrote:
> On 1/18/21 5:34 PM, Klaus Jensen wrote:
> > On Jan 15 11:26, zhenwei pi wrote:
> > > v3 -> v4:
> > > - Drop "Fix overwritten bar.cap". (Already fixed)
> > > 
> > > - Avoid to enqueue the duplicate event.
> > > 
> > > - Several minor changes for coding style & function/variable name.
> > > 
> > > v2 -> v3:
> > > - Introduce "Persistent Memory Region has become read-only or
> > >unreliable"
> > > 
> > > - Fix overwritten bar.cap
> > > 
> > > - Check smart critical warning value from QOM.
> > > 
> > > - Trigger asynchronous event during smart warning injection.
> > > 
> > > v1 -> v2:
> > > - Suggested by Philippe & Klaus, set/get smart_critical_warning by QMP.
> > > 
> > > v1:
> > > - Add smart_critical_warning for nvme device which can be set by QEMU
> > >command line to emulate hardware error.
> > > 
> > > Zhenwei Pi (3):
> > >block/nvme: introduce bit 5 for critical warning
> > >hw/block/nvme: add smart_critical_warning property
> > >hw/blocl/nvme: trigger async event during injecting smart warning
> > > 
> > >   hw/block/nvme.c  | 91 +++-
> > >   hw/block/nvme.h  |  1 +
> > >   include/block/nvme.h |  3 ++
> > >   3 files changed, 86 insertions(+), 9 deletions(-)
> > > 
> > 
> > This looks pretty good to me.
> > 
> > I think maybe we want to handle the duplicate event stuff more generally
> > from the AER/AEN code, but this does the job.
> > 
> > Tested-by: Klaus Jensen 
> > Reviewed-by: Klaus Jensen 
> > 
> 
> What's the next step I should take? Should I push a new version to implement
> this purpose? From my understanding, before inserting a new event to
> aer_queue, I can parse all the pending aer to find the same event.
> 
> nvme_enqueue_event()
> {
> ...
> 
> QTAILQ_FOREACH_SAFE(event, >aer_queue, entry, next) {
> if ((event->result.event_type == event_type)
> && (event->result.event_info == event_info)
> && (event->result.log_page == log_page))
> return;
> }
> 
> QTAILQ_INSERT_TAIL(>aer_queue, event, entry);
> 
> 
> 
> n->aer_queued++;
> ...
> }
> 

No, I'll pick up your series as is, I'll pick it up for nvme-next later
today if noone complains! :)


signature.asc
Description: PGP signature

Re: [RFC PATCH V2 00/11] hw/block/nvme: support multi-path for ctrl/ns

2021-01-18 Thread Minwoo Im

On 21-01-18 22:14:45, Klaus Jensen wrote:
> On Jan 17 23:53, Minwoo Im wrote:
> > Hello,
> > 
> > This patch series introduces NVMe subsystem device to support multi-path
> > I/O in NVMe device model.  Two use-cases are supported along with this
> > patch: Multi-controller, Namespace Sharing.
> > 
> > V1 RFC has been discussed with Klaus and Keith, I really appreciate them
> > for this patch series to have proper direction [1].
> > 
> > This patch series contains few start-up refactoring pathces from the
> > first to fifth patches to make nvme-ns device not to rely on the nvme
> > controller always.  Because nvme-ns shall be able to be mapped to the
> > subsystem level, not a single controller level so that it should provide
> > generic initialization code: nvme_ns_setup() with NvmeCtrl.  To do that,
> > the first five patches are to remove the NvmeCtrl * instance argument
> > from the nvme_ns_setup().  I'd be happy if they are picked!
> > 
> > For controller and namespace devices, 'subsys' property has been
> > introduced to map them to a subsystem.  If multi-controller needed, we
> > can specify 'subsys' to controllers the same.
> > 
> > For namespace deivice, if 'subsys' is not given just like it was, it
> > will have to be provided with 'bus' parameter to specify a nvme
> > controller device to attach, it means, they are mutual-exlusive.  To
> > share a namespace between or among controllers, then nvme-ns should have
> > 'subsys' property to a single nvme subsystem instance.  To make a
> > namespace private one, then we need to specify 'bus' property rather
> > than the 'subsys'.
> > 
> > Of course, this series does not require any updates for the run command
> > for the previos users.
> > 
> > Plase refer the following example with nvme-cli output:
> > 
> > QEMU Run:
> >   -device nvme-subsys,id=subsys0 \
> >   -device nvme,serial=foo,id=nvme0,subsys=subsys0 \
> >   -device nvme,serial=bar,id=nvme1,subsys=subsys0 \
> >   -device nvme,serial=baz,id=nvme2,subsys=subsys0 \
> >   -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0 \
> >   -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2 \
> >   \
> >   -device nvme,serial=qux,id=nvme3 \
> >   -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3
> > 
> > nvme-cli:
> >   root@vm:~/work# nvme list -v
> >   NVM Express Subsystems
> > 
> >   SubsystemSubsystem-NQN
> > Controllers
> >    
> > 
> >  
> >   nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
> > nvme0, nvme1, nvme2
> >   nvme-subsys3 nqn.2019-08.org.qemu:qux 
> > nvme3
> > 
> >   NVM Express Controllers
> > 
> >   Device   SN   MN   FR 
> >   TxPort AddressSubsystemNamespaces
> >      
> >  -- --  
> >   nvme0foo  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:06.0   nvme-subsys1 nvme1n1
> >   nvme1bar  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:07.0   nvme-subsys1 nvme1n1
> >   nvme2baz  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:08.0   nvme-subsys1 nvme1n1, nvme1n2
> >   nvme3qux  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:09.0   nvme-subsys3
> > 
> >   NVM Express Namespaces
> > 
> >   Device   NSID Usage  Format   
> > Controllers
> >     --  
> > 
> >   nvme1n1  1134.22  MB / 134.22  MB512   B +  0 B   nvme0, 
> > nvme1, nvme2
> >   nvme1n2  2268.44  MB / 268.44  MB512   B +  0 B   nvme2
> >   nvme3n1  3268.44  MB / 268.44  MB512   B +  0 B   nvme3
> > 
> > Summary:
> >   - Refactored nvme-ns device not to rely on controller during the
> > setup.  [1/11 - 5/11]
> >   - Introduced a nvme-subsys device model. [6/11]
> >   - Create subsystem NQN based on subsystem. [7/11]
> >   - Introduced multi-controller model. [8/11 - 9/11]
> >   - Updated namespace sharing scheme to be based on nvme-subsys
> > hierarchy. [10/11 - 11/11]
> > 
> > Since RFC V1:
> >   - Updated namespace sharing scheme to be based on nvme-subsys
> > hierarchy.
> > 
> 
> Great stuff Minwoo. Thanks!
> 
> I'll pick up [01-05/11] directly since they are pretty trivial.

Thanks! will prepare the next series based on there.

> The subsystem model looks pretty much like it should, I don't have a lot
> of comments.
> 
> One thing that I

Re: [RFC PATCH V2 10/11] hw/block/nvme: add NMIC enum value for Identify Namespace

2021-01-18 Thread Minwoo Im

> Let's keep convention (or should be convention...) of using NvmeIdNs
> prefix for identify data structure fields on the enum.

Okay!

Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Minwoo Im

On 21-01-18 20:23:30, Klaus Jensen wrote:
> On Jan 18 14:22, Klaus Jensen wrote:
> > On Jan 18 22:12, Minwoo Im wrote:
> > > On 21-01-18 14:10:50, Klaus Jensen wrote:
> > > > On Jan 18 22:09, Minwoo Im wrote:
> > > > > > > Yes, CMB in v1.4 is not backward-compatible, but is it okay to 
> > > > > > > move onto
> > > > > > > the CMB v1.4 from v1.3 without supporting the v1.3 usage on this 
> > > > > > > device
> > > > > > > model?
> > > > > > 
> > > > > > Next patch moves to v1.4. I wanted to split it because the "bump" 
> > > > > > patch
> > > > > > also adds a couple of other v1.4 requires fields. But I understand 
> > > > > > that
> > > > > > this is slightly wrong.
> > > > > 
> > > > > Sorry, I meant I'd like to have CMB for v1.3 support along with the 
> > > > > v1.4
> > > > > support in this device model separately.  Maybe I missed the 
> > > > > linux-nvme
> > > > > mailing list for CMB v1.4, but is there no plan to keep supportin CMB
> > > > > v1.3 in NVMe driver?
> > > > 
> > > > I posted a patch on linux-nvme for v1.4 support in the kernel. It will
> > > > support both the v1.3 and v1.4 behavior :)
> > > 
> > > Then, can we maintain CMB v1.3 also in QEMU also along with v1.4 ? :)
> > 
> > My first version of this patch actually included compatibility support
> > for v1.3 ("legacy cmb"), but Keith suggested we just drop that and keep
> > this device compliant.
> > 
> > What we could do is allow the spec version to be chosen with a
> > parameter?
> 
> Uhm. Maybe not. I gave this some more thought.
> 
> Adding a device parameter to choose the specification version requires
> us to maintain QEMU "compat" properties across different QEMU version.
> I'm not sure we want that for something like this.

Agreed.  The default should head for latest one.

> 
> Maybe the best course of action actually *is* an 'x-legacy-cmb'
> parameter.

This looks fine.

As kernel driver does not remove the v1.3 CMB support, then it would be
great if QEMU suports that also.

Re: [PATCH v2 02/12] hw/block/nvme: fix 64 bit register hi/lo split writes

2021-01-18 Thread Minwoo Im

On 21-01-18 20:53:24, Klaus Jensen wrote:
> On Jan 18 21:59, Minwoo Im wrote:
> > > The spec requires aligned 32 bit accesses (or the size of the register),
> > > so maybe it's about time we actually ignore instead of falling through.
> > 
> > Agreed.
> > 
> 
> On the other hand.
> 
> The spec just allows undefined behavior if the alignment or size
> requirement is violated. So falling through is not wrong.

If so, maybe we just can make this MMIO region support under 4bytes
access with error messaging.  I don't think we should not support them
on purpose because spec says it just results in undefined behaviors
which is just undefined how to handle them.

Re: Re: [PATCH v4 0/3] support NVMe smart critial warning injection

2021-01-18 Thread zhenwei pi


On 1/18/21 5:34 PM, Klaus Jensen wrote:

On Jan 15 11:26, zhenwei pi wrote:

v3 -> v4:
- Drop "Fix overwritten bar.cap". (Already fixed)

- Avoid to enqueue the duplicate event.

- Several minor changes for coding style & function/variable name.

v2 -> v3:
- Introduce "Persistent Memory Region has become read-only or
   unreliable"

- Fix overwritten bar.cap

- Check smart critical warning value from QOM.

- Trigger asynchronous event during smart warning injection.

v1 -> v2:
- Suggested by Philippe & Klaus, set/get smart_critical_warning by QMP.

v1:
- Add smart_critical_warning for nvme device which can be set by QEMU
   command line to emulate hardware error.

Zhenwei Pi (3):
   block/nvme: introduce bit 5 for critical warning
   hw/block/nvme: add smart_critical_warning property
   hw/blocl/nvme: trigger async event during injecting smart warning

  hw/block/nvme.c  | 91 +++-
  hw/block/nvme.h  |  1 +
  include/block/nvme.h |  3 ++
  3 files changed, 86 insertions(+), 9 deletions(-)



This looks pretty good to me.

I think maybe we want to handle the duplicate event stuff more generally
from the AER/AEN code, but this does the job.

Tested-by: Klaus Jensen 
Reviewed-by: Klaus Jensen 



What's the next step I should take? Should I push a new version to 
implement this purpose? From my understanding, before inserting a new 
event to aer_queue, I can parse all the pending aer to find the same event.


nvme_enqueue_event()
{
...

QTAILQ_FOREACH_SAFE(event, >aer_queue, entry, next) {
if ((event->result.event_type == event_type)
&& (event->result.event_info == event_info)
&& (event->result.log_page == log_page))
return;
}

QTAILQ_INSERT_TAIL(>aer_queue, event, entry); 




n->aer_queued++;
...
}

--
zhenwei pi

Re: [PATCH 7/7] block/rbd: change request alignment to 1 byte

2021-01-18 Thread Jason Dillaman

On Fri, Jan 15, 2021 at 10:39 AM Peter Lieven  wrote:
>
> Am 15.01.21 um 16:27 schrieb Jason Dillaman:
> > On Thu, Jan 14, 2021 at 2:59 PM Peter Lieven  wrote:
> >> Am 14.01.21 um 20:19 schrieb Jason Dillaman:
> >>> On Sun, Dec 27, 2020 at 11:42 AM Peter Lieven  wrote:
>  since we implement byte interfaces and librbd supports aio on byte 
>  granularity we can lift
>  the 512 byte alignment.
> 
>  Signed-off-by: Peter Lieven 
>  ---
>   block/rbd.c | 2 --
>   1 file changed, 2 deletions(-)
> 
>  diff --git a/block/rbd.c b/block/rbd.c
>  index 27b4404adf..8673e8f553 100644
>  --- a/block/rbd.c
>  +++ b/block/rbd.c
>  @@ -223,8 +223,6 @@ done:
>   static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
>   {
>   BDRVRBDState *s = bs->opaque;
>  -/* XXX Does RBD support AIO on less than 512-byte alignment? */
>  -bs->bl.request_alignment = 512;
> >>> Just a suggestion, but perhaps improve discard alignment, max discard,
> >>> optimal alignment (if that's something QEMU handles internally) if not
> >>> overridden by the user.
> >>
> >> Qemu supports max_discard and discard_alignment. Is there a call to get 
> >> these limits
> >>
> >> from librbd?
> >>
> >>
> >> What do you mean by optimal_alignment? The object size?
> > krbd does a good job of initializing defaults [1] where optimal and
> > discard alignment is 64KiB (can actually be 4KiB now), max IO size for
> > writes, discards, and write-zeroes is the object size * the stripe
> > count.
>
>
> Okay, I will have a look at it. If qemu issues a write, discard, write_zero 
> greater than
>
> obj_size  * stripe count will librbd split it internally or will the request 
> fail?

librbd will handle it as needed. My goal is really just to get the
hints down the guest OS.

> Regarding the alignment it seems that rbd_dev->opts->alloc_size is something 
> that comes from the device
>
> configuration and not from rbd? I don't have that information inside the Qemu 
> RBD driver.

librbd doesn't really have the information either. The 64KiB guess
that krbd uses was a compromise since that was the default OSD
allocation size for HDDs since Luminous. Starting with Pacific that
default is going down to 4KiB.

>
> Peter
>
>


-- 
Jason

Re: [RFC PATCH V2 00/11] hw/block/nvme: support multi-path for ctrl/ns

2021-01-18 Thread Klaus Jensen

On Jan 17 23:53, Minwoo Im wrote:
> Hello,
> 
> This patch series introduces NVMe subsystem device to support multi-path
> I/O in NVMe device model.  Two use-cases are supported along with this
> patch: Multi-controller, Namespace Sharing.
> 
> V1 RFC has been discussed with Klaus and Keith, I really appreciate them
> for this patch series to have proper direction [1].
> 
> This patch series contains few start-up refactoring pathces from the
> first to fifth patches to make nvme-ns device not to rely on the nvme
> controller always.  Because nvme-ns shall be able to be mapped to the
> subsystem level, not a single controller level so that it should provide
> generic initialization code: nvme_ns_setup() with NvmeCtrl.  To do that,
> the first five patches are to remove the NvmeCtrl * instance argument
> from the nvme_ns_setup().  I'd be happy if they are picked!
> 
> For controller and namespace devices, 'subsys' property has been
> introduced to map them to a subsystem.  If multi-controller needed, we
> can specify 'subsys' to controllers the same.
> 
> For namespace deivice, if 'subsys' is not given just like it was, it
> will have to be provided with 'bus' parameter to specify a nvme
> controller device to attach, it means, they are mutual-exlusive.  To
> share a namespace between or among controllers, then nvme-ns should have
> 'subsys' property to a single nvme subsystem instance.  To make a
> namespace private one, then we need to specify 'bus' property rather
> than the 'subsys'.
> 
> Of course, this series does not require any updates for the run command
> for the previos users.
> 
> Plase refer the following example with nvme-cli output:
> 
> QEMU Run:
>   -device nvme-subsys,id=subsys0 \
>   -device nvme,serial=foo,id=nvme0,subsys=subsys0 \
>   -device nvme,serial=bar,id=nvme1,subsys=subsys0 \
>   -device nvme,serial=baz,id=nvme2,subsys=subsys0 \
>   -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0 \
>   -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2 \
>   \
>   -device nvme,serial=qux,id=nvme3 \
>   -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3
> 
> nvme-cli:
>   root@vm:~/work# nvme list -v
>   NVM Express Subsystems
> 
>   SubsystemSubsystem-NQN  
>   Controllers
>    
> 
>  
>   nvme-subsys1 nqn.2019-08.org.qemu:subsys0   
>   nvme0, nvme1, nvme2
>   nvme-subsys3 nqn.2019-08.org.qemu:qux   
>   nvme3
> 
>   NVM Express Controllers
> 
>   Device   SN   MN   FR   
> TxPort AddressSubsystemNamespaces
>      
>  -- --  
>   nvme0foo  QEMU NVMe Ctrl   1.0  
> pcie   :00:06.0   nvme-subsys1 nvme1n1
>   nvme1bar  QEMU NVMe Ctrl   1.0  
> pcie   :00:07.0   nvme-subsys1 nvme1n1
>   nvme2baz  QEMU NVMe Ctrl   1.0  
> pcie   :00:08.0   nvme-subsys1 nvme1n1, nvme1n2
>   nvme3qux  QEMU NVMe Ctrl   1.0  
> pcie   :00:09.0   nvme-subsys3
> 
>   NVM Express Namespaces
> 
>   Device   NSID Usage  Format   
> Controllers
>     --  
> 
>   nvme1n1  1134.22  MB / 134.22  MB512   B +  0 B   nvme0, 
> nvme1, nvme2
>   nvme1n2  2268.44  MB / 268.44  MB512   B +  0 B   nvme2
>   nvme3n1  3268.44  MB / 268.44  MB512   B +  0 B   nvme3
> 
> Summary:
>   - Refactored nvme-ns device not to rely on controller during the
> setup.  [1/11 - 5/11]
>   - Introduced a nvme-subsys device model. [6/11]
>   - Create subsystem NQN based on subsystem. [7/11]
>   - Introduced multi-controller model. [8/11 - 9/11]
>   - Updated namespace sharing scheme to be based on nvme-subsys
> hierarchy. [10/11 - 11/11]
> 
> Since RFC V1:
>   - Updated namespace sharing scheme to be based on nvme-subsys
> hierarchy.
> 

Great stuff Minwoo. Thanks!

I'll pick up [01-05/11] directly since they are pretty trivial.

The subsystem model looks pretty much like it should, I don't have a lot
of comments.

One thing that I considered, is if we should reverse the "registration"
and think about it as namespace attachment. The spec is about
controllers attaching to namespaces, not the other way around.
Basically, let the namespaces be configured first and register on the
subsystem (accumulating in a "namespaces" array), then have the

Re: [RFC PATCH V2 10/11] hw/block/nvme: add NMIC enum value for Identify Namespace

2021-01-18 Thread Klaus Jensen

On Jan 17 23:53, Minwoo Im wrote:
> Added Namespace Multi-path I/O and Namespace Sharing Capabilities (NMIC)
> field to support shared namespace from controller(s).
> 
> This field is in Identify Namespace data structure in [30].
> 
> Signed-off-by: Minwoo Im 
> ---
>  include/block/nvme.h | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index e83ec1e124c6..dd7b5ba9ef4c 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -1110,6 +1110,10 @@ enum NvmeNsIdentifierType {
>  NVME_NIDT_CSI   = 0x04,
>  };
>  
> +enum NvmeNsNmic {
> +NVME_NMIC_NS_SHARED = 1 << 0,
> +};
> +

Let's keep convention (or should be convention...) of using NvmeIdNs
prefix for identify data structure fields on the enum.


signature.asc
Description: PGP signature

Re: [RFC PATCH V2 05/11] hw/block/nvme: remove unused argument in nvme_ns_setup

2021-01-18 Thread Klaus Jensen

On Jan 17 23:53, Minwoo Im wrote:
> nvme_ns_setup() finally does not have nothing to do with NvmeCtrl
> instance.
> 
> Signed-off-by: Minwoo Im 

I also think this *could* be squashed into [04/11] without too much
strain on the eye, but

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature

Re: [RFC PATCH V2 04/11] hw/block/nvme: split setup and register for namespace

2021-01-18 Thread Klaus Jensen

On Jan 17 23:53, Minwoo Im wrote:
> In NVMe, namespace is being attached to process I/O.  We register NVMe
> namespace to a controller via nvme_register_namespace() during
> nvme_ns_setup().  This is main reason of receiving NvmeCtrl object
> instance to this function to map the namespace to a controller.
> 
> To make namespace instance more independent, it should be split into two
> parts: setup and register.  This patch split them into two differnt
> parts, and finally nvme_ns_setup() does not have nothing to do with
> NvmeCtrl instance at all.
> 
> This patch is a former patch to introduce NVMe subsystem scheme to the
> existing design especially for multi-path.  In that case, it should be
> split into two to make namespace independent from a controller.
> 
> Signed-off-by: Minwoo Im 

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature

Re: [RFC PATCH V2 02/11] hw/block/nvme: open code for volatile write cache

2021-01-18 Thread Klaus Jensen

On Jan 17 23:53, Minwoo Im wrote:
> Volatile Write Cache(VWC) feature is set in nvme_ns_setup() in the
> initial time.  This feature is related to block device backed,  but this
> feature is controlled in controller level via Set/Get Features command.
> 
> This patch removed dependency between nvme and nvme-ns to manage the VWC
> flag value.  Also, it open coded the Get Features for VWC to check all
> namespaces attached to the controller, and if false detected, return
> directly false.
> 
> Signed-off-by: Minwoo Im 

The VWC feature really should be namespace specific. I wonder why they
didn't fix that when they added an NSID to the Flush command...

Anyway, this is much better.

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature

Re: [RFC PATCH V2 03/11] hw/block/nvme: remove unused argument in nvme_ns_init_blk

2021-01-18 Thread Klaus Jensen

On Jan 17 23:53, Minwoo Im wrote:
> Removed no longer used aregument NvmeCtrl object in nvme_ns_init_blk().
> 
> Signed-off-by: Minwoo Im 

I don't think it's too unwieldy for this to be squashed into [02/11],
but

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature

Re: [RFC PATCH V2 01/11] hw/block/nvme: remove unused argument in nvme_ns_init_zoned

2021-01-18 Thread Klaus Jensen

On Jan 17 23:53, Minwoo Im wrote:
> nvme_ns_init_zoned() has no use for given NvmeCtrl object.
> 
> Signed-off-by: Minwoo Im 

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature

Re: [PATCH v2 02/12] hw/block/nvme: fix 64 bit register hi/lo split writes

2021-01-18 Thread Klaus Jensen

On Jan 18 21:59, Minwoo Im wrote:
> > The spec requires aligned 32 bit accesses (or the size of the register),
> > so maybe it's about time we actually ignore instead of falling through.
> 
> Agreed.
> 

On the other hand.

The spec just allows undefined behavior if the alignment or size
requirement is violated. So falling through is not wrong.

Keith, any thoughts on this?

signature.asc
Description: PGP signature

Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Klaus Jensen

On Jan 18 14:22, Klaus Jensen wrote:
> On Jan 18 22:12, Minwoo Im wrote:
> > On 21-01-18 14:10:50, Klaus Jensen wrote:
> > > On Jan 18 22:09, Minwoo Im wrote:
> > > > > > Yes, CMB in v1.4 is not backward-compatible, but is it okay to move 
> > > > > > onto
> > > > > > the CMB v1.4 from v1.3 without supporting the v1.3 usage on this 
> > > > > > device
> > > > > > model?
> > > > > 
> > > > > Next patch moves to v1.4. I wanted to split it because the "bump" 
> > > > > patch
> > > > > also adds a couple of other v1.4 requires fields. But I understand 
> > > > > that
> > > > > this is slightly wrong.
> > > > 
> > > > Sorry, I meant I'd like to have CMB for v1.3 support along with the v1.4
> > > > support in this device model separately.  Maybe I missed the linux-nvme
> > > > mailing list for CMB v1.4, but is there no plan to keep supportin CMB
> > > > v1.3 in NVMe driver?
> > > 
> > > I posted a patch on linux-nvme for v1.4 support in the kernel. It will
> > > support both the v1.3 and v1.4 behavior :)
> > 
> > Then, can we maintain CMB v1.3 also in QEMU also along with v1.4 ? :)
> 
> My first version of this patch actually included compatibility support
> for v1.3 ("legacy cmb"), but Keith suggested we just drop that and keep
> this device compliant.
> 
> What we could do is allow the spec version to be chosen with a
> parameter?

Uhm. Maybe not. I gave this some more thought.

Adding a device parameter to choose the specification version requires
us to maintain QEMU "compat" properties across different QEMU version.
I'm not sure we want that for something like this.

Maybe the best course of action actually *is* an 'x-legacy-cmb'
parameter.


signature.asc
Description: PGP signature

Re: [PATCH v2 03/36] block: bdrv_append(): don't consume reference

2021-01-18 Thread Kevin Wolf

Am 18.01.2021 um 18:21 hat Vladimir Sementsov-Ogievskiy geschrieben:
> 18.01.2021 17:18, Kevin Wolf wrote:
> > Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:
> > > We have too much comments for this feature. It seems better just don't
> > > do it. Most of real users (tests don't count) have to create additional
> > > reference.
> > > 
> > > Drop also comment in external_snapshot_prepare:
> > >   - bdrv_append doesn't "remove" old bs in common sense, it sounds
> > > strange
> > >   - the fact that bdrv_append can fail is obvious from the context
> > >   - the fact that we must rollback all changes in transaction abort is
> > > known (it's the direct role of abort)
> > > 
> > > Signed-off-by: Vladimir Sementsov-Ogievskiy 

> > > diff --git a/blockdev.c b/blockdev.c
> > > index b5f11c524b..96c96f8ba6 100644
> > > --- a/blockdev.c
> > > +++ b/blockdev.c
> > > @@ -1587,10 +1587,6 @@ static void 
> > > external_snapshot_prepare(BlkActionState *common,
> > >   goto out;
> > >   }
> > > -/* This removes our old bs and adds the new bs. This is an operation 
> > > that
> > > - * can fail, so we need to do it in .prepare; undoing it for abort is
> > > - * always possible. */
> > 
> > This comment is still relevant, it's unrelated to the bdrv_ref().
> 
> I described in commit message, why I dislike this comment :) I can
> keep it of course, it's not critical

Ah, right, I missed this bit in the commit message (or forgot it until I
reached this hunk) and thought it was an accidental removal.

If it's intentional, no reason to change the patch.

Kevin

Re: [PATCH v2 02/36] tests/test-bdrv-graph-mod: add test_parallel_perm_update

2021-01-18 Thread Vladimir Sementsov-Ogievskiy


18.01.2021 17:05, Kevin Wolf wrote:

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:

Add test to show that simple DFS recursion order is not correct for
permission update. Correct order is topological-sort order, which will
be introduced later.

Consider the block driver which has two filter children: one active
with exclusive write access and one inactive with no specific
permissions.

And, these two children has a common base child, like this:

┌─┐ ┌──┐
│ fl2 │ ◀── │ top  │
└─┘ └──┘
   │   │
   │   │ w
   │   ▼
   │ ┌──┐
   │ │ fl1  │
   │ └──┘
   │   │
   │   │ w
   │   ▼
   │ ┌──┐
   └───▶ │ base │
 └──┘

So, exclusive write is propagated.

Assume, we want to make fl2 active instead of fl1.
So, we set some option for top driver and do permission update.

If permission update (remember, it's DFS) goes first through
top->fl1->base branch it will succeed: it firstly drop exclusive write
permissions and than apply them for another BdrvChildren.
But if permission update goes first through top->fl2->base branch it
will fail, as when we try to update fl2->base child, old not yet
updated fl1->base child will be in conflict.

Now test fails, so it runs only with -d flag. To run do

   ./test-bdrv-graph-mod -d -p /bdrv-graph-mod/parallel-perm-update

from /tests.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  tests/test-bdrv-graph-mod.c | 64 +
  1 file changed, 64 insertions(+)

diff --git a/tests/test-bdrv-graph-mod.c b/tests/test-bdrv-graph-mod.c
index 3b9e6f242f..27e3361a60 100644
--- a/tests/test-bdrv-graph-mod.c
+++ b/tests/test-bdrv-graph-mod.c
@@ -232,6 +232,68 @@ static void test_parallel_exclusive_write(void)
  bdrv_unref(top);
  }
  
+static void write_to_file_perms(BlockDriverState *bs, BdrvChild *c,

+ BdrvChildRole role,
+ BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+if (bs->file && c == bs->file) {
+*nperm = BLK_PERM_WRITE;
+*nshared = BLK_PERM_ALL & ~BLK_PERM_WRITE;
+} else {
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+}
+}
+
+static BlockDriver bdrv_write_to_file = {
+.format_name = "tricky-perm",
+.bdrv_child_perm = write_to_file_perms,
+};
+
+static void test_parallel_perm_update(void)
+{
+BlockDriverState *top = no_perm_node("top");
+BlockDriverState *tricky =
+bdrv_new_open_driver(_write_to_file, "tricky", BDRV_O_RDWR,
+ _abort);
+BlockDriverState *base = no_perm_node("base");
+BlockDriverState *fl1 = pass_through_node("fl1");
+BlockDriverState *fl2 = pass_through_node("fl2");
+BdrvChild *c_fl1, *c_fl2;
+
+bdrv_attach_child(top, tricky, "file", _of_bds, BDRV_CHILD_DATA,
+  _abort);
+c_fl1 = bdrv_attach_child(tricky, fl1, "first", _of_bds,
+  BDRV_CHILD_FILTERED, _abort);
+c_fl2 = bdrv_attach_child(tricky, fl2, "second", _of_bds,
+  BDRV_CHILD_FILTERED, _abort);
+bdrv_attach_child(fl1, base, "backing", _of_bds, BDRV_CHILD_FILTERED,
+  _abort);
+bdrv_attach_child(fl2, base, "backing", _of_bds, BDRV_CHILD_FILTERED,
+  _abort);
+bdrv_ref(base);
+
+/* Select fl1 as first child to be active */
+tricky->file = c_fl1;
+bdrv_child_refresh_perms(top, top->children.lh_first, _abort);
+
+assert(c_fl1->perm & BLK_PERM_WRITE);
+
+/* Now, try to switch active child and update permissions */
+tricky->file = c_fl2;
+bdrv_child_refresh_perms(top, top->children.lh_first, _abort);
+
+assert(c_fl2->perm & BLK_PERM_WRITE);
+
+/* Switch once more, to not care about real child order in the list */
+tricky->file = c_fl1;
+bdrv_child_refresh_perms(top, top->children.lh_first, _abort);
+
+assert(c_fl1->perm & BLK_PERM_WRITE);


Should we also assert in each case that the we don't hole the write
permission for the inactive child?



Won't hurt, will add

--
Best regards,
Vladimir

Re: [PATCH v2 10/36] util: add transactions.c

2021-01-18 Thread Vladimir Sementsov-Ogievskiy


18.01.2021 19:50, Kevin Wolf wrote:

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:

Add simple transaction API to use in further update of block graph
operations.

Supposed usage is:

- "prepare" is main function of the action and it should make the main
   effect of the action to be visible for the following actions, keeping
   possibility of roll-back, saving necessary things in action state,
   which is prepended to the list. So, driver struct doesn't include
   "prepare" field, as it is supposed to be called directly.


So the convention is that tran_prepend() should be called by the
function that does the preparation?


yes.


Or would we call tran_prepend() and
do the actual action in different places?


- commit/rollback is supposed to be called for the list of action
   states, to commit/rollback all the actions in reverse order

- When possible "commit" should not make visible effect for other
   actions, which make possible transparent logical interaction between
   actions.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/qemu/transactions.h | 46 +
  util/transactions.c | 81 +
  util/meson.build|  1 +
  3 files changed, 128 insertions(+)
  create mode 100644 include/qemu/transactions.h
  create mode 100644 util/transactions.c

diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h
new file mode 100644
index 00..a5b15f46ab
--- /dev/null
+++ b/include/qemu/transactions.h
@@ -0,0 +1,46 @@
+/*
+ * Simple transactions API
+ *
+ * Copyright (c) 2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Vladimir Sementsov-Ogievskiy 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#ifndef QEMU_TRANSACTIONS_H
+#define QEMU_TRANSACTIONS_H
+
+#include 
+
+typedef struct TransactionActionDrv {
+void (*abort)(void *opeque);
+void (*commit)(void *opeque);
+void (*clean)(void *opeque);
+} TransactionActionDrv;


s/opeque/opaque/


+void tran_prepend(GSList **list, TransactionActionDrv *drv, void *opaque);
+void tran_abort(GSList *backup);
+void tran_commit(GSList *backup);


I'd add an empty line before a full function definition.


+static inline void tran_finalize(GSList *backup, int ret)
+{
+if (ret < 0) {
+tran_abort(backup);
+} else {
+tran_commit(backup);
+}
+}


Let's use an opaque struct instead of GSList here and...


+#endif /* QEMU_TRANSACTIONS_H */
diff --git a/util/transactions.c b/util/transactions.c
new file mode 100644
index 00..ef1b9a36a4
--- /dev/null
+++ b/util/transactions.c
@@ -0,0 +1,81 @@
+/*
+ * Simple transactions API
+ *
+ * Copyright (c) 2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include "qemu/osdep.h"
+
+#include "qemu/transactions.h"
+
+typedef struct BdrvAction {
+TransactionActionDrv *drv;
+void *opaque;
+} BdrvAction;


...add a QSLIST_ENTRY (or similar) here to make it a type-safe list.

The missing type safety of GSList means that we should avoid it whenever
it's easily possible (i.e. we know the number of lists in which an
element will be). Here, each BdrvAction will only be in a single list,
so typed lists should be simple enough.



OK


--
Best regards,
Vladimir

[RFC PATCH 1/2] scsi/utils: Add INVALID_PARAM_VALUE sense code definition

2021-01-18 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/scsi/utils.h | 2 ++
 scsi/utils.c | 5 +
 2 files changed, 7 insertions(+)

diff --git a/include/scsi/utils.h b/include/scsi/utils.h
index fbc55882799..096489c6cd1 100644
--- a/include/scsi/utils.h
+++ b/include/scsi/utils.h
@@ -57,6 +57,8 @@ extern const struct SCSISense sense_code_LBA_OUT_OF_RANGE;
 extern const struct SCSISense sense_code_INVALID_FIELD;
 /* Illegal request, Invalid field in parameter list */
 extern const struct SCSISense sense_code_INVALID_PARAM;
+/* Illegal request, Invalid value in parameter list */
+extern const struct SCSISense sense_code_INVALID_PARAM_VALUE;
 /* Illegal request, Parameter list length error */
 extern const struct SCSISense sense_code_INVALID_PARAM_LEN;
 /* Illegal request, LUN not supported */
diff --git a/scsi/utils.c b/scsi/utils.c
index b37c2830148..793c3a6b9c9 100644
--- a/scsi/utils.c
+++ b/scsi/utils.c
@@ -197,6 +197,11 @@ const struct SCSISense sense_code_INVALID_PARAM = {
 .key = ILLEGAL_REQUEST, .asc = 0x26, .ascq = 0x00
 };
 
+/* Illegal request, Invalid value in parameter list */
+const struct SCSISense sense_code_INVALID_PARAM_VALUE = {
+.key = ILLEGAL_REQUEST, .asc = 0x26, .ascq = 0x01
+};
+
 /* Illegal request, Parameter list length error */
 const struct SCSISense sense_code_INVALID_PARAM_LEN = {
 .key = ILLEGAL_REQUEST, .asc = 0x1a, .ascq = 0x00
-- 
2.26.2

Re: [PATCH v2 06/36] block: BdrvChildClass: add .get_parent_aio_context handler

2021-01-18 Thread Vladimir Sementsov-Ogievskiy


18.01.2021 18:13, Kevin Wolf wrote:

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:

Add new handler to get aio context and implement it in all child
classes. Add corresponding public interface to be used soon.

Signed-off-by: Vladimir Sementsov-Ogievskiy 


Hm, are you going to introduce cases where parent and child context
don't match, or why is this a useful function?

Even if so, I feel it shouldn't be any of the child's business what
AioContext the parent uses.

Well, maybe the rest of the series will answer this.



It's for the following patch, to not pass parent (as opaque) with it's class, 
and with its ctx in separate. Just to simplify the interface of the function, 
we are going to work with a lot.

Hm. I'll take this opportunity to say, that the terminology that calls graph edge 
"BdrvChild" always leads to the mess between parents and children.. 
"child_class" is a class of parent.. list of parents is list of children... It all would 
be a lot simpler to understand if BdrvChild was BdrvEdge or something like this.

--
Best regards,
Vladimir

Re: [PATCH v4 00/23] backup performance: block_status + async

2021-01-18 Thread Vladimir Sementsov-Ogievskiy


18.01.2021 18:07, Max Reitz wrote:

On 16.01.21 22:46, Vladimir Sementsov-Ogievskiy wrote:

Hi Max!
I applied my series onto yours 129-fixing and found, that 129 fails for backup.
And setting small max-chunk and even max-workers to 1 doesn't help! (setting
speed like in v3 still helps).

And I found, that the problem is that really, the whole backup job goes during
drain, because in new architecture we do just job_yield() during the whole
background block-copy.


OK, so as it was in v3, the job was drained, but since it was already yielding 
while block-copy was running in the background, nothing happened; the 
block-copy completed and only then was the job woken (and then there was no 
reason to pause, because it was done already).

So now the job is entered on drain, too (not only user pauses), which means 
that it gets a chance to pause background requests.

In backup’s case, that means invoking job_yield() again, which sets a 
job_pause_point(), which will cancel the block-copy.  Once the job is unpaused 
(re-entered by job_resume()), backup sees block-copy is cancelled (and 
finished), leaves the loop, and retries with a new block-copy call.

I think I got it now.


So all that’s left is issuing a thanks to you – thanks! – and announcing that 
I’ve applied this series to my block branch (with s/not unsupported/not 
supported/ in patch 23):

https://git.xanclic.moe/XanClic/qemu/commits/branch/block



Great! Thanks!


--
Best regards,
Vladimir

Re: [PATCH v2 05/36] block: add bdrv_parent_try_set_aio_context

2021-01-18 Thread Vladimir Sementsov-Ogievskiy


18.01.2021 18:08, Kevin Wolf wrote:

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:

We already have bdrv_parent_can_set_aio_context(). Add corresponding
bdrv_parent_set_aio_context_ignore() and
bdrv_parent_try_set_aio_context() and use them instead of open-coding.

Make bdrv_parent_try_set_aio_context() public, as it will be used in
further commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/block.h |  2 ++
  block.c   | 51 +--
  2 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index ee3f5a6cca..550c5a7513 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -686,6 +686,8 @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, 
AioContext *ctx,
  GSList **ignore, Error **errp);
  bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
GSList **ignore, Error **errp);
+int bdrv_parent_try_set_aio_context(BdrvChild *c, AioContext *ctx,
+Error **errp);
  int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
  int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
  
diff --git a/block.c b/block.c

index 916087ee1a..5d925c208d 100644
--- a/block.c
+++ b/block.c
@@ -81,6 +81,9 @@ static BlockDriverState *bdrv_open_inherit(const char 
*filename,
 BdrvChildRole child_role,
 Error **errp);
  
+static void bdrv_parent_set_aio_context_ignore(BdrvChild *c, AioContext *ctx,

+   GSList **ignore);
+
  /* If non-zero, use only whitelisted block drivers */
  static int use_bdrv_whitelist;
  
@@ -2655,17 +2658,12 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,

   * try moving the parent into the AioContext of child_bs instead. */
  if (bdrv_get_aio_context(child_bs) != ctx) {
  ret = bdrv_try_set_aio_context(child_bs, ctx, _err);
-if (ret < 0 && child_class->can_set_aio_ctx) {
-GSList *ignore = g_slist_prepend(NULL, child);
-ctx = bdrv_get_aio_context(child_bs);


You are losing this line...


-if (child_class->can_set_aio_ctx(child, ctx, , NULL)) {
-error_free(local_err);
+if (ret < 0) {
+if (bdrv_parent_try_set_aio_context(child, ctx, NULL) == 0) {


...before this one, so I think the wrong context is passed now. Instead
of trying to move the parent to the AioContext of the child, we'll try
to move it to the AioContext in which it already is (and which doesn't
match the AioContext of the child).



Oops, right, will fix




  ret = 0;
-g_slist_free(ignore);
-ignore = g_slist_prepend(NULL, child);
-child_class->set_aio_ctx(child, ctx, );
+error_free(local_err);
+local_err = NULL;
  }
-g_slist_free(ignore);
  }
  if (ret < 0) {
  error_propagate(errp, local_err);
@@ -6452,9 +6450,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
  if (g_slist_find(*ignore, child)) {
  continue;
  }
-assert(child->klass->set_aio_ctx);
-*ignore = g_slist_prepend(*ignore, child);
-child->klass->set_aio_ctx(child, new_context, ignore);
+bdrv_parent_set_aio_context_ignore(child, new_context, ignore);
  }
  
  bdrv_detach_aio_context(bs);

@@ -6511,6 +6507,37 @@ static bool bdrv_parent_can_set_aio_context(BdrvChild 
*c, AioContext *ctx,
  return true;
  }
  
+static void bdrv_parent_set_aio_context_ignore(BdrvChild *c, AioContext *ctx,

+   GSList **ignore)
+{
+if (g_slist_find(*ignore, c)) {
+return;
+}
+*ignore = g_slist_prepend(*ignore, c);
+
+assert(c->klass->set_aio_ctx);
+c->klass->set_aio_ctx(c, ctx, ignore);
+}
+
+int bdrv_parent_try_set_aio_context(BdrvChild *c, AioContext *ctx,
+Error **errp)
+{
+GSList *ignore = NULL;
+
+if (!bdrv_parent_can_set_aio_context(c, ctx, , errp)) {
+g_slist_free(ignore);
+return -EPERM;
+}
+
+g_slist_free(ignore);
+ignore = NULL;
+
+bdrv_parent_set_aio_context_ignore(c, ctx, );
+g_slist_free(ignore);
+
+return 0;
+}
+
  bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
  GSList **ignore, Error **errp)
  {
--
2.21.3






--
Best regards,
Vladimir

Re: [PATCH v2 10/36] util: add transactions.c

2021-01-18 Thread Kevin Wolf

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Add simple transaction API to use in further update of block graph
> operations.
> 
> Supposed usage is:
> 
> - "prepare" is main function of the action and it should make the main
>   effect of the action to be visible for the following actions, keeping
>   possibility of roll-back, saving necessary things in action state,
>   which is prepended to the list. So, driver struct doesn't include
>   "prepare" field, as it is supposed to be called directly.

So the convention is that tran_prepend() should be called by the
function that does the preparation? Or would we call tran_prepend() and
do the actual action in different places?

> - commit/rollback is supposed to be called for the list of action
>   states, to commit/rollback all the actions in reverse order
> 
> - When possible "commit" should not make visible effect for other
>   actions, which make possible transparent logical interaction between
>   actions.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  include/qemu/transactions.h | 46 +
>  util/transactions.c | 81 +
>  util/meson.build|  1 +
>  3 files changed, 128 insertions(+)
>  create mode 100644 include/qemu/transactions.h
>  create mode 100644 util/transactions.c
> 
> diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h
> new file mode 100644
> index 00..a5b15f46ab
> --- /dev/null
> +++ b/include/qemu/transactions.h
> @@ -0,0 +1,46 @@
> +/*
> + * Simple transactions API
> + *
> + * Copyright (c) 2020 Virtuozzo International GmbH.
> + *
> + * Author:
> + *  Vladimir Sementsov-Ogievskiy 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see .
> + */
> +
> +#ifndef QEMU_TRANSACTIONS_H
> +#define QEMU_TRANSACTIONS_H
> +
> +#include 
> +
> +typedef struct TransactionActionDrv {
> +void (*abort)(void *opeque);
> +void (*commit)(void *opeque);
> +void (*clean)(void *opeque);
> +} TransactionActionDrv;

s/opeque/opaque/

> +void tran_prepend(GSList **list, TransactionActionDrv *drv, void *opaque);
> +void tran_abort(GSList *backup);
> +void tran_commit(GSList *backup);

I'd add an empty line before a full function definition.

> +static inline void tran_finalize(GSList *backup, int ret)
> +{
> +if (ret < 0) {
> +tran_abort(backup);
> +} else {
> +tran_commit(backup);
> +}
> +}

Let's use an opaque struct instead of GSList here and...

> +#endif /* QEMU_TRANSACTIONS_H */
> diff --git a/util/transactions.c b/util/transactions.c
> new file mode 100644
> index 00..ef1b9a36a4
> --- /dev/null
> +++ b/util/transactions.c
> @@ -0,0 +1,81 @@
> +/*
> + * Simple transactions API
> + *
> + * Copyright (c) 2020 Virtuozzo International GmbH.
> + *
> + * Author:
> + *  Sementsov-Ogievskiy Vladimir 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see .
> + */
> +
> +#include "qemu/osdep.h"
> +
> +#include "qemu/transactions.h"
> +
> +typedef struct BdrvAction {
> +TransactionActionDrv *drv;
> +void *opaque;
> +} BdrvAction;

...add a QSLIST_ENTRY (or similar) here to make it a type-safe list.

The missing type safety of GSList means that we should avoid it whenever
it's easily possible (i.e. we know the number of lists in which an
element will be). Here, each BdrvAction will only be in a single list,
so typed lists should be simple enough.

Kevin

[RFC PATCH 0/2] hw/usb/dev-uas: Fix Clang 11 -Wgnu-variable-sized-type-not-at-end error

2021-01-18 Thread Philippe Mathieu-Daudé

Another attempt to fix the following Clang 11 warning:

  usb/dev-uas.c:157:31: error: field 'status' with variable sized type 'uas_i=
u' not at the end of a struct or class is a GNU extension [-Werror,-Wgnu-vari=
able-sized-type-not-at-end]
  uas_iustatus;
^
If a guest send a packet with additional data, respond
with "Illegal Request - parameter not supported".

Philippe Mathieu-Daud=C3=A9 (2):
  scsi/utils: Add INVALID_PARAM_VALUE sense code definition
  hw/usb/dev-uas: Report command additional adb length as unsupported

 include/scsi/utils.h |  2 ++
 hw/usb/dev-uas.c | 12 +++-
 scsi/utils.c |  5 +
 3 files changed, 18 insertions(+), 1 deletion(-)

--=20
2.26.2

Re: [PATCH v2 03/36] block: bdrv_append(): don't consume reference

2021-01-18 Thread Vladimir Sementsov-Ogievskiy


18.01.2021 17:18, Kevin Wolf wrote:

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:

We have too much comments for this feature. It seems better just don't
do it. Most of real users (tests don't count) have to create additional
reference.

Drop also comment in external_snapshot_prepare:
  - bdrv_append doesn't "remove" old bs in common sense, it sounds
strange
  - the fact that bdrv_append can fail is obvious from the context
  - the fact that we must rollback all changes in transaction abort is
known (it's the direct role of abort)

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block.c | 19 +++
  block/backup-top.c  |  1 -
  block/commit.c  |  1 +
  block/mirror.c  |  3 ---
  blockdev.c  |  4 
  tests/test-bdrv-drain.c |  2 +-
  tests/test-bdrv-graph-mod.c |  2 ++
  7 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/block.c b/block.c
index 0dd28f0902..55efef3c9d 100644
--- a/block.c
+++ b/block.c
@@ -3145,11 +3145,6 @@ static BlockDriverState 
*bdrv_append_temp_snapshot(BlockDriverState *bs,
  goto out;
  }
  
-/* bdrv_append() consumes a strong reference to bs_snapshot

- * (i.e. it will call bdrv_unref() on it) even on error, so in
- * order to be able to return one, we have to increase
- * bs_snapshot's refcount here */
-bdrv_ref(bs_snapshot);
  bdrv_append(bs_snapshot, bs, _err);
  if (local_err) {
  error_propagate(errp, local_err);
@@ -4608,10 +4603,8 @@ void bdrv_replace_node(BlockDriverState *from, 
BlockDriverState *to,
   *
   * This function does not create any image files.
   *
- * bdrv_append() takes ownership of a bs_new reference and unrefs it because
- * that's what the callers commonly need. bs_new will be referenced by the old
- * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
- * reference of its own, it must call bdrv_ref().
+ * Recent update: bdrv_append does NOT eat bs_new reference for now. Drop this
+ * comment several moths later.


A comment like this is unusual. Do you think there is a high risk of
somebody introducing a new bdrv_append() in parallel and that they would
read this comment when rebasing their existing patches?


Or even later, someone may remember that bdrv_append() eat the reference, and 
then face some strange behavior with a new call of bdrv_append(), finally go to 
check the function code and see the new comment.. I don't insist, we can drop 
the comment



If we do keep the comment: s/for now/now/ (it has recently changed,
we're not intending to change it later) and s/moths/months/.


   */
  void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
   Error **errp)
@@ -4621,20 +4614,14 @@ void bdrv_append(BlockDriverState *bs_new, 
BlockDriverState *bs_top,
  bdrv_set_backing_hd(bs_new, bs_top, _err);
  if (local_err) {
  error_propagate(errp, local_err);
-goto out;
+return;
  }
  
  bdrv_replace_node(bs_top, bs_new, _err);

  if (local_err) {
  error_propagate(errp, local_err);
  bdrv_set_backing_hd(bs_new, NULL, _abort);
-goto out;


Can we leave a return here just in case that new code will be added at
the end of the function?


sure




  }
-
-/* bs_new is now referenced by its new parents, we don't need the
- * additional reference any more. */
-out:
-bdrv_unref(bs_new);
  }
  
  static void bdrv_delete(BlockDriverState *bs)

diff --git a/block/backup-top.c b/block/backup-top.c
index fe6883cc97..650ed6195c 100644
--- a/block/backup-top.c
+++ b/block/backup-top.c
@@ -222,7 +222,6 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState 
*source,
  
  bdrv_drained_begin(source);
  
-bdrv_ref(top);

  bdrv_append(top, source, _err);
  if (local_err) {
  error_prepend(_err, "Cannot append backup-top filter: ");
diff --git a/block/commit.c b/block/commit.c
index 71db7ba747..61924bcf66 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -313,6 +313,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
  commit_top_bs->total_sectors = top->total_sectors;
  
  bdrv_append(commit_top_bs, top, _err);

+bdrv_unref(commit_top_bs); /* referenced by new parents or failed */
  if (local_err) {
  commit_top_bs = NULL;
  error_propagate(errp, local_err);
diff --git a/block/mirror.c b/block/mirror.c
index 8e1ad6eceb..13f7ecc998 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1605,9 +1605,6 @@ static BlockJob *mirror_start_job(
  bs_opaque = g_new0(MirrorBDSOpaque, 1);
  mirror_top_bs->opaque = bs_opaque;
  
-/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep

- * it alive until block_job_create() succeeds even if bs has no parent. */
-bdrv_ref(mirror_top_bs);
  bdrv_drained_begin(bs);
  bdrv_append(mirror_top_bs, bs,

Re: [PATCH v6 08/11] iotests: add testenv.py

2021-01-18 Thread Vladimir Sementsov-Ogievskiy


18.01.2021 12:59, Kevin Wolf wrote:

Am 16.01.2021 um 12:03 hat Vladimir Sementsov-Ogievskiy geschrieben:

15.01.2021 16:30, Vladimir Sementsov-Ogievskiy wrote:

15.01.2021 16:20, Kevin Wolf wrote:

Am 15.01.2021 um 14:10 hat Vladimir Sementsov-Ogievskiy geschrieben:

15.01.2021 15:45, Kevin Wolf wrote:

Am 15.01.2021 um 13:19 hat Vladimir Sementsov-Ogievskiy geschrieben:

15.01.2021 14:18, Kevin Wolf wrote:

Am 09.01.2021 um 13:26 hat Vladimir Sementsov-Ogievskiy geschrieben:

Add TestEnv class, which will handle test environment in a new python
iotests running framework.

Difference with current ./check interface:
- -v (verbose) option dropped, as it is unused

- -xdiff option is dropped, until somebody complains that it is needed
- same for -n option

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
Â Â Â  tests/qemu-iotests/testenv.py | 328 ++
Â Â Â  1 file changed, 328 insertions(+)
Â Â Â  create mode 100755 tests/qemu-iotests/testenv.py



[..]


+Â Â Â  def init_binaries(self):
+Â Â Â Â Â Â Â  """Init binary path variables:
+Â Â Â Â Â Â Â Â Â Â Â Â  PYTHON (for bash tests)
+Â Â Â Â Â Â Â Â Â Â Â Â  QEMU_PROG, QEMU_IMG_PROG, QEMU_IO_PROG, 
QEMU_NBD_PROG, QSD_PROG
+Â Â Â Â Â Â Â Â Â Â Â Â  SOCKET_SCM_HELPER
+Â Â Â Â Â Â Â  """
+Â Â Â Â Â Â Â  self.python = '/usr/bin/python3 -B'


This doesn't look right, we need to respect the Python binary set in
configure (which I think we get from common.env)


Oh, I missed the change. Then I should just drop this self.python.


Do we still get the value from elsewhere or do we need to manually parse
common.env?


Hmm.. Good question. We have either parse common.env, and still create 
self.python variable.

Or drop it, and include common.env directly to bash tests. For this we'll need 
to export

BUILD_IOTESTS, and do
Â  . $BUILD_IOTESTS/common.env

in common.rc..


check uses it, too, for running Python test cases.



But new check (written in python) doesn't.. Should I keep bash check, which 
will have only one line to call check.py with help of PYTHON?




Or finally, may be just drop it? Can we just use system python for
tests, now when we are already in a python3 world?


You can only assume the Python 3 (or more specifically 3.6+) world if
you are using the Python interpreter that was passed to configure. This
will commonly be the same thing as the first python3 in PATH, but it
doesn't have to be.

So your solution of just using the same interpreter as for check might
be okay as long as we make sure that 'make check' calls check with the
configured Python interpreter instead of relying on the shebang line.



I've sent v7 with exactly this thing: use same interpreter as for check and 
adjust check-block.sh which is called from make check


--
Best regards,
Vladimir

[RFC PATCH 2/2] hw/usb/dev-uas: Report command additional adb length as unsupported

2021-01-18 Thread Philippe Mathieu-Daudé

We are not ready to handle additional CDB data.

If a guest send a packet with such additional data,
report the command parameter as not supported.

We can then explicit there is nothing in this additional
buffer, by fixing its size to zero.

This fixes an error when building with Clang 11:

  usb/dev-uas.c:157:31: error: field 'status' with variable sized type 'uas_iu' 
not at the end of a struct or class is a GNU extension 
[-Werror,-Wgnu-variable-sized-type-not-at-end]
  uas_iustatus;
^

Reported-by: Daniele Buono 
Signed-off-by: Philippe Mathieu-Daudé 
---
Cc: Ed Maste 
Cc: Han Han 
Cc: Marc-André Lureau 
Cc: Paolo Bonzini 
Cc: Gustavo A. R. Silva 
---
 hw/usb/dev-uas.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index cec071d96c4..b6434ad4b9c 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -16,6 +16,7 @@
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/module.h"
+#include "qemu/log.h"
 
 #include "hw/usb.h"
 #include "migration/vmstate.h"
@@ -70,7 +71,7 @@ typedef struct {
 uint8_treserved_2;
 uint64_t   lun;
 uint8_tcdb[16];
-uint8_tadd_cdb[];
+uint8_tadd_cdb[0];  /* not supported by QEMU */
 } QEMU_PACKED  uas_iu_command;
 
 typedef struct {
@@ -700,6 +701,11 @@ static void usb_uas_command(UASDevice *uas, uas_iu *iu)
 uint32_t len;
 uint16_t tag = be16_to_cpu(iu->hdr.tag);
 
+if (iu->command.add_cdb_length > 0) {
+qemu_log_mask(LOG_UNIMP, "additional adb length not yet supported\n");
+goto unsupported_len;
+}
+
 if (uas_using_streams(uas) && tag > UAS_MAX_STREAMS) {
 goto invalid_tag;
 }
@@ -735,6 +741,10 @@ static void usb_uas_command(UASDevice *uas, uas_iu *iu)
 }
 return;
 
+unsupported_len:
+usb_uas_queue_fake_sense(uas, tag, sense_code_INVALID_PARAM_VALUE);
+return;
+
 invalid_tag:
 usb_uas_queue_fake_sense(uas, tag, sense_code_INVALID_TAG);
 return;
-- 
2.26.2

Re: [PATCH v5] block: report errno when flock fcntl fails

2021-01-18 Thread Max Reitz


On 13.01.21 17:44, David Edmondson wrote:

When a call to fcntl(2) for the purpose of adding file locks fails
with an error other than EAGAIN or EACCES, report the error returned
by fcntl.

EAGAIN or EACCES are elided as they are considered to be common
failures, indicating that a conflicting lock is held by another
process.

No errors are elided when removing file locks.

Signed-off-by: David Edmondson 
---
v3:
- Remove the now unnecessary updates to the test framework (Max).
- Elide the error detail for EAGAIN or EACCES when locking (Kevin,
sort-of Max).
- Philippe and Vladimir sent Reviewed-by, but things have changed
noticeably, so I didn't add them (dme).

v4:
- Really, really remove the unnecessary updates to the test framework.

v5:
- Use a macro to avoid duplicating the EAGAIN/EACCES suppression
   (Vladimir).
- Fix "lock" -> "unlock" (Vladimir).
- Comment on not eliding errors for the unlock case (Vladimir).


Thanks!  I’ve applied this patch to my block branch:

https://git.xanclic.moe/XanClic/qemu/commits/branch/block

Max

Re: [raw] Guest stuck during live live-migration

2021-01-18 Thread Alexandre Arents

Hi,

We made further tests on this, it seems related to disk fragmentation,
I can reproduce on a local-mirroring 5.2.0-rc2(I assume it is the same issue 
than
 live-migration, to confirm).

I put details here If you can have a look:
https://bugs.launchpad.net/qemu/+bug/1912224

Issue is ext4 tools does not consider file as enough fragmented to unfragment, 
and we cannot live-migrate because of the issue.

Alexandre


Kevin Wolf  wrote on mer. [2020-déc.-02 16:33:47 +0100]:
> Am 02.12.2020 um 16:09 hat Quentin Grolleau geschrieben:
> > Do you think that, applying this patch ( replacing by "#if 0" there :
> > https://github.com/qemu/qemu/blob/master/block/file-posix.c#L2601 ),
> > could affect for any reason the customer data ?
> > 
> > As we are on full NVME and 10G networks it should fix our vm which
> > completely freeze.
> 
> It's not a real fix for the general case, of course, but yes, you can do
> that safely. It means that QEMU will assume that all of your raw images
> are fully allocated.
> 
> Kevin
> 
> > De : Qemu-devel 
> >  de la part de 
> > Quentin Grolleau 
> > EnvoyÃ© : mardi 24 novembre 2020 13:58:53
> > Ã : Kevin Wolf
> > Cc : qemu-de...@nongnu.org; qemu-block@nongnu.org
> > Objet : RE: [raw] Guest stuck during live live-migration
> > 
> > Thanks Kevin,
> > 
> > > > Hello,
> > > >
> > > > In our company, we are hosting a large number of Vm, hosted behind 
> > > > Openstack (so libvirt/qemu).
> > > > A large majority of our Vms are runnign with local data only, stored on 
> > > > NVME, and most of them are RAW disks.
> > > >
> > > > With Qemu 4.0 (can be even with older version) we see strange  
> > > > live-migration comportement:
> > 
> > > First of all, 4.0 is relatively old. Generally it is worth retrying with
> > > the most recent code (git master or 5.2.0-rc2) before having a closer
> > > look at problems, because it is frustrating to spend considerable time
> > > debugging an issue and then find out it has already been fixed a year
> > > ago.
> > 
> > > I will try to build it the most recent code
> > 
> > 
> > I will try to build with the most recent code, but it will take me some 
> > time to do it
> > 
> > 
> > > > - some Vms live migrate at very high speed without issue (> 6 Gbps)
> > > > - some Vms are running correctly, but migrating at a strange low 
> > > > speed (3Gbps)
> > > > - some Vms are migrating at a very low speed (1Gbps, sometime less) 
> > > > and during the migration the guest is completely I/O stuck
> > > >
> > > > When this issue happen the VM is completly block, iostat in the Vm show 
> > > > us a latency of 30 secs
> > 
> > > Can you get the stack backtraces of all QEMU threads while the VM is
> > > blocked (e.g. with gdb or pstack)?
> > 
> > (gdb) thread apply all bt
> > 
> > Thread 20 (Thread 0x7f8a0effd700 (LWP 201248)):
> > #0  pthread_cond_wait@@GLIBC_2.3.2 () at 
> > ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
> > #1  0x56520139878b in qemu_cond_wait_impl (cond=0x5652020f27b0, 
> > mutex=0x5652020f27e8, file=0x5652014e4178 
> > "/root/qemu_debug_LSEEK/qemu_debug/qemu/ui/vnc-jobs.c", line=214) at 
> > /root/qemu_debug_LSEEK/qemu_debug/qemu/util/qemu-thread-posix.c:161
> > #2  0x5652012a264d in vnc_worker_thread_loop 
> > (queue=queue@entry=0x5652020f27b0) at 
> > /root/qemu_debug_LSEEK/qemu_debug/qemu/ui/vnc-jobs.c:214
> > #3  0x5652012a2c18 in vnc_worker_thread (arg=arg@entry=0x5652020f27b0) 
> > at /root/qemu_debug_LSEEK/qemu_debug/qemu/ui/vnc-jobs.c:324
> > #4  0x565201398116 in qemu_thread_start (args=) at 
> > /root/qemu_debug_LSEEK/qemu_debug/qemu/util/qemu-thread-posix.c:502
> > #5  0x7f8a5e24a6ba in start_thread (arg=0x7f8a0effd700) at 
> > pthread_create.c:333
> > #6  0x7f8a5df8041d in clone () at 
> > ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
> > 
> > Thread 19 (Thread 0x7f8a0700 (LWP 201222)):
> > #0  __lll_lock_wait () at 
> > ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
> > #1  0x7f8a5e24cdbd in __GI___pthread_mutex_lock 
> > (mutex=mutex@entry=0x565201adb680 ) at 
> > ../nptl/pthread_mutex_lock.c:80
> > #2  0x565201398263 in qemu_mutex_lock_impl (mutex=0x565201adb680 
> > , file=0x5652013d7c68 
> > "/root/qemu_debug_LSEEK/qemu_debug/qemu/accel/kvm/kvm-all.c", line=2089) at 
> > /root/qemu_debug_LSEEK/qemu_debug/qemu/util/qemu-thread-posix.c:66
> > #3  0x565200f7d00e in qemu_mutex_lock_iothread_impl 
> > (file=file@entry=0x5652013d7c68 
> > "/root/qemu_debug_LSEEK/qemu_debug/qemu/accel/kvm/kvm-all.c", 
> > line=line@entry=2089) at /root/qemu_debug_LSEEK/qemu_debug/qemu/cpus.c:1850
> > #4  0x565200fa7ca8 in kvm_cpu_exec (cpu=cpu@entry=0x565202354480) at 
> > /root/qemu_debug_LSEEK/qemu_debug/qemu/accel/kvm/kvm-all.c:2089
> > #5  0x565200f7d1ce in qemu_kvm_cpu_thread_fn 
> > (arg=arg@entry=0x565202354480) at 
> > /root/qemu_debug_LSEEK/qemu_debug/qemu/cpus.c:1281
> > #6  0x565201398116 in qemu_thread_start (args=) at 
> >

Re: [PATCH 2/2] virtio-scsi-test: Test writing to scsi-cd device

2021-01-18 Thread Philippe Mathieu-Daudé

On 1/18/21 1:34 PM, Kevin Wolf wrote:
> This tests that trying to write to a (read-only) scsi-cd device backed
> by a read-write image file doesn't crash and results in the correct
> error.
> 
> This is a regression test for https://bugs.launchpad.net/bugs/1906693.
> 
> Signed-off-by: Kevin Wolf 
> ---
>  tests/qtest/virtio-scsi-test.c | 39 ++
>  1 file changed, 39 insertions(+)

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH 1/2] block: Separate blk_is_writable() and blk_supports_write_perm()

2021-01-18 Thread Philippe Mathieu-Daudé

On 1/18/21 1:34 PM, Kevin Wolf wrote:
> Currently, blk_is_read_only() tells whether a given BlockBackend can
> only be used in read-only mode because its root node is read-only. Some
> callers actually try to answer a slightly different question: Is the
> BlockBackend configured to be writable, by taking write permissions on
> the root node?
> 
> This can differ, for example, for CD-ROM devices which don't take write
> permissions, but may be backed by a writable image file. scsi-cd allows
> write requests to the drive if blk_is_read_only() returns false.
> However, the write request will immediately run into an assertion
> failure because the write permission is missing.
> 
> This patch introduces separate functions for both questions.
> blk_supports_write_perm() answers the question whether the block
> node/image file can support writable devices, whereas blk_is_writable()
> tells whether the BlockBackend is currently configured to be writable.
> 
> All calls of blk_is_read_only() are converted to one of the two new
> functions.
> 
> Fixes: https://bugs.launchpad.net/bugs/1906693
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Kevin Wolf 
> ---
>  include/sysemu/block-backend.h |  3 ++-
>  block/block-backend.c  | 19 ---
>  hw/block/dataplane/xen-block.c |  2 +-
>  hw/block/fdc.c |  9 +
>  hw/block/m25p80.c  |  6 +++---
>  hw/block/nand.c|  2 +-
>  hw/block/nvme-ns.c |  7 ---
>  hw/block/onenand.c |  2 +-
>  hw/block/pflash_cfi01.c|  2 +-
>  hw/block/pflash_cfi02.c|  2 +-
>  hw/block/swim.c|  6 +++---
>  hw/block/virtio-blk.c  |  6 +++---
>  hw/block/xen-block.c   |  2 +-
>  hw/ide/core.c  |  2 +-
>  hw/misc/sifive_u_otp.c |  2 +-
>  hw/ppc/pnv_pnor.c  |  2 +-
>  hw/scsi/scsi-disk.c| 10 +-
>  hw/scsi/scsi-generic.c |  4 ++--
>  hw/sd/sd.c |  6 +++---
>  hw/usb/dev-storage.c   |  4 ++--
>  20 files changed, 57 insertions(+), 41 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé

RE: [PATCH] xen-block: fix reporting of discard feature

2021-01-18 Thread Paul Durrant

> -Original Message-
> From: Roger Pau Monne 
> Sent: 18 January 2021 15:34
> To: qemu-de...@nongnu.org
> Cc: Roger Pau Monne ; Arthur Borsboom 
> ; Stefano
> Stabellini ; Anthony Perard 
> ; Paul Durrant
> ; Kevin Wolf ; Max Reitz ; 
> xen-
> de...@lists.xenproject.org; qemu-block@nongnu.org
> Subject: [PATCH] xen-block: fix reporting of discard feature
> 
> Linux blkfront expects both "discard-granularity" and
> "discard-alignment" present on xenbus in order to properly enable the
> feature, not exposing "discard-alignment" left some Linux blkfront
> versions with a broken discard setup. This has also been addressed in
> Linux with:
> 
> https://lore.kernel.org/lkml/20210118151528.81668-1-roger@citrix.com/T/#u
> 
> Fix QEMU to report a "discard-alignment" of 0, in order for it to work
> with older Linux frontends.
> 
> Reported-by: Arthur Borsboom 
> Signed-off-by: Roger Pau Monné 

Reviewed-by: Paul Durrant 

> ---
> Cc: Stefano Stabellini 
> Cc: Anthony Perard 
> Cc: Paul Durrant 
> Cc: Kevin Wolf 
> Cc: Max Reitz 
> Cc: xen-de...@lists.xenproject.org
> Cc: qemu-block@nongnu.org
> ---
>  hw/block/xen-block.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
> index 718d886e5c..246d9c23a2 100644
> --- a/hw/block/xen-block.c
> +++ b/hw/block/xen-block.c
> @@ -253,6 +253,7 @@ static void xen_block_realize(XenDevice *xendev, Error 
> **errp)
>  xen_device_backend_printf(xendev, "feature-discard", "%u", 1);
>  xen_device_backend_printf(xendev, "discard-granularity", "%u",
>conf->discard_granularity);
> +xen_device_backend_printf(xendev, "discard-alignment", "%u", 0);
>  }
> 
>  xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1);
> --
> 2.29.2

Re: [PATCH v2 09/36] block: return value from bdrv_replace_node()

2021-01-18 Thread Kevin Wolf

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Functions with errp argument are not recommened to be void-functions.
> Improve bdrv_replace_node().
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  include/block/block.h |  4 ++--
>  block.c   | 14 --
>  2 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/include/block/block.h b/include/block/block.h
> index 5d59984ad4..8f6100dad7 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -346,8 +346,8 @@ int bdrv_create_file(const char *filename, QemuOpts 
> *opts, Error **errp);
>  BlockDriverState *bdrv_new(void);
>  int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
>  Error **errp);
> -void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
> -   Error **errp);
> +int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
> +  Error **errp);
>  
>  int bdrv_parse_aio(const char *mode, int *flags);
>  int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
> diff --git a/block.c b/block.c
> index 3765c7caed..29082c6d47 100644
> --- a/block.c
> +++ b/block.c
> @@ -4537,14 +4537,14 @@ static bool should_update_child(BdrvChild *c, 
> BlockDriverState *to)
>   * With auto_skip=false the error is returned if from has a parent which 
> should
>   * not be updated.
>   */
> -static void bdrv_replace_node_common(BlockDriverState *from,
> - BlockDriverState *to,
> - bool auto_skip, Error **errp)
> +static int bdrv_replace_node_common(BlockDriverState *from,
> +BlockDriverState *to,
> +bool auto_skip, Error **errp)
>  {
> +int ret = -EPERM;
>  BdrvChild *c, *next;
>  GSList *list = NULL, *p;
>  uint64_t perm = 0, shared = BLK_PERM_ALL;
> -int ret;

I think I'd prefer setting ret in each error path. This makes it more
obvious that ret has the right value and hasn't been modified between
the initialisation and the error.

>  
>  /* Make sure that @from doesn't go away until we have successfully 
> attached
>   * all of its parents to @to. */
> @@ -4600,10 +4600,12 @@ out:

Let's add an explicit ret = 0 right before the out: label.

>  g_slist_free(list);
>  bdrv_drained_end(from);
>  bdrv_unref(from);
> +
> +return ret;
>  }

With these changes:

Reviewed-by: Kevin Wolf

[PATCH] xen-block: fix reporting of discard feature

2021-01-18 Thread roger . pau--- via

Linux blkfront expects both "discard-granularity" and
"discard-alignment" present on xenbus in order to properly enable the
feature, not exposing "discard-alignment" left some Linux blkfront
versions with a broken discard setup. This has also been addressed in
Linux with:

https://lore.kernel.org/lkml/20210118151528.81668-1-roger@citrix.com/T/#u

Fix QEMU to report a "discard-alignment" of 0, in order for it to work
with older Linux frontends.

Reported-by: Arthur Borsboom 
Signed-off-by: Roger Pau Monné 
---
Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Paul Durrant 
Cc: Kevin Wolf 
Cc: Max Reitz 
Cc: xen-de...@lists.xenproject.org
Cc: qemu-block@nongnu.org
---
 hw/block/xen-block.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 718d886e5c..246d9c23a2 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -253,6 +253,7 @@ static void xen_block_realize(XenDevice *xendev, Error 
**errp)
 xen_device_backend_printf(xendev, "feature-discard", "%u", 1);
 xen_device_backend_printf(xendev, "discard-granularity", "%u",
   conf->discard_granularity);
+xen_device_backend_printf(xendev, "discard-alignment", "%u", 0);
 }
 
 xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1);
-- 
2.29.2

Re: [PATCH v2 08/36] block: make bdrv_reopen_{prepare,commit,abort} private

2021-01-18 Thread Kevin Wolf

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:
> These functions are called only from bdrv_reopen_multiple() in block.c.
> No reason to publish them.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 

Reviewed-by: Kevin Wolf

Re: [PATCH v2 06/36] block: BdrvChildClass: add .get_parent_aio_context handler

2021-01-18 Thread Kevin Wolf

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Add new handler to get aio context and implement it in all child
> classes. Add corresponding public interface to be used soon.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 

Hm, are you going to introduce cases where parent and child context
don't match, or why is this a useful function?

Even if so, I feel it shouldn't be any of the child's business what
AioContext the parent uses.

Well, maybe the rest of the series will answer this.

Kevin

Re: [PATCH v2 9/9] gitlab-ci: Add alpine to pipeline

2021-01-18 Thread Thomas Huth


On 18/01/2021 15.50, Daniel P. Berrangé wrote:

On Mon, Jan 18, 2021 at 03:44:49PM +0100, Thomas Huth wrote:

On 18/01/2021 14.37, Jiaxun Yang wrote:



On Mon, Jan 18, 2021, at 6:11 PM, Daniel P. Berrangé wrote:

On Mon, Jan 18, 2021 at 02:38:08PM +0800, Jiaxun Yang wrote:

We only run build test and check-acceptance as their are too many
failures in checks due to minor string mismatch.


Can you give real examples of what's broken here, as that sounds
rather suspicious, and I'm not convinced it should be ignored.


Mostly Input/Output error vs I/O Error.


Right, out of curiosity, I also gave it a try:

  https://gitlab.com/huth/qemu/-/jobs/969225330

Apart from the "I/O Error" vs. "Input/Output Error" difference, there also
seems to be a problem with "sed" in some of the tests.


The "sed" thing sounds like something that ought to be investigated
from a portability POV rather than ignored.


The weird thing is that we explicitly test for GNU sed in 
tests/check-block.sh and skip the iotests if it's not available... so I'm a 
little bit surprised that the iotests are run here with an apparently 
different version of sed...?


 Thomas

Re: [PATCH v2 05/36] block: add bdrv_parent_try_set_aio_context

2021-01-18 Thread Kevin Wolf

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:
> We already have bdrv_parent_can_set_aio_context(). Add corresponding
> bdrv_parent_set_aio_context_ignore() and
> bdrv_parent_try_set_aio_context() and use them instead of open-coding.
> 
> Make bdrv_parent_try_set_aio_context() public, as it will be used in
> further commit.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  include/block/block.h |  2 ++
>  block.c   | 51 +--
>  2 files changed, 41 insertions(+), 12 deletions(-)
> 
> diff --git a/include/block/block.h b/include/block/block.h
> index ee3f5a6cca..550c5a7513 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -686,6 +686,8 @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, 
> AioContext *ctx,
>  GSList **ignore, Error **errp);
>  bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
>GSList **ignore, Error **errp);
> +int bdrv_parent_try_set_aio_context(BdrvChild *c, AioContext *ctx,
> +Error **errp);
>  int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
>  int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
>  
> diff --git a/block.c b/block.c
> index 916087ee1a..5d925c208d 100644
> --- a/block.c
> +++ b/block.c
> @@ -81,6 +81,9 @@ static BlockDriverState *bdrv_open_inherit(const char 
> *filename,
> BdrvChildRole child_role,
> Error **errp);
>  
> +static void bdrv_parent_set_aio_context_ignore(BdrvChild *c, AioContext *ctx,
> +   GSList **ignore);
> +
>  /* If non-zero, use only whitelisted block drivers */
>  static int use_bdrv_whitelist;
>  
> @@ -2655,17 +2658,12 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState 
> *child_bs,
>   * try moving the parent into the AioContext of child_bs instead. */
>  if (bdrv_get_aio_context(child_bs) != ctx) {
>  ret = bdrv_try_set_aio_context(child_bs, ctx, _err);
> -if (ret < 0 && child_class->can_set_aio_ctx) {
> -GSList *ignore = g_slist_prepend(NULL, child);
> -ctx = bdrv_get_aio_context(child_bs);

You are losing this line...

> -if (child_class->can_set_aio_ctx(child, ctx, , NULL)) {
> -error_free(local_err);
> +if (ret < 0) {
> +if (bdrv_parent_try_set_aio_context(child, ctx, NULL) == 0) {

...before this one, so I think the wrong context is passed now. Instead
of trying to move the parent to the AioContext of the child, we'll try
to move it to the AioContext in which it already is (and which doesn't
match the AioContext of the child).

Kevin

>  ret = 0;
> -g_slist_free(ignore);
> -ignore = g_slist_prepend(NULL, child);
> -child_class->set_aio_ctx(child, ctx, );
> +error_free(local_err);
> +local_err = NULL;
>  }
> -g_slist_free(ignore);
>  }
>  if (ret < 0) {
>  error_propagate(errp, local_err);
> @@ -6452,9 +6450,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
>  if (g_slist_find(*ignore, child)) {
>  continue;
>  }
> -assert(child->klass->set_aio_ctx);
> -*ignore = g_slist_prepend(*ignore, child);
> -child->klass->set_aio_ctx(child, new_context, ignore);
> +bdrv_parent_set_aio_context_ignore(child, new_context, ignore);
>  }
>  
>  bdrv_detach_aio_context(bs);
> @@ -6511,6 +6507,37 @@ static bool bdrv_parent_can_set_aio_context(BdrvChild 
> *c, AioContext *ctx,
>  return true;
>  }
>  
> +static void bdrv_parent_set_aio_context_ignore(BdrvChild *c, AioContext *ctx,
> +   GSList **ignore)
> +{
> +if (g_slist_find(*ignore, c)) {
> +return;
> +}
> +*ignore = g_slist_prepend(*ignore, c);
> +
> +assert(c->klass->set_aio_ctx);
> +c->klass->set_aio_ctx(c, ctx, ignore);
> +}
> +
> +int bdrv_parent_try_set_aio_context(BdrvChild *c, AioContext *ctx,
> +Error **errp)
> +{
> +GSList *ignore = NULL;
> +
> +if (!bdrv_parent_can_set_aio_context(c, ctx, , errp)) {
> +g_slist_free(ignore);
> +return -EPERM;
> +}
> +
> +g_slist_free(ignore);
> +ignore = NULL;
> +
> +bdrv_parent_set_aio_context_ignore(c, ctx, );
> +g_slist_free(ignore);
> +
> +return 0;
> +}
> +
>  bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
>  GSList **ignore, Error **errp)
>  {
> -- 
> 2.21.3
>

Re: [PATCH v4 00/23] backup performance: block_status + async

2021-01-18 Thread Max Reitz


On 16.01.21 22:46, Vladimir Sementsov-Ogievskiy wrote:

Hi Max!
I applied my series onto yours 129-fixing and found, that 129 fails for backup.
And setting small max-chunk and even max-workers to 1 doesn't help! (setting
speed like in v3 still helps).

And I found, that the problem is that really, the whole backup job goes during
drain, because in new architecture we do just job_yield() during the whole
background block-copy.


OK, so as it was in v3, the job was drained, but since it was already 
yielding while block-copy was running in the background, nothing 
happened; the block-copy completed and only then was the job woken (and 
then there was no reason to pause, because it was done already).


So now the job is entered on drain, too (not only user pauses), which 
means that it gets a chance to pause background requests.


In backup’s case, that means invoking job_yield() again, which sets a 
job_pause_point(), which will cancel the block-copy.  Once the job is 
unpaused (re-entered by job_resume()), backup sees block-copy is 
cancelled (and finished), leaves the loop, and retries with a new 
block-copy call.


I think I got it now.


So all that’s left is issuing a thanks to you – thanks! – and announcing 
that I’ve applied this series to my block branch (with s/not 
unsupported/not supported/ in patch 23):


https://git.xanclic.moe/XanClic/qemu/commits/branch/block

Max

Re: [PATCH v2 9/9] gitlab-ci: Add alpine to pipeline

2021-01-18 Thread Daniel P . Berrangé

On Mon, Jan 18, 2021 at 03:44:49PM +0100, Thomas Huth wrote:
> On 18/01/2021 14.37, Jiaxun Yang wrote:
> > 
> > 
> > On Mon, Jan 18, 2021, at 6:11 PM, Daniel P. Berrangé wrote:
> > > On Mon, Jan 18, 2021 at 02:38:08PM +0800, Jiaxun Yang wrote:
> > > > We only run build test and check-acceptance as their are too many
> > > > failures in checks due to minor string mismatch.
> > > 
> > > Can you give real examples of what's broken here, as that sounds
> > > rather suspicious, and I'm not convinced it should be ignored.
> > 
> > Mostly Input/Output error vs I/O Error.
> 
> Right, out of curiosity, I also gave it a try:
> 
>  https://gitlab.com/huth/qemu/-/jobs/969225330
> 
> Apart from the "I/O Error" vs. "Input/Output Error" difference, there also
> seems to be a problem with "sed" in some of the tests.

The "sed" thing sounds like something that ought to be investigated
from a portability POV rather than ignored.

More worrying is the fact that there's a segv in there too when
running qemu-img, which does not give me confidence in use of
it with musl.



Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH v2 9/9] gitlab-ci: Add alpine to pipeline

2021-01-18 Thread Thomas Huth


On 18/01/2021 14.37, Jiaxun Yang wrote:



On Mon, Jan 18, 2021, at 6:11 PM, Daniel P. Berrangé wrote:

On Mon, Jan 18, 2021 at 02:38:08PM +0800, Jiaxun Yang wrote:

We only run build test and check-acceptance as their are too many
failures in checks due to minor string mismatch.


Can you give real examples of what's broken here, as that sounds
rather suspicious, and I'm not convinced it should be ignored.


Mostly Input/Output error vs I/O Error.


Right, out of curiosity, I also gave it a try:

 https://gitlab.com/huth/qemu/-/jobs/969225330

Apart from the "I/O Error" vs. "Input/Output Error" difference, there also 
seems to be a problem with "sed" in some of the tests.


Jiaxun, I think you could simply add a job like this instead:

check-system-alpine:
  <<: *native_test_job_definition
  needs:
- job: build-system-alpine
  artifacts: true
  variables:
IMAGE: alpine
MAKE_CHECK_ARGS: check-qtest check-unit check-qapi-schema check-softfloat

That will run all the other tests, without the problematic check-block part.

 Thomas

Re: [PATCH v2 04/36] block: bdrv_append(): return status

2021-01-18 Thread Kevin Wolf

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Return int status to avoid extra error propagation schemes.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 

Reviewed-by: Kevin Wolf

Re: [PATCH v4 09/23] job: call job_enter from job_pause

2021-01-18 Thread Max Reitz


On 18.01.21 14:45, Max Reitz wrote:

On 16.01.21 22:46, Vladimir Sementsov-Ogievskiy wrote:

If main job coroutine called job_yield (while some background process
is in progress), we should give it a chance to call job_pause_point().
It will be used in backup, when moved on async block-copy.

Note, that job_user_pause is not enough: we want to handle
child_job_drained_begin() as well, which call job_pause().


OK.


Still, if job is already in job_do_yield() in job_pause_point() we
should not enter it.


Agreed.


iotest 109 output is modified: on stop we do bdrv_drain_all() which now
triggers job pause immediately (and pause after ready is standby).


Sounds like a good thing.


Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  job.c  |  3 +++
  tests/qemu-iotests/109.out | 24 
  2 files changed, 27 insertions(+)

diff --git a/job.c b/job.c
index 8fecf38960..3aaaebafe2 100644
--- a/job.c
+++ b/job.c
@@ -553,6 +553,9 @@ static bool job_timer_not_pending(Job *job)
  void job_pause(Job *job)
  {
  job->pause_count++;
+    if (!job->paused) {
+    job_enter(job);
+    }
  }


I see job_pause is also called from block_job_error_action() – should we 
reenter the job there, too?


(It looks to me like e.g. mirror would basically just continue to run, 
then, until it needs to yield because of some other issue.  I don’t know 
whether that’s a problem, because I suppose we don’t guarantee to stop 
immediately on an error, though I suspect users would expect us to do 
that as early as possible (i.e., not to launch new requests).


[Quite some time later]

I’ve now tested a mirror job that stops due to a target error, and it 
actually does not make any progress; or at least it doesn’t report any. 
  So it looks like my concern is unjustified.  I don’t know why it’s 
unjustified, though, so perhaps you can explain it before I give my R-b 
O:))


Oh, I guess because job_enter_cond() doesn’t enter if the job is busy 
already.  That would make a lot of sense, so I’m going to assume that’s 
what’s preventing the job_enter() to do anything if the job is already 
running (which it has to be to invoke block_job_error_action()).


Reviewed-by: Max Reitz

Re: [PATCH v2 03/36] block: bdrv_append(): don't consume reference

2021-01-18 Thread Kevin Wolf

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:
> We have too much comments for this feature. It seems better just don't
> do it. Most of real users (tests don't count) have to create additional
> reference.
> 
> Drop also comment in external_snapshot_prepare:
>  - bdrv_append doesn't "remove" old bs in common sense, it sounds
>strange
>  - the fact that bdrv_append can fail is obvious from the context
>  - the fact that we must rollback all changes in transaction abort is
>known (it's the direct role of abort)
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  block.c | 19 +++
>  block/backup-top.c  |  1 -
>  block/commit.c  |  1 +
>  block/mirror.c  |  3 ---
>  blockdev.c  |  4 
>  tests/test-bdrv-drain.c |  2 +-
>  tests/test-bdrv-graph-mod.c |  2 ++
>  7 files changed, 7 insertions(+), 25 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 0dd28f0902..55efef3c9d 100644
> --- a/block.c
> +++ b/block.c
> @@ -3145,11 +3145,6 @@ static BlockDriverState 
> *bdrv_append_temp_snapshot(BlockDriverState *bs,
>  goto out;
>  }
>  
> -/* bdrv_append() consumes a strong reference to bs_snapshot
> - * (i.e. it will call bdrv_unref() on it) even on error, so in
> - * order to be able to return one, we have to increase
> - * bs_snapshot's refcount here */
> -bdrv_ref(bs_snapshot);
>  bdrv_append(bs_snapshot, bs, _err);
>  if (local_err) {
>  error_propagate(errp, local_err);
> @@ -4608,10 +4603,8 @@ void bdrv_replace_node(BlockDriverState *from, 
> BlockDriverState *to,
>   *
>   * This function does not create any image files.
>   *
> - * bdrv_append() takes ownership of a bs_new reference and unrefs it because
> - * that's what the callers commonly need. bs_new will be referenced by the 
> old
> - * parents of bs_top after bdrv_append() returns. If the caller needs to 
> keep a
> - * reference of its own, it must call bdrv_ref().
> + * Recent update: bdrv_append does NOT eat bs_new reference for now. Drop 
> this
> + * comment several moths later.

A comment like this is unusual. Do you think there is a high risk of
somebody introducing a new bdrv_append() in parallel and that they would
read this comment when rebasing their existing patches?

If we do keep the comment: s/for now/now/ (it has recently changed,
we're not intending to change it later) and s/moths/months/.

>   */
>  void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
>   Error **errp)
> @@ -4621,20 +4614,14 @@ void bdrv_append(BlockDriverState *bs_new, 
> BlockDriverState *bs_top,
>  bdrv_set_backing_hd(bs_new, bs_top, _err);
>  if (local_err) {
>  error_propagate(errp, local_err);
> -goto out;
> +return;
>  }
>  
>  bdrv_replace_node(bs_top, bs_new, _err);
>  if (local_err) {
>  error_propagate(errp, local_err);
>  bdrv_set_backing_hd(bs_new, NULL, _abort);
> -goto out;

Can we leave a return here just in case that new code will be added at
the end of the function?

>  }
> -
> -/* bs_new is now referenced by its new parents, we don't need the
> - * additional reference any more. */
> -out:
> -bdrv_unref(bs_new);
>  }
>  
>  static void bdrv_delete(BlockDriverState *bs)
> diff --git a/block/backup-top.c b/block/backup-top.c
> index fe6883cc97..650ed6195c 100644
> --- a/block/backup-top.c
> +++ b/block/backup-top.c
> @@ -222,7 +222,6 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState 
> *source,
>  
>  bdrv_drained_begin(source);
>  
> -bdrv_ref(top);
>  bdrv_append(top, source, _err);
>  if (local_err) {
>  error_prepend(_err, "Cannot append backup-top filter: ");
> diff --git a/block/commit.c b/block/commit.c
> index 71db7ba747..61924bcf66 100644
> --- a/block/commit.c
> +++ b/block/commit.c
> @@ -313,6 +313,7 @@ void commit_start(const char *job_id, BlockDriverState 
> *bs,
>  commit_top_bs->total_sectors = top->total_sectors;
>  
>  bdrv_append(commit_top_bs, top, _err);
> +bdrv_unref(commit_top_bs); /* referenced by new parents or failed */
>  if (local_err) {
>  commit_top_bs = NULL;
>  error_propagate(errp, local_err);
> diff --git a/block/mirror.c b/block/mirror.c
> index 8e1ad6eceb..13f7ecc998 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -1605,9 +1605,6 @@ static BlockJob *mirror_start_job(
>  bs_opaque = g_new0(MirrorBDSOpaque, 1);
>  mirror_top_bs->opaque = bs_opaque;
>  
> -/* bdrv_append takes ownership of the mirror_top_bs reference, need to 
> keep
> - * it alive until block_job_create() succeeds even if bs has no parent. 
> */
> -bdrv_ref(mirror_top_bs);
>  bdrv_drained_begin(bs);
>  bdrv_append(mirror_top_bs, bs, _err);
>  bdrv_drained_end(bs);
> diff --git a/blockdev.c b/blockdev.c
> index b5f11c524b..96c96f8ba6 100644
> ---

Re: [PATCH v2 02/36] tests/test-bdrv-graph-mod: add test_parallel_perm_update

2021-01-18 Thread Kevin Wolf

Am 27.11.2020 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Add test to show that simple DFS recursion order is not correct for
> permission update. Correct order is topological-sort order, which will
> be introduced later.
> 
> Consider the block driver which has two filter children: one active
> with exclusive write access and one inactive with no specific
> permissions.
> 
> And, these two children has a common base child, like this:
> 
> ┌─┐ ┌──┐
> │ fl2 │ ◀── │ top  │
> └─┘ └──┘
>   │   │
>   │   │ w
>   │   ▼
>   │ ┌──┐
>   │ │ fl1  │
>   │ └──┘
>   │   │
>   │   │ w
>   │   ▼
>   │ ┌──┐
>   └───▶ │ base │
> └──┘
> 
> So, exclusive write is propagated.
> 
> Assume, we want to make fl2 active instead of fl1.
> So, we set some option for top driver and do permission update.
> 
> If permission update (remember, it's DFS) goes first through
> top->fl1->base branch it will succeed: it firstly drop exclusive write
> permissions and than apply them for another BdrvChildren.
> But if permission update goes first through top->fl2->base branch it
> will fail, as when we try to update fl2->base child, old not yet
> updated fl1->base child will be in conflict.
> 
> Now test fails, so it runs only with -d flag. To run do
> 
>   ./test-bdrv-graph-mod -d -p /bdrv-graph-mod/parallel-perm-update
> 
> from /tests.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  tests/test-bdrv-graph-mod.c | 64 +
>  1 file changed, 64 insertions(+)
> 
> diff --git a/tests/test-bdrv-graph-mod.c b/tests/test-bdrv-graph-mod.c
> index 3b9e6f242f..27e3361a60 100644
> --- a/tests/test-bdrv-graph-mod.c
> +++ b/tests/test-bdrv-graph-mod.c
> @@ -232,6 +232,68 @@ static void test_parallel_exclusive_write(void)
>  bdrv_unref(top);
>  }
>  
> +static void write_to_file_perms(BlockDriverState *bs, BdrvChild *c,
> + BdrvChildRole role,
> + BlockReopenQueue *reopen_queue,
> + uint64_t perm, uint64_t shared,
> + uint64_t *nperm, uint64_t *nshared)
> +{
> +if (bs->file && c == bs->file) {
> +*nperm = BLK_PERM_WRITE;
> +*nshared = BLK_PERM_ALL & ~BLK_PERM_WRITE;
> +} else {
> +*nperm = 0;
> +*nshared = BLK_PERM_ALL;
> +}
> +}
> +
> +static BlockDriver bdrv_write_to_file = {
> +.format_name = "tricky-perm",
> +.bdrv_child_perm = write_to_file_perms,
> +};
> +
> +static void test_parallel_perm_update(void)
> +{
> +BlockDriverState *top = no_perm_node("top");
> +BlockDriverState *tricky =
> +bdrv_new_open_driver(_write_to_file, "tricky", BDRV_O_RDWR,
> + _abort);
> +BlockDriverState *base = no_perm_node("base");
> +BlockDriverState *fl1 = pass_through_node("fl1");
> +BlockDriverState *fl2 = pass_through_node("fl2");
> +BdrvChild *c_fl1, *c_fl2;
> +
> +bdrv_attach_child(top, tricky, "file", _of_bds, BDRV_CHILD_DATA,
> +  _abort);
> +c_fl1 = bdrv_attach_child(tricky, fl1, "first", _of_bds,
> +  BDRV_CHILD_FILTERED, _abort);
> +c_fl2 = bdrv_attach_child(tricky, fl2, "second", _of_bds,
> +  BDRV_CHILD_FILTERED, _abort);
> +bdrv_attach_child(fl1, base, "backing", _of_bds, 
> BDRV_CHILD_FILTERED,
> +  _abort);
> +bdrv_attach_child(fl2, base, "backing", _of_bds, 
> BDRV_CHILD_FILTERED,
> +  _abort);
> +bdrv_ref(base);
> +
> +/* Select fl1 as first child to be active */
> +tricky->file = c_fl1;
> +bdrv_child_refresh_perms(top, top->children.lh_first, _abort);
> +
> +assert(c_fl1->perm & BLK_PERM_WRITE);
> +
> +/* Now, try to switch active child and update permissions */
> +tricky->file = c_fl2;
> +bdrv_child_refresh_perms(top, top->children.lh_first, _abort);
> +
> +assert(c_fl2->perm & BLK_PERM_WRITE);
> +
> +/* Switch once more, to not care about real child order in the list */
> +tricky->file = c_fl1;
> +bdrv_child_refresh_perms(top, top->children.lh_first, _abort);
> +
> +assert(c_fl1->perm & BLK_PERM_WRITE);

Should we also assert in each case that the we don't hole the write
permission for the inactive child?

Kevin

Re: [PATCH v4 23/23] simplebench: add bench-backup.py

2021-01-18 Thread Max Reitz


On 16.01.21 22:47, Vladimir Sementsov-Ogievskiy wrote:

Add script to benchmark new backup architecture.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  scripts/simplebench/bench-backup.py | 167 
  1 file changed, 167 insertions(+)
  create mode 100755 scripts/simplebench/bench-backup.py

diff --git a/scripts/simplebench/bench-backup.py 
b/scripts/simplebench/bench-backup.py
new file mode 100755
index 00..2cf7a852e0
--- /dev/null
+++ b/scripts/simplebench/bench-backup.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+#
+# Bench backup block-job
+#
+# Copyright (c) 2020 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import argparse
+import json
+
+import simplebench
+from results_to_text import results_to_text
+from bench_block_job import bench_block_copy, drv_file, drv_nbd
+
+
+def bench_func(env, case):
+""" Handle one "cell" of benchmarking table. """
+cmd_options = env['cmd-options'] if 'cmd-options' in env else {}
+return bench_block_copy(env['qemu-binary'], env['cmd'],
+cmd_options,
+case['source'], case['target'])
+
+
+def bench(args):
+test_cases = []
+
+sources = {}
+targets = {}
+for d in args.dir:
+label, path = d.split(':')  # paths with colon not unsupported


s/ un//, I think.

Adding these comments (and the assert below) instead of using splitn() 
is fine for me.


Max

Re: [PATCH v4 09/23] job: call job_enter from job_pause

2021-01-18 Thread Max Reitz


On 16.01.21 22:46, Vladimir Sementsov-Ogievskiy wrote:

If main job coroutine called job_yield (while some background process
is in progress), we should give it a chance to call job_pause_point().
It will be used in backup, when moved on async block-copy.

Note, that job_user_pause is not enough: we want to handle
child_job_drained_begin() as well, which call job_pause().


OK.


Still, if job is already in job_do_yield() in job_pause_point() we
should not enter it.


Agreed.


iotest 109 output is modified: on stop we do bdrv_drain_all() which now
triggers job pause immediately (and pause after ready is standby).


Sounds like a good thing.


Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  job.c  |  3 +++
  tests/qemu-iotests/109.out | 24 
  2 files changed, 27 insertions(+)

diff --git a/job.c b/job.c
index 8fecf38960..3aaaebafe2 100644
--- a/job.c
+++ b/job.c
@@ -553,6 +553,9 @@ static bool job_timer_not_pending(Job *job)
  void job_pause(Job *job)
  {
  job->pause_count++;
+if (!job->paused) {
+job_enter(job);
+}
  }


I see job_pause is also called from block_job_error_action() – should we 
reenter the job there, too?


(It looks to me like e.g. mirror would basically just continue to run, 
then, until it needs to yield because of some other issue.  I don’t know 
whether that’s a problem, because I suppose we don’t guarantee to stop 
immediately on an error, though I suspect users would expect us to do 
that as early as possible (i.e., not to launch new requests).


[Quite some time later]

I’ve now tested a mirror job that stops due to a target error, and it 
actually does not make any progress; or at least it doesn’t report any. 
 So it looks like my concern is unjustified.  I don’t know why it’s 
unjustified, though, so perhaps you can explain it before I give my R-b O:))


Max

Re: [PATCH v2 9/9] gitlab-ci: Add alpine to pipeline

2021-01-18 Thread Jiaxun Yang




On Mon, Jan 18, 2021, at 6:11 PM, Daniel P. Berrangé wrote:
> On Mon, Jan 18, 2021 at 02:38:08PM +0800, Jiaxun Yang wrote:
> > We only run build test and check-acceptance as their are too many
> > failures in checks due to minor string mismatch.
> 
> Can you give real examples of what's broken here, as that sounds
> rather suspicious, and I'm not convinced it should be ignored.

Mostly Input/Output error vs I/O Error.

- Jiaxun

Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Klaus Jensen

On Jan 18 22:12, Minwoo Im wrote:
> On 21-01-18 14:10:50, Klaus Jensen wrote:
> > On Jan 18 22:09, Minwoo Im wrote:
> > > > > Yes, CMB in v1.4 is not backward-compatible, but is it okay to move 
> > > > > onto
> > > > > the CMB v1.4 from v1.3 without supporting the v1.3 usage on this 
> > > > > device
> > > > > model?
> > > > 
> > > > Next patch moves to v1.4. I wanted to split it because the "bump" patch
> > > > also adds a couple of other v1.4 requires fields. But I understand that
> > > > this is slightly wrong.
> > > 
> > > Sorry, I meant I'd like to have CMB for v1.3 support along with the v1.4
> > > support in this device model separately.  Maybe I missed the linux-nvme
> > > mailing list for CMB v1.4, but is there no plan to keep supportin CMB
> > > v1.3 in NVMe driver?
> > 
> > I posted a patch on linux-nvme for v1.4 support in the kernel. It will
> > support both the v1.3 and v1.4 behavior :)
> 
> Then, can we maintain CMB v1.3 also in QEMU also along with v1.4 ? :)

My first version of this patch actually included compatibility support
for v1.3 ("legacy cmb"), but Keith suggested we just drop that and keep
this device compliant.

What we could do is allow the spec version to be chosen with a
parameter?


signature.asc
Description: PGP signature

Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Minwoo Im

> > Yes, CMB in v1.4 is not backward-compatible, but is it okay to move onto
> > the CMB v1.4 from v1.3 without supporting the v1.3 usage on this device
> > model?
> 
> Next patch moves to v1.4. I wanted to split it because the "bump" patch
> also adds a couple of other v1.4 requires fields. But I understand that
> this is slightly wrong.

Sorry, I meant I'd like to have CMB for v1.3 support along with the v1.4
support in this device model separately.  Maybe I missed the linux-nvme
mailing list for CMB v1.4, but is there no plan to keep supportin CMB
v1.3 in NVMe driver?

Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Klaus Jensen

On Jan 18 22:09, Minwoo Im wrote:
> > > Yes, CMB in v1.4 is not backward-compatible, but is it okay to move onto
> > > the CMB v1.4 from v1.3 without supporting the v1.3 usage on this device
> > > model?
> > 
> > Next patch moves to v1.4. I wanted to split it because the "bump" patch
> > also adds a couple of other v1.4 requires fields. But I understand that
> > this is slightly wrong.
> 
> Sorry, I meant I'd like to have CMB for v1.3 support along with the v1.4
> support in this device model separately.  Maybe I missed the linux-nvme
> mailing list for CMB v1.4, but is there no plan to keep supportin CMB
> v1.3 in NVMe driver?

I posted a patch on linux-nvme for v1.4 support in the kernel. It will
support both the v1.3 and v1.4 behavior :)


signature.asc
Description: PGP signature

Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Minwoo Im

On 21-01-18 14:10:50, Klaus Jensen wrote:
> On Jan 18 22:09, Minwoo Im wrote:
> > > > Yes, CMB in v1.4 is not backward-compatible, but is it okay to move onto
> > > > the CMB v1.4 from v1.3 without supporting the v1.3 usage on this device
> > > > model?
> > > 
> > > Next patch moves to v1.4. I wanted to split it because the "bump" patch
> > > also adds a couple of other v1.4 requires fields. But I understand that
> > > this is slightly wrong.
> > 
> > Sorry, I meant I'd like to have CMB for v1.3 support along with the v1.4
> > support in this device model separately.  Maybe I missed the linux-nvme
> > mailing list for CMB v1.4, but is there no plan to keep supportin CMB
> > v1.3 in NVMe driver?
> 
> I posted a patch on linux-nvme for v1.4 support in the kernel. It will
> support both the v1.3 and v1.4 behavior :)

Then, can we maintain CMB v1.3 also in QEMU also along with v1.4 ? :)

Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Klaus Jensen

On Jan 18 21:58, Minwoo Im wrote:
> Hello,
> 
> On 21-01-18 10:47:03, Klaus Jensen wrote:
> > From: Padmakar Kalghatgi 
> > 
> > Implement v1.4 logic for configuring the Controller Memory Buffer. This
> > is not backward compatible with v1.3, so drivers that only support v1.3
> > will not be able to use the CMB anymore.
> > 
> > Signed-off-by: Padmakar Kalghatgi 
> > Signed-off-by: Klaus Jensen 
> 
> Yes, CMB in v1.4 is not backward-compatible, but is it okay to move onto
> the CMB v1.4 from v1.3 without supporting the v1.3 usage on this device
> model?

Next patch moves to v1.4. I wanted to split it because the "bump" patch
also adds a couple of other v1.4 requires fields. But I understand that
this is slightly wrong.


signature.asc
Description: PGP signature

Re: [PATCH v2 07/12] hw/block/nvme: remove redundant zeroing of PMR registers

2021-01-18 Thread Klaus Jensen

On Jan 18 21:55, Minwoo Im wrote:
> On 21-01-18 10:47:00, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > The controller registers are initially zero. Remove the redundant
> > zeroing.
> > 
> > Signed-off-by: Klaus Jensen 
> > ---
> >  hw/block/nvme.c | 35 ---
> >  1 file changed, 35 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index f3bea582b3c0..9ee9570bb65c 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -4179,43 +4179,8 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice 
> > *pci_dev)
> >  
> >  static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
> >  {
> > -/* PMR Capabities register */
> > -n->bar.pmrcap = 0;
> > -NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
> > -NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
> >  NVME_PMRCAP_SET_BIR(n->bar.pmrcap, NVME_PMR_BIR);
> > -NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
> > -/* Turn on bit 1 support */
> 
> This comment says that PMRWBM [1]th bit is set to PMRCAP below :).
> 

Thanks!



signature.asc
Description: PGP signature

Re: [PATCH v2 02/12] hw/block/nvme: fix 64 bit register hi/lo split writes

2021-01-18 Thread Minwoo Im

> The spec requires aligned 32 bit accesses (or the size of the register),
> so maybe it's about time we actually ignore instead of falling through.

Agreed.

Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Minwoo Im

Hello,

On 21-01-18 10:47:03, Klaus Jensen wrote:
> From: Padmakar Kalghatgi 
> 
> Implement v1.4 logic for configuring the Controller Memory Buffer. This
> is not backward compatible with v1.3, so drivers that only support v1.3
> will not be able to use the CMB anymore.
> 
> Signed-off-by: Padmakar Kalghatgi 
> Signed-off-by: Klaus Jensen 

Yes, CMB in v1.4 is not backward-compatible, but is it okay to move onto
the CMB v1.4 from v1.3 without supporting the v1.3 usage on this device
model?

Re: [PATCH v2 07/12] hw/block/nvme: remove redundant zeroing of PMR registers

2021-01-18 Thread Minwoo Im

On 21-01-18 10:47:00, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> The controller registers are initially zero. Remove the redundant
> zeroing.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 35 ---
>  1 file changed, 35 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f3bea582b3c0..9ee9570bb65c 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -4179,43 +4179,8 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  
>  static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
>  {
> -/* PMR Capabities register */
> -n->bar.pmrcap = 0;
> -NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
> -NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
>  NVME_PMRCAP_SET_BIR(n->bar.pmrcap, NVME_PMR_BIR);
> -NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
> -/* Turn on bit 1 support */

This comment says that PMRWBM [1]th bit is set to PMRCAP below :).

>  NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02);
> -NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0);
> -NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0);
> -
> -/* PMR Control register */
> -n->bar.pmrctl = 0;
> -NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0);
> -
> -/* PMR Status register */
> -n->bar.pmrsts = 0;
> -NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0);
> -NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0);
> -NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0);
> -NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0);
> -
> -/* PMR Elasticity Buffer Size register */
> -n->bar.pmrebs = 0;
> -NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0);
> -NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0);
> -NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0);
> -
> -/* PMR Sustained Write Throughput register */
> -n->bar.pmrswtp = 0;
> -NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0);
> -NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0);
> -
> -/* PMR Memory Space Control register */
> -n->bar.pmrmsc = 0;
> -NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0);
> -NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0);
>  
>  pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
>   PCI_BASE_ADDRESS_SPACE_MEMORY |
> -- 
> 2.30.0
> 
>

Re: [PATCH v2 02/12] hw/block/nvme: fix 64 bit register hi/lo split writes

2021-01-18 Thread Klaus Jensen

On Jan 18 21:41, Minwoo Im wrote:
> On 21-01-18 10:46:55, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > 64 bit registers like ASQ and ACQ should be writable by both a hi/lo 32
> > bit write combination as well as a plain 64 bit write. The spec does not
> > define ordering on the hi/lo split, but the code currently assumes that
> > the low order bits are written first. Additionally, the code does not
> > consider that another address might already have been written into the
> > register, causing the OR'ing to result in a bad address.
> > 
> > Fix this by explicitly overwriting only the low or high order bits for
> > 32 bit writes.
> > 
> > Signed-off-by: Klaus Jensen 
> > ---
> >  hw/block/nvme.c | 10 ++
> >  1 file changed, 6 insertions(+), 4 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index bd7e258c3c26..40b9f8ccfc0e 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -3781,19 +3781,21 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr 
> > offset, uint64_t data,
> >  trace_pci_nvme_mmio_aqattr(data & 0x);
> >  break;
> >  case 0x28:  /* ASQ */
> > -n->bar.asq = data;
> > +n->bar.asq = size == 8 ? data :
> > +(n->bar.asq & ~0x) | (data & 0x);
>  ^^^
> If this one is to unmask lower 32bits other than higher 32bits, then
> it should be:
> 
>   (n->bar.asq & ~0xULL)
> 

Ouch. Yes, thanks!

> Also, maybe we should take care of lower than 4bytes as:
> 
>   .min_access_size = 2,
>   .max_access_size = 8,
> 
> Even we have some error messages up there with:
> 
>   if (unlikely(size < sizeof(uint32_t))) {
>   NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall,
>  "MMIO write smaller than 32-bits,"
>  " offset=0x%"PRIx64", size=%u",
>  offset, size);
>   /* should be ignored, fall through for now */
>   }
> 
> It's a fall-thru error, and that's it.  I would prefer not to have this
> error and support for 2bytes access also, OR do not support for 2bytes
> access for this MMIO area.
> 

The spec requires aligned 32 bit accesses (or the size of the register),
so maybe it's about time we actually ignore instead of falling through.


signature.asc
Description: PGP signature

Re: [PATCH v2 06/12] hw/block/nvme: rename PMR/CMB shift/mask fields

2021-01-18 Thread Minwoo Im

On 21-01-18 10:46:59, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Use the correct field names.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  include/block/nvme.h | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 86d7fc2f905c..f3cbe17d0971 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -35,8 +35,8 @@ enum NvmeCapShift {
>  CAP_CSS_SHIFT  = 37,
>  CAP_MPSMIN_SHIFT   = 48,
>  CAP_MPSMAX_SHIFT   = 52,
> -CAP_PMR_SHIFT  = 56,
> -CAP_CMB_SHIFT  = 57,
> +CAP_PMRS_SHIFT = 56,
> +CAP_CMBS_SHIFT = 57,
>  };
>  
>  enum NvmeCapMask {
> @@ -49,8 +49,8 @@ enum NvmeCapMask {
>  CAP_CSS_MASK   = 0xff,
>  CAP_MPSMIN_MASK= 0xf,
>  CAP_MPSMAX_MASK= 0xf,
> -CAP_PMR_MASK   = 0x1,
> -CAP_CMB_MASK   = 0x1,
> +CAP_PMRS_MASK  = 0x1,
> +CAP_CMBS_MASK  = 0x1,
>  };
>  
>  #define NVME_CAP_MQES(cap)  (((cap) >> CAP_MQES_SHIFT)   & CAP_MQES_MASK)
> @@ -81,10 +81,10 @@ enum NvmeCapMask {
> << 
> CAP_MPSMIN_SHIFT)
>  #define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & 
> CAP_MPSMAX_MASK)\
> << 
> CAP_MPSMAX_SHIFT)
> -#define NVME_CAP_SET_PMRS(cap, val)   (cap |= (uint64_t)(val & CAP_PMR_MASK) 
>   \
> -   << CAP_PMR_SHIFT)
> -#define NVME_CAP_SET_CMBS(cap, val)   (cap |= (uint64_t)(val & CAP_CMB_MASK) 
>   \
> -   << CAP_CMB_SHIFT)
> +#define NVME_CAP_SET_PMRS(cap, val)   (cap |= (uint64_t)(val & 
> CAP_PMRS_MASK)  \
> +   << CAP_PMRS_SHIFT)
> +#define NVME_CAP_SET_CMBS(cap, val)   (cap |= (uint64_t)(val & 
> CAP_CMBS_MASK)  \
> +   << CAP_CMBS_SHIFT)

Oh, it would have been better folded into [3/12] patch, though.

Changes are looking good to me to represent "Supported".

Reviewed-by: Minwoo Im 

>  
>  enum NvmeCapCss {
>  NVME_CAP_CSS_NVM= 1 << 0,
> -- 
> 2.30.0
> 
>

Re: [PATCH v2 04/12] hw/block/nvme: move msix table and pba to BAR 0

2021-01-18 Thread Minwoo Im



On 21-01-18 10:46:57, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> In the interest of supporting both CMB and PMR to be enabled on the same
> device, move the MSI-X table and pending bit array out of BAR 4 and into
> BAR 0.

Nice!

Reviewed-by: Minwoo Im 
Tested-by: Minwoo Im 

Thanks,

Re: [PATCH v2 05/12] hw/block/nvme: allow cmb and pmr to coexist

2021-01-18 Thread Minwoo Im

Reviewed-by: Minwoo Im

Re: [PATCH 00/28] qemu-option, keyval, vl: switch -object/-M/-accel to keyval parsing

2021-01-18 Thread Paolo Bonzini


On 18/01/21 11:29, Kevin Wolf wrote:

Am 17.01.2021 um 17:48 hat Paolo Bonzini geschrieben:

On 02/12/20 10:02, Paolo Bonzini wrote:

This series switches -object, -M and -accel from QemuOpts to keyval.
Monitor commands device_add and netdev_add are also switched to keyval,
though -device and -netdev for now are not.

Along the way, the syntax of keyval and QemuOpts becomes more consistent
and support for keyval-based options is added to -readconfig.  -writeconfig
instead is removed (see patch 13 for rationale).



Ping?  It's been over a month (even if with the Christmas vacation).

Patches 1-2 were already reviewed so I have included them already.  I would
like at least the next 14 patches to go in as soon as possible. (The rest
can be routed through maintainer trees or I can post them together with the
softmmu/vl.c cleanups).


On which commit is this based? It doesn't seem to apply to master (maybe
no surprise), but also not to 5.2.0-rc4, which was the state of master
when this was posted.


I'll retest and repost.

Paolo

Re: [PATCH v2 03/12] hw/block/nvme: indicate CMB support through controller capabilities register

2021-01-18 Thread Minwoo Im

Reviewed-by: Minwoo Im

Re: [PATCH v2 02/12] hw/block/nvme: fix 64 bit register hi/lo split writes

2021-01-18 Thread Minwoo Im

On 21-01-18 10:46:55, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> 64 bit registers like ASQ and ACQ should be writable by both a hi/lo 32
> bit write combination as well as a plain 64 bit write. The spec does not
> define ordering on the hi/lo split, but the code currently assumes that
> the low order bits are written first. Additionally, the code does not
> consider that another address might already have been written into the
> register, causing the OR'ing to result in a bad address.
> 
> Fix this by explicitly overwriting only the low or high order bits for
> 32 bit writes.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index bd7e258c3c26..40b9f8ccfc0e 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -3781,19 +3781,21 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr 
> offset, uint64_t data,
>  trace_pci_nvme_mmio_aqattr(data & 0x);
>  break;
>  case 0x28:  /* ASQ */
> -n->bar.asq = data;
> +n->bar.asq = size == 8 ? data :
> +(n->bar.asq & ~0x) | (data & 0x);
 ^^^
If this one is to unmask lower 32bits other than higher 32bits, then
it should be:

(n->bar.asq & ~0xULL)

Also, maybe we should take care of lower than 4bytes as:

.min_access_size = 2,
.max_access_size = 8,

Even we have some error messages up there with:

if (unlikely(size < sizeof(uint32_t))) {
NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall,
   "MMIO write smaller than 32-bits,"
   " offset=0x%"PRIx64", size=%u",
   offset, size);
/* should be ignored, fall through for now */
}

It's a fall-thru error, and that's it.  I would prefer not to have this
error and support for 2bytes access also, OR do not support for 2bytes
access for this MMIO area.

Thanks!

[PATCH 1/2] block: Separate blk_is_writable() and blk_supports_write_perm()

2021-01-18 Thread Kevin Wolf

Currently, blk_is_read_only() tells whether a given BlockBackend can
only be used in read-only mode because its root node is read-only. Some
callers actually try to answer a slightly different question: Is the
BlockBackend configured to be writable, by taking write permissions on
the root node?

This can differ, for example, for CD-ROM devices which don't take write
permissions, but may be backed by a writable image file. scsi-cd allows
write requests to the drive if blk_is_read_only() returns false.
However, the write request will immediately run into an assertion
failure because the write permission is missing.

This patch introduces separate functions for both questions.
blk_supports_write_perm() answers the question whether the block
node/image file can support writable devices, whereas blk_is_writable()
tells whether the BlockBackend is currently configured to be writable.

All calls of blk_is_read_only() are converted to one of the two new
functions.

Fixes: https://bugs.launchpad.net/bugs/1906693
Cc: qemu-sta...@nongnu.org
Signed-off-by: Kevin Wolf 
---
 include/sysemu/block-backend.h |  3 ++-
 block/block-backend.c  | 19 ---
 hw/block/dataplane/xen-block.c |  2 +-
 hw/block/fdc.c |  9 +
 hw/block/m25p80.c  |  6 +++---
 hw/block/nand.c|  2 +-
 hw/block/nvme-ns.c |  7 ---
 hw/block/onenand.c |  2 +-
 hw/block/pflash_cfi01.c|  2 +-
 hw/block/pflash_cfi02.c|  2 +-
 hw/block/swim.c|  6 +++---
 hw/block/virtio-blk.c  |  6 +++---
 hw/block/xen-block.c   |  2 +-
 hw/ide/core.c  |  2 +-
 hw/misc/sifive_u_otp.c |  2 +-
 hw/ppc/pnv_pnor.c  |  2 +-
 hw/scsi/scsi-disk.c| 10 +-
 hw/scsi/scsi-generic.c |  4 ++--
 hw/sd/sd.c |  6 +++---
 hw/usb/dev-storage.c   |  4 ++--
 20 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 8203d7f6f9..880e903293 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -191,7 +191,8 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, 
bool is_read,
   int error);
 void blk_error_action(BlockBackend *blk, BlockErrorAction action,
   bool is_read, int error);
-bool blk_is_read_only(BlockBackend *blk);
+bool blk_supports_write_perm(BlockBackend *blk);
+bool blk_is_writable(BlockBackend *blk);
 bool blk_is_sg(BlockBackend *blk);
 bool blk_enable_write_cache(BlockBackend *blk);
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
diff --git a/block/block-backend.c b/block/block-backend.c
index ce78d30794..e493f17515 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1826,17 +1826,30 @@ void blk_error_action(BlockBackend *blk, 
BlockErrorAction action,
 }
 }
 
-bool blk_is_read_only(BlockBackend *blk)
+/*
+ * Returns true if the BlockBackend can support taking write permissions
+ * (because its root node is not read-only).
+ */
+bool blk_supports_write_perm(BlockBackend *blk)
 {
 BlockDriverState *bs = blk_bs(blk);
 
 if (bs) {
-return bdrv_is_read_only(bs);
+return !bdrv_is_read_only(bs);
 } else {
-return blk->root_state.read_only;
+return !blk->root_state.read_only;
 }
 }
 
+/*
+ * Returns true if the BlockBackend can be written to in its current
+ * configuration (i.e. if write permission have been requested)
+ */
+bool blk_is_writable(BlockBackend *blk)
+{
+return blk->perm & BLK_PERM_WRITE;
+}
+
 bool blk_is_sg(BlockBackend *blk)
 {
 BlockDriverState *bs = blk_bs(blk);
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 71c337c7b7..f5b4f4c079 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -168,7 +168,7 @@ static int xen_block_parse_request(XenBlockRequest *request)
 };
 
 if (request->req.operation != BLKIF_OP_READ &&
-blk_is_read_only(dataplane->blk)) {
+!blk_is_writable(dataplane->blk)) {
 error_report("error: write req for ro device");
 goto err;
 }
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 3636874432..292ea87805 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -444,7 +444,7 @@ static void fd_revalidate(FDrive *drv)
 
 FLOPPY_DPRINTF("revalidate\n");
 if (drv->blk != NULL) {
-drv->ro = blk_is_read_only(drv->blk);
+drv->ro = !blk_is_writable(drv->blk);
 if (!blk_is_inserted(drv->blk)) {
 FLOPPY_DPRINTF("No disk in drive\n");
 drv->disk = FLOPPY_DRIVE_TYPE_NONE;
@@ -479,8 +479,8 @@ static void fd_change_cb(void *opaque, bool load, Error 
**errp)
 blk_set_perm(drive->blk, 0, BLK_PERM_ALL, _abort);
 } else {
 if (!blkconf_apply_backend_options(drive->conf,
-

[PATCH 2/2] virtio-scsi-test: Test writing to scsi-cd device

2021-01-18 Thread Kevin Wolf

This tests that trying to write to a (read-only) scsi-cd device backed
by a read-write image file doesn't crash and results in the correct
error.

This is a regression test for https://bugs.launchpad.net/bugs/1906693.

Signed-off-by: Kevin Wolf 
---
 tests/qtest/virtio-scsi-test.c | 39 ++
 1 file changed, 39 insertions(+)

diff --git a/tests/qtest/virtio-scsi-test.c b/tests/qtest/virtio-scsi-test.c
index 0415e75876..1b7ecc1c8f 100644
--- a/tests/qtest/virtio-scsi-test.c
+++ b/tests/qtest/virtio-scsi-test.c
@@ -200,6 +200,32 @@ static void test_unaligned_write_same(void *obj, void 
*data,
 qvirtio_scsi_pci_free(vs);
 }
 
+static void test_write_to_cdrom(void *obj, void *data,
+QGuestAllocator *t_alloc)
+{
+QVirtioSCSI *scsi = obj;
+QVirtioSCSIQueues *vs;
+uint8_t buf[2048] = { 0 };
+const uint8_t write_cdb[VIRTIO_SCSI_CDB_SIZE] = {
+/* WRITE(10) to LBA 0, transfer length 1 */
+0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00
+};
+struct virtio_scsi_cmd_resp resp;
+
+alloc = t_alloc;
+vs = qvirtio_scsi_init(scsi->vdev);
+
+virtio_scsi_do_command(vs, write_cdb, NULL, 0, buf, 2048, );
+g_assert_cmphex(resp.response, ==, 0);
+g_assert_cmphex(resp.status, ==, CHECK_CONDITION);
+g_assert_cmphex(resp.sense[0], ==, 0x70);
+g_assert_cmphex(resp.sense[2], ==, DATA_PROTECT);
+g_assert_cmphex(resp.sense[12], ==, 0x27); /* WRITE PROTECTED */
+g_assert_cmphex(resp.sense[13], ==, 0x00); /* WRITE PROTECTED */
+
+qvirtio_scsi_pci_free(vs);
+}
+
 static void test_iothread_attach_node(void *obj, void *data,
   QGuestAllocator *t_alloc)
 {
@@ -267,6 +293,16 @@ static void *virtio_scsi_setup(GString *cmd_line, void 
*arg)
 return arg;
 }
 
+static void *virtio_scsi_setup_cd(GString *cmd_line, void *arg)
+{
+g_string_append(cmd_line,
+" -drive file=null-co://,"
+"file.read-zeroes=on,"
+"if=none,id=dr1,format=raw "
+"-device scsi-cd,drive=dr1,lun=0,scsi-id=1");
+return arg;
+}
+
 static void *virtio_scsi_setup_iothread(GString *cmd_line, void *arg)
 {
 g_string_append(cmd_line,
@@ -287,6 +323,9 @@ static void register_virtio_scsi_test(void)
 qos_add_test("unaligned-write-same", "virtio-scsi",
  test_unaligned_write_same, );
 
+opts.before = virtio_scsi_setup_cd;
+qos_add_test("write-to-cdrom", "virtio-scsi", test_write_to_cdrom, );
+
 opts.before = virtio_scsi_setup_iothread;
 opts.edge = (QOSGraphEdgeOptions) {
 .extra_device_opts = "iothread=thread0",
-- 
2.29.2

[PATCH 0/2] block: Fix crash on write to read-only devices

2021-01-18 Thread Kevin Wolf

Kevin Wolf (2):
  block: Separate blk_is_writable() and blk_supports_write_perm()
  virtio-scsi-test: Test writing to scsi-cd device

 include/sysemu/block-backend.h |  3 ++-
 block/block-backend.c  | 19 ++---
 hw/block/dataplane/xen-block.c |  2 +-
 hw/block/fdc.c |  9 
 hw/block/m25p80.c  |  6 +++---
 hw/block/nand.c|  2 +-
 hw/block/nvme-ns.c |  7 +++---
 hw/block/onenand.c |  2 +-
 hw/block/pflash_cfi01.c|  2 +-
 hw/block/pflash_cfi02.c|  2 +-
 hw/block/swim.c|  6 +++---
 hw/block/virtio-blk.c  |  6 +++---
 hw/block/xen-block.c   |  2 +-
 hw/ide/core.c  |  2 +-
 hw/misc/sifive_u_otp.c |  2 +-
 hw/ppc/pnv_pnor.c  |  2 +-
 hw/scsi/scsi-disk.c| 10 -
 hw/scsi/scsi-generic.c |  4 ++--
 hw/sd/sd.c |  6 +++---
 hw/usb/dev-storage.c   |  4 ++--
 tests/qtest/virtio-scsi-test.c | 39 ++
 21 files changed, 96 insertions(+), 41 deletions(-)

-- 
2.29.2

Re: [PATCH 0/9] hw/block: m25p80: Fix the mess of dummy bytes needed for fast read commands

2021-01-18 Thread Bin Meng

Hi Francisco,

On Mon, Jan 18, 2021 at 6:06 PM Francisco Iglesias
 wrote:
>
> Hi Bin,
>
> On [2021 Jan 15] Fri 22:38:18, Bin Meng wrote:
> > Hi Francisco,
> >
> > On Fri, Jan 15, 2021 at 8:26 PM Francisco Iglesias
> >  wrote:
> > >
> > > Hi Bin,
> > >
> > > On [2021 Jan 15] Fri 10:07:52, Bin Meng wrote:
> > > > Hi Francisco,
> > > >
> > > > On Fri, Jan 15, 2021 at 2:13 AM Francisco Iglesias
> > > >  wrote:
> > > > >
> > > > > Hi Bin,
> > > > >
> > > > > On [2021 Jan 14] Thu 23:08:53, Bin Meng wrote:
> > > > > > From: Bin Meng 
> > > > > >
> > > > > > The m25p80 model uses s->needed_bytes to indicate how many follow-up
> > > > > > bytes are expected to be received after it receives a command. For
> > > > > > example, depending on the address mode, either 3-byte address or
> > > > > > 4-byte address is needed.
> > > > > >
> > > > > > For fast read family commands, some dummy cycles are required after
> > > > > > sending the address bytes, and the dummy cycles need to be counted
> > > > > > in s->needed_bytes. This is where the mess began.
> > > > > >
> > > > > > As the variable name (needed_bytes) indicates, the unit is in byte.
> > > > > > It is not in bit, or cycle. However for some reason the model has
> > > > > > been using the number of dummy cycles for s->needed_bytes. The right
> > > > > > approach is to convert the number of dummy cycles to bytes based on
> > > > > > the SPI protocol, for example, 6 dummy cycles for the Fast Read Quad
> > > > > > I/O (EBh) should be converted to 3 bytes per the formula (6 * 4 / 
> > > > > > 8).
> > > > >
> > > > > While not being the original implementor I must assume that above 
> > > > > solution was
> > > > > considered but not chosen by the developers due to it is inaccuracy 
> > > > > (it
> > > > > wouldn't be possible to model exacly 6 dummy cycles, only a multiple 
> > > > > of 8,
> > > > > meaning that if the controller is wrongly programmed to generate 7 
> > > > > the error
> > > > > wouldn't be caught and the controller will still be considered 
> > > > > "correct"). Now
> > > > > that we have this detail in the implementation I'm in favor of 
> > > > > keeping it, this
> > > > > also because the detail is already in use for catching exactly above 
> > > > > error.
> > > > >
> > > >
> > > > I found no clue from the commit message that my proposed solution here
> > > > was ever considered, otherwise all SPI controller models supporting
> > > > software generation should have been found out seriously broken long
> > > > time ago!
> > >
> > >
> > > The controllers you are referring to might lack support for commands 
> > > requiring
> > > dummy clock cycles but I really hope they work with the other commands? 
> > > If so I
> >
> > I am not sure why you view dummy clock cycles as something special
> > that needs some special support from the SPI controller. For the case
> > 1 controller, it's nothing special from the controller perspective,
> > just like sending out a command, or address bytes, or data. The
> > controller just shifts data bit by bit from its tx fifo and that's it.
> > In the Xilinx GQSPI controller case, the dummy cycles can either be
> > sent via a regular data (the case 1 controller) in the tx fifo, or
> > automatically generated (case 2 controller) by the hardware.
>
> Ok, I'll try to explain my view point a little differently. For that we also
> need to keep in mind that QEMU models HW, and any binary that runs on a HW
> board supported in QEMU should ideally run on that board inside QEMU aswell
> (this can be a bare metal application equaly well as a modified u-boot/Linux
> using SPI commands with a non multiple of 8 number of dummy clock cycles).
>
> Once functionality has been introduced into QEMU it is not easy to know which
> intentional or untentional features provided by the functionality are being
> used by users. One of the (perhaps not well known) features I'm aware of that
> is in use and is provided by the accurate dummy clock cycle modeling inside
> m25p80 is the be ability to test drivers accurately regarding the dummy clock
> cycles (even when using commands with a non-multiple of 8 number of dummy 
> clock
> cycles), but there might be others aswell. So by removing this functionality
> above use case will brake, this since those test will not be reliable.
> Furthermore, since users tend to be creative it is not possible to know if
> there are other use cases that will be affected. This means that in case [1]
> needs to be followed the safe path is to add functionality instead of 
> removing.
> Luckily it also easier in this case, see below.

I understand there might be users other than U-Boot/Linux that use an
odd number of dummy bits (not multiple of 8). If your concern was
about model behavior changes, sure I can update
qemu/docs/system/deprecated.rst to mention that some flashes in the
m25p80 model now implement dummy cycles as bytes.

> >
> > > don't think it is fair to call them 'seriously broken' (and else we should

Re: [PATCH v2 01/12] hw/block/nvme: add size to mmio read/write trace events

2021-01-18 Thread Minwoo Im

Reviewed-by: Minwoo Im

Re: [PATCH v5 02/10] iotests/297: Rewrite in Python and extend reach

2021-01-18 Thread Vladimir Sementsov-Ogievskiy


18.01.2021 13:57, Max Reitz wrote:

Instead of checking iotests.py only, check all Python files in the
qemu-iotests/ directory.  Of course, most of them do not pass, so there
is an extensive skip list for now.  (The only files that do pass are
209, 254, 283, and iotests.py.)

(Alternatively, we could have the opposite, i.e. an explicit list of
files that we do want to check, but I think it is better to check files
by default.)

Unless started in debug mode (./check -d), the output has no information
on which files are tested, so we will not have a problem e.g. with
backports, where some files may be missing when compared to upstream.

Besides the technical rewrite, some more things are changed:

- For the pylint invocation, PYTHONPATH is adjusted.  This mirrors
   setting MYPYPATH for mypy.

- Also, MYPYPATH is now derived from PYTHONPATH, so that we include
   paths set by the environment.  Maybe at some point we want to let the
   check script add '../../python/' to PYTHONPATH so that iotests.py does
   not need to do that.

- Passing --notes=FIXME,XXX to pylint suppresses warnings for TODO
   comments.  TODO is fine, we do not need 297 to complain about such
   comments.

- The "Success" line from mypy's output is suppressed, because (A) it
   does not add useful information, and (B) it would leak information
   about the files having been tested to the reference output, which we
   decidedly do not want.

Suggested-by: Vladimir Sementsov-Ogievskiy
Signed-off-by: Max Reitz


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir

[PATCH v5 09/10] iotests/129: Clean up pylint and mypy complaints

2021-01-18 Thread Max Reitz

And consequentially drop it from 297's skip list.

Signed-off-by: Max Reitz 
Reviewed-by: Willian Rampazzo 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/129 | 4 ++--
 tests/qemu-iotests/297 | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/129 b/tests/qemu-iotests/129
index 80a5db521b..9a56217bf8 100755
--- a/tests/qemu-iotests/129
+++ b/tests/qemu-iotests/129
@@ -20,7 +20,6 @@
 
 import os
 import iotests
-import time
 
 class TestStopWithBlockJob(iotests.QMPTestCase):
 test_img = os.path.join(iotests.test_dir, 'test.img')
@@ -32,7 +31,8 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 iotests.qemu_img('create', '-f', iotests.imgfmt, self.base_img, "1G")
 iotests.qemu_img('create', '-f', iotests.imgfmt, self.test_img,
  "-b", self.base_img, '-F', iotests.imgfmt)
-iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x5d 1M 128M', 
self.test_img)
+iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x5d 1M 128M',
+self.test_img)
 self.vm = iotests.VM()
 self.vm.add_object('throttle-group,id=tg0,x-bps-total=1024')
 
diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
index e3db3e061e..234e1da2fb 100755
--- a/tests/qemu-iotests/297
+++ b/tests/qemu-iotests/297
@@ -27,7 +27,7 @@ import iotests
 # TODO: Empty this list!
 SKIP_FILES = (
 '030', '040', '041', '044', '045', '055', '056', '057', '065', '093',
-'096', '118', '124', '129', '132', '136', '139', '147', '148', '149',
+'096', '118', '124', '132', '136', '139', '147', '148', '149',
 '151', '152', '155', '163', '165', '169', '194', '196', '199', '202',
 '203', '205', '206', '207', '208', '210', '211', '212', '213', '216',
 '218', '219', '222', '224', '228', '234', '235', '236', '237', '238',
-- 
2.29.2

[PATCH v5 08/10] iotests/129: Limit mirror job's buffer size

2021-01-18 Thread Max Reitz

Issuing 'stop' on the VM drains all nodes.  If the mirror job has many
large requests in flight, this may lead to significant I/O that looks a
bit like 'stop' would make the job try to complete (which is what 129
should verify not to happen).

We can limit the I/O in flight by limiting the buffer size, so mirror
will make very little progress during the 'stop' drain.

(We do not need to do anything about commit, which has a buffer size of
512 kB by default; or backup, which goes cluster by cluster.  Once we
have asynchronous requests for backup, that will change, but then we can
fine-tune the backup job to only perform a single request on a very
small chunk, too.)

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Willian Rampazzo 
---
 tests/qemu-iotests/129 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/129 b/tests/qemu-iotests/129
index 104be6dded..80a5db521b 100755
--- a/tests/qemu-iotests/129
+++ b/tests/qemu-iotests/129
@@ -67,7 +67,7 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 def test_drive_mirror(self):
 self.do_test_stop("drive-mirror", device="drive0",
   target=self.target_img, format=iotests.imgfmt,
-  sync="full")
+  sync="full", buf_size=65536)
 
 def test_drive_backup(self):
 self.do_test_stop("drive-backup", device="drive0",
-- 
2.29.2

[PATCH v5 10/10] iotests/300: Clean up pylint and mypy complaints

2021-01-18 Thread Max Reitz

And consequentially drop it from 297's skip list.

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Willian Rampazzo 
---
 tests/qemu-iotests/297 |  2 +-
 tests/qemu-iotests/300 | 18 +++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
index 234e1da2fb..eab3d660bb 100755
--- a/tests/qemu-iotests/297
+++ b/tests/qemu-iotests/297
@@ -33,7 +33,7 @@ SKIP_FILES = (
 '218', '219', '222', '224', '228', '234', '235', '236', '237', '238',
 '240', '242', '245', '246', '248', '255', '256', '257', '258', '260',
 '262', '264', '266', '274', '277', '280', '281', '295', '296', '298',
-'299', '300', '302', '303', '304', '307',
+'299', '302', '303', '304', '307',
 'nbd-fault-injector.py', 'qcow2.py', 'qcow2_format.py', 'qed.py'
 )
 
diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300
index b864a21d5e..4115f90578 100755
--- a/tests/qemu-iotests/300
+++ b/tests/qemu-iotests/300
@@ -22,7 +22,11 @@ import os
 import random
 import re
 from typing import Dict, List, Optional, Union
+
 import iotests
+
+# Import qemu after iotests.py has amended sys.path
+# pylint: disable=wrong-import-order
 import qemu
 
 BlockBitmapMapping = List[Dict[str, Union[str, List[Dict[str, str]
@@ -110,10 +114,14 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
 If @msg is None, check that there has not been any error.
 """
 self.vm_b.shutdown()
+
+log = self.vm_b.get_log()
+assert log is not None  # Loaded after shutdown
+
 if msg is None:
-self.assertNotIn('qemu-system-', self.vm_b.get_log())
+self.assertNotIn('qemu-system-', log)
 else:
-self.assertIn(msg, self.vm_b.get_log())
+self.assertIn(msg, log)
 
 @staticmethod
 def mapping(node_name: str, node_alias: str,
@@ -445,9 +453,13 @@ class 
TestBlockBitmapMappingErrors(TestDirtyBitmapMigration):
 
 # Check for the error in the source's log
 self.vm_a.shutdown()
+
+log = self.vm_a.get_log()
+assert log is not None  # Loaded after shutdown
+
 self.assertIn(f"Cannot migrate bitmap '{name}' on node "
   f"'{self.src_node_name}': Name is longer than 255 bytes",
-  self.vm_a.get_log())
+  log)
 
 # Expect abnormal shutdown of the destination VM because of
 # the failed migration
-- 
2.29.2

[PATCH v5 05/10] iotests/129: Do not check @busy

2021-01-18 Thread Max Reitz

@busy is false when the job is paused, which happens all the time
because that is how jobs yield (e.g. for mirror at least since commit
565ac01f8d3).

Back when 129 was added (2015), perhaps there was no better way of
checking whether the job was still actually running.  Now we have the
@status field (as of 58b295ba52c, i.e. 2018), which can give us exactly
that information.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Willian Rampazzo 
---
 tests/qemu-iotests/129 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/129 b/tests/qemu-iotests/129
index 2fc65ada6a..dd23bb2e5a 100755
--- a/tests/qemu-iotests/129
+++ b/tests/qemu-iotests/129
@@ -69,7 +69,7 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 result = self.vm.qmp("stop")
 self.assert_qmp(result, 'return', {})
 result = self.vm.qmp("query-block-jobs")
-self.assert_qmp(result, 'return[0]/busy', True)
+self.assert_qmp(result, 'return[0]/status', 'running')
 self.assert_qmp(result, 'return[0]/ready', False)
 
 def test_drive_mirror(self):
-- 
2.29.2

[PATCH v5 02/10] iotests/297: Rewrite in Python and extend reach

2021-01-18 Thread Max Reitz

Instead of checking iotests.py only, check all Python files in the
qemu-iotests/ directory.  Of course, most of them do not pass, so there
is an extensive skip list for now.  (The only files that do pass are
209, 254, 283, and iotests.py.)

(Alternatively, we could have the opposite, i.e. an explicit list of
files that we do want to check, but I think it is better to check files
by default.)

Unless started in debug mode (./check -d), the output has no information
on which files are tested, so we will not have a problem e.g. with
backports, where some files may be missing when compared to upstream.

Besides the technical rewrite, some more things are changed:

- For the pylint invocation, PYTHONPATH is adjusted.  This mirrors
  setting MYPYPATH for mypy.

- Also, MYPYPATH is now derived from PYTHONPATH, so that we include
  paths set by the environment.  Maybe at some point we want to let the
  check script add '../../python/' to PYTHONPATH so that iotests.py does
  not need to do that.

- Passing --notes=FIXME,XXX to pylint suppresses warnings for TODO
  comments.  TODO is fine, we do not need 297 to complain about such
  comments.

- The "Success" line from mypy's output is suppressed, because (A) it
  does not add useful information, and (B) it would leak information
  about the files having been tested to the reference output, which we
  decidedly do not want.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/297 | 112 +
 tests/qemu-iotests/297.out |   5 +-
 2 files changed, 92 insertions(+), 25 deletions(-)

diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
index 5c5420712b..e3db3e061e 100755
--- a/tests/qemu-iotests/297
+++ b/tests/qemu-iotests/297
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/usr/bin/env python3
 #
 # Copyright (C) 2020 Red Hat, Inc.
 #
@@ -15,30 +15,98 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see .
 
-seq=$(basename $0)
-echo "QA output created by $seq"
+import os
+import re
+import shutil
+import subprocess
+import sys
 
-status=1   # failure is the default!
+import iotests
 
-# get standard environment
-. ./common.rc
 
-if ! type -p "pylint-3" > /dev/null; then
-_notrun "pylint-3 not found"
-fi
-if ! type -p "mypy" > /dev/null; then
-_notrun "mypy not found"
-fi
+# TODO: Empty this list!
+SKIP_FILES = (
+'030', '040', '041', '044', '045', '055', '056', '057', '065', '093',
+'096', '118', '124', '129', '132', '136', '139', '147', '148', '149',
+'151', '152', '155', '163', '165', '169', '194', '196', '199', '202',
+'203', '205', '206', '207', '208', '210', '211', '212', '213', '216',
+'218', '219', '222', '224', '228', '234', '235', '236', '237', '238',
+'240', '242', '245', '246', '248', '255', '256', '257', '258', '260',
+'262', '264', '266', '274', '277', '280', '281', '295', '296', '298',
+'299', '300', '302', '303', '304', '307',
+'nbd-fault-injector.py', 'qcow2.py', 'qcow2_format.py', 'qed.py'
+)
 
-pylint-3 --score=n iotests.py
 
-MYPYPATH=../../python/ mypy --warn-unused-configs --disallow-subclassing-any \
---disallow-any-generics --disallow-incomplete-defs \
---disallow-untyped-decorators --no-implicit-optional \
---warn-redundant-casts --warn-unused-ignores \
---no-implicit-reexport iotests.py
+def is_python_file(filename):
+if not os.path.isfile(filename):
+return False
 
-# success, all done
-echo "*** done"
-rm -f $seq.full
-status=0
+if filename.endswith('.py'):
+return True
+
+with open(filename) as f:
+try:
+first_line = f.readline()
+return re.match('^#!.*python', first_line) is not None
+except UnicodeDecodeError:  # Ignore binary files
+return False
+
+
+def run_linters():
+files = [filename for filename in (set(os.listdir('.')) - set(SKIP_FILES))
+ if is_python_file(filename)]
+
+iotests.logger.debug('Files to be checked:')
+iotests.logger.debug(', '.join(sorted(files)))
+
+print('=== pylint ===')
+sys.stdout.flush()
+
+# Todo notes are fine, but fixme's or xxx's should probably just be
+# fixed (in tests, at least)
+env = os.environ.copy()
+qemu_module_path = os.path.join(os.path.dirname(__file__),
+'..', '..', 'python')
+try:
+env['PYTHONPATH'] += os.pathsep + qemu_module_path
+except KeyError:
+env['PYTHONPATH'] = qemu_module_path
+subprocess.run(('pylint-3', '--score=n', '--notes=FIXME,XXX', *files),
+   env=env, check=False)
+
+print('=== mypy ===')
+sys.stdout.flush()
+
+# We have to call mypy separately for each file.  Otherwise, it
+# will interpret all given files as belonging together (i.e., they
+# may not both define the same classes, etc.; most notably, they
+# must not

[PATCH v5 07/10] iotests/129: Actually test a commit job

2021-01-18 Thread Max Reitz

Before this patch, test_block_commit() performs an active commit, which
under the hood is a mirror job.  If we want to test various different
block jobs, we should perhaps run an actual commit job instead.

Doing so requires adding an overlay above the source node before the
commit is done (and then specifying the source node as the top node for
the commit job).

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Willian Rampazzo 
---
 tests/qemu-iotests/129 | 27 +--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/129 b/tests/qemu-iotests/129
index d40e2db24e..104be6dded 100755
--- a/tests/qemu-iotests/129
+++ b/tests/qemu-iotests/129
@@ -26,6 +26,7 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 test_img = os.path.join(iotests.test_dir, 'test.img')
 target_img = os.path.join(iotests.test_dir, 'target.img')
 base_img = os.path.join(iotests.test_dir, 'base.img')
+overlay_img = os.path.join(iotests.test_dir, 'overlay.img')
 
 def setUp(self):
 iotests.qemu_img('create', '-f', iotests.imgfmt, self.base_img, "1G")
@@ -36,6 +37,7 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 self.vm.add_object('throttle-group,id=tg0,x-bps-total=1024')
 
 source_drive = 'driver=throttle,' \
+   'node-name=source,' \
'throttle-group=tg0,' \
f'file.driver={iotests.imgfmt},' \
f'file.file.filename={self.test_img}'
@@ -45,7 +47,8 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 
 def tearDown(self):
 self.vm.shutdown()
-for img in (self.test_img, self.target_img, self.base_img):
+for img in (self.test_img, self.target_img, self.base_img,
+self.overlay_img):
 iotests.try_remove(img)
 
 def do_test_stop(self, cmd, **args):
@@ -72,7 +75,27 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
   sync="full")
 
 def test_block_commit(self):
-self.do_test_stop("block-commit", device="drive0")
+# Add overlay above the source node so that we actually use a
+# commit job instead of a mirror job
+
+iotests.qemu_img('create', '-f', iotests.imgfmt, self.overlay_img,
+ '1G')
+
+result = self.vm.qmp('blockdev-add', **{
+ 'node-name': 'overlay',
+ 'driver': iotests.imgfmt,
+ 'file': {
+ 'driver': 'file',
+ 'filename': self.overlay_img
+ }
+ })
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-snapshot',
+ node='source', overlay='overlay')
+self.assert_qmp(result, 'return', {})
+
+self.do_test_stop('block-commit', device='drive0', top_node='source')
 
 if __name__ == '__main__':
 iotests.main(supported_fmts=["qcow2"],
-- 
2.29.2

[PATCH v5 06/10] iotests/129: Use throttle node

2021-01-18 Thread Max Reitz

Throttling on the BB has not affected block jobs in a while, so it is
possible that one of the jobs in 129 finishes before the VM is stopped.
We can fix that by running the job from a throttle node.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Willian Rampazzo 
---
 tests/qemu-iotests/129 | 37 +
 1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/tests/qemu-iotests/129 b/tests/qemu-iotests/129
index dd23bb2e5a..d40e2db24e 100755
--- a/tests/qemu-iotests/129
+++ b/tests/qemu-iotests/129
@@ -32,20 +32,18 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 iotests.qemu_img('create', '-f', iotests.imgfmt, self.test_img,
  "-b", self.base_img, '-F', iotests.imgfmt)
 iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x5d 1M 128M', 
self.test_img)
-self.vm = iotests.VM().add_drive(self.test_img)
+self.vm = iotests.VM()
+self.vm.add_object('throttle-group,id=tg0,x-bps-total=1024')
+
+source_drive = 'driver=throttle,' \
+   'throttle-group=tg0,' \
+   f'file.driver={iotests.imgfmt},' \
+   f'file.file.filename={self.test_img}'
+
+self.vm.add_drive(None, source_drive)
 self.vm.launch()
 
 def tearDown(self):
-params = {"device": "drive0",
-  "bps": 0,
-  "bps_rd": 0,
-  "bps_wr": 0,
-  "iops": 0,
-  "iops_rd": 0,
-  "iops_wr": 0,
- }
-result = self.vm.qmp("block_set_io_throttle", conv_keys=False,
- **params)
 self.vm.shutdown()
 for img in (self.test_img, self.target_img, self.base_img):
 iotests.try_remove(img)
@@ -53,33 +51,24 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 def do_test_stop(self, cmd, **args):
 """Test 'stop' while block job is running on a throttled drive.
 The 'stop' command shouldn't drain the job"""
-params = {"device": "drive0",
-  "bps": 1024,
-  "bps_rd": 0,
-  "bps_wr": 0,
-  "iops": 0,
-  "iops_rd": 0,
-  "iops_wr": 0,
- }
-result = self.vm.qmp("block_set_io_throttle", conv_keys=False,
- **params)
-self.assert_qmp(result, 'return', {})
 result = self.vm.qmp(cmd, **args)
 self.assert_qmp(result, 'return', {})
+
 result = self.vm.qmp("stop")
 self.assert_qmp(result, 'return', {})
 result = self.vm.qmp("query-block-jobs")
+
 self.assert_qmp(result, 'return[0]/status', 'running')
 self.assert_qmp(result, 'return[0]/ready', False)
 
 def test_drive_mirror(self):
 self.do_test_stop("drive-mirror", device="drive0",
-  target=self.target_img,
+  target=self.target_img, format=iotests.imgfmt,
   sync="full")
 
 def test_drive_backup(self):
 self.do_test_stop("drive-backup", device="drive0",
-  target=self.target_img,
+  target=self.target_img, format=iotests.imgfmt,
   sync="full")
 
 def test_block_commit(self):
-- 
2.29.2

[PATCH v5 00/10] iotests: Fix 129 and expand 297’s reach

2021-01-18 Thread Max Reitz

Cover letters:
v1: https://lists.nongnu.org/archive/html/qemu-block/2021-01/msg00254.html
v2: https://lists.nongnu.org/archive/html/qemu-block/2021-01/msg00296.html
v3: https://lists.nongnu.org/archive/html/qemu-block/2021-01/msg00371.html
v4: https://lists.nongnu.org/archive/html/qemu-block/2021-01/msg00371.html

git:
https://github.com/XanClic/qemu.git fix-129-2-v5
https://git.xanclic.moe/XanClic/qemu.git fix-129-2-v5


Change in v5 (from v4):
- Patch 2:
  - Construct PYTHONPATH/MYPYPATH platform-independently
- Use os.pathsep instead of hard-coding ':'
- Use os.path.dirname(__file__) as the basis for '../../python'
  instead of adding a relative path to PYTHONPATH
- Use os.path.join() instead of joining with '/'


git-backport-diff of v4 <-> v5:

Key:
[] : patches are identical
[] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively

001/10:[] [--] 'iotests.py: Assume a couple of variables as given'
002/10:[0006] [FC] 'iotests/297: Rewrite in Python and extend reach'
003/10:[] [--] 'iotests: Move try_remove to iotests.py'
004/10:[] [--] 'iotests/129: Remove test images in tearDown()'
005/10:[] [--] 'iotests/129: Do not check @busy'
006/10:[] [--] 'iotests/129: Use throttle node'
007/10:[] [--] 'iotests/129: Actually test a commit job'
008/10:[] [--] 'iotests/129: Limit mirror job's buffer size'
009/10:[] [--] 'iotests/129: Clean up pylint and mypy complaints'
010/10:[] [--] 'iotests/300: Clean up pylint and mypy complaints'


Max Reitz (10):
  iotests.py: Assume a couple of variables as given
  iotests/297: Rewrite in Python and extend reach
  iotests: Move try_remove to iotests.py
  iotests/129: Remove test images in tearDown()
  iotests/129: Do not check @busy
  iotests/129: Use throttle node
  iotests/129: Actually test a commit job
  iotests/129: Limit mirror job's buffer size
  iotests/129: Clean up pylint and mypy complaints
  iotests/300: Clean up pylint and mypy complaints

 tests/qemu-iotests/124|   8 +--
 tests/qemu-iotests/129|  72 +-
 tests/qemu-iotests/297| 112 +++---
 tests/qemu-iotests/297.out|   5 +-
 tests/qemu-iotests/300|  19 --
 tests/qemu-iotests/iotests.py |  37 +--
 6 files changed, 171 insertions(+), 82 deletions(-)

-- 
2.29.2

[PATCH v5 04/10] iotests/129: Remove test images in tearDown()

2021-01-18 Thread Max Reitz

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
Reviewed-by: Willian Rampazzo 
---
 tests/qemu-iotests/129 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/129 b/tests/qemu-iotests/129
index 0e13244d85..2fc65ada6a 100755
--- a/tests/qemu-iotests/129
+++ b/tests/qemu-iotests/129
@@ -47,6 +47,8 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 result = self.vm.qmp("block_set_io_throttle", conv_keys=False,
  **params)
 self.vm.shutdown()
+for img in (self.test_img, self.target_img, self.base_img):
+iotests.try_remove(img)
 
 def do_test_stop(self, cmd, **args):
 """Test 'stop' while block job is running on a throttled drive.
-- 
2.29.2

[PATCH v5 01/10] iotests.py: Assume a couple of variables as given

2021-01-18 Thread Max Reitz

There are a couple of environment variables that we fetch with
os.environ.get() without supplying a default.  Clearly they are required
and expected to be set by the ./check script (as evidenced by
execute_setup_common(), which checks for test_dir and
qemu_default_machine to be set, and aborts if they are not).

Using .get() this way has the disadvantage of returning an Optional[str]
type, which mypy will complain about when tests just assume these values
to be str.

Use [] instead, which raises a KeyError for environment variables that
are not set.  When this exception is raised, catch it and move the abort
code from execute_setup_common() there.

Drop the 'assert iotests.sock_dir is not None' from iotest 300, because
that sort of thing is precisely what this patch wants to prevent.

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Willian Rampazzo 
---
 tests/qemu-iotests/300|  1 -
 tests/qemu-iotests/iotests.py | 26 +-
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300
index 5b75121b84..b864a21d5e 100755
--- a/tests/qemu-iotests/300
+++ b/tests/qemu-iotests/300
@@ -27,7 +27,6 @@ import qemu
 
 BlockBitmapMapping = List[Dict[str, Union[str, List[Dict[str, str]
 
-assert iotests.sock_dir is not None
 mig_sock = os.path.join(iotests.sock_dir, 'mig_sock')
 
 
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index dcdcd0387f..52facb8e04 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -75,12 +75,20 @@ qemu_opts = os.environ.get('QEMU_OPTIONS', 
'').strip().split(' ')
 
 imgfmt = os.environ.get('IMGFMT', 'raw')
 imgproto = os.environ.get('IMGPROTO', 'file')
-test_dir = os.environ.get('TEST_DIR')
-sock_dir = os.environ.get('SOCK_DIR')
 output_dir = os.environ.get('OUTPUT_DIR', '.')
-cachemode = os.environ.get('CACHEMODE')
-aiomode = os.environ.get('AIOMODE')
-qemu_default_machine = os.environ.get('QEMU_DEFAULT_MACHINE')
+
+try:
+test_dir = os.environ['TEST_DIR']
+sock_dir = os.environ['SOCK_DIR']
+cachemode = os.environ['CACHEMODE']
+aiomode = os.environ['AIOMODE']
+qemu_default_machine = os.environ['QEMU_DEFAULT_MACHINE']
+except KeyError:
+# We are using these variables as proxies to indicate that we're
+# not being run via "check". There may be other things set up by
+# "check" that individual test cases rely on.
+sys.stderr.write('Please run this test via the "check" script\n')
+sys.exit(os.EX_USAGE)
 
 socket_scm_helper = os.environ.get('SOCKET_SCM_HELPER', 'socket_scm_helper')
 
@@ -1294,14 +1302,6 @@ def execute_setup_common(supported_fmts: Sequence[str] = 
(),
 """
 # Note: Python 3.6 and pylint do not like 'Collection' so use 'Sequence'.
 
-# We are using TEST_DIR and QEMU_DEFAULT_MACHINE as proxies to
-# indicate that we're not being run via "check". There may be
-# other things set up by "check" that individual test cases rely
-# on.
-if test_dir is None or qemu_default_machine is None:
-sys.stderr.write('Please run this test via the "check" script\n')
-sys.exit(os.EX_USAGE)
-
 debug = '-d' in sys.argv
 if debug:
 sys.argv.remove('-d')
-- 
2.29.2

[PATCH v5 03/10] iotests: Move try_remove to iotests.py

2021-01-18 Thread Max Reitz

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Willian Rampazzo 
---
 tests/qemu-iotests/124|  8 +---
 tests/qemu-iotests/iotests.py | 11 +++
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124
index 3705cbb6b3..e40eeb50b9 100755
--- a/tests/qemu-iotests/124
+++ b/tests/qemu-iotests/124
@@ -22,6 +22,7 @@
 
 import os
 import iotests
+from iotests import try_remove
 
 
 def io_write_patterns(img, patterns):
@@ -29,13 +30,6 @@ def io_write_patterns(img, patterns):
 iotests.qemu_io('-c', 'write -P%s %s %s' % pattern, img)
 
 
-def try_remove(img):
-try:
-os.remove(img)
-except OSError:
-pass
-
-
 def transaction_action(action, **kwargs):
 return {
 'type': action,
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 52facb8e04..a69b4cdc4e 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -523,12 +523,15 @@ class FilePath:
 return False
 
 
+def try_remove(img):
+try:
+os.remove(img)
+except OSError:
+pass
+
 def file_path_remover():
 for path in reversed(file_path_remover.paths):
-try:
-os.remove(path)
-except OSError:
-pass
+try_remove(path)
 
 
 def file_path(*names, base_dir=test_dir):
-- 
2.29.2

Re: [PATCH v2 8/9] tests/docker: Add dockerfile for Alpine Linux

2021-01-18 Thread Daniel P . Berrangé

On Mon, Jan 18, 2021 at 02:38:07PM +0800, Jiaxun Yang wrote:
> Alpine Linux[1] is a security-oriented, lightweight Linux distribution
> based on musl libc and busybox.
> 
> It it popular among Docker guests and embedded applications.
> 
> Adding it to test against different libc.
> 
> [1]: https://alpinelinux.org/
> 
> Signed-off-by: Jiaxun Yang 
> ---
>  tests/docker/dockerfiles/alpine.docker | 57 ++
>  1 file changed, 57 insertions(+)
>  create mode 100644 tests/docker/dockerfiles/alpine.docker
> 
> diff --git a/tests/docker/dockerfiles/alpine.docker 
> b/tests/docker/dockerfiles/alpine.docker
> new file mode 100644
> index 00..5be5198d00
> --- /dev/null
> +++ b/tests/docker/dockerfiles/alpine.docker
> @@ -0,0 +1,57 @@
> +
> +FROM alpine:edge
> +
> +RUN apk update
> +RUN apk upgrade
> +
> +# Please keep this list sorted alphabetically
> +ENV PACKAGES \
> + alsa-lib-dev \
> + bash \
> + bison \

This shouldn't be required.

> + build-base \

This seems to be a meta packae that pulls in other
misc toolchain packages. Please list the pieces we
need explicitly instead.

> + coreutils \
> + curl-dev \
> + flex \

This shouldn't be needed.

> + git \
> + glib-dev \
> + glib-static \
> + gnutls-dev \
> + gtk+3.0-dev \
> + libaio-dev \
> + libcap-dev \

Should not be required, as we use cap-ng.

> + libcap-ng-dev \
> + libjpeg-turbo-dev \
> + libnfs-dev \
> + libpng-dev \
> + libseccomp-dev \
> + libssh-dev \
> + libusb-dev \
> + libxml2-dev \
> + linux-headers \

Is this really needed ? We don't install kernel-headers on other
distros AFAICT.

> + lzo-dev \
> + mesa-dev \
> + mesa-egl \
> + mesa-gbm \
> + meson \
> + ncurses-dev \
> + ninja \
> + paxmark \

What is this needed for ?

> + perl \
> + pulseaudio-dev \
> + python3 \
> + py3-sphinx \
> + shadow \

Is this really needed ?

> + snappy-dev \
> + spice-dev \
> + texinfo \
> + usbredir-dev \
> + util-linux-dev \
> + vde2-dev \
> + virglrenderer-dev \
> + vte3-dev \
> + xfsprogs-dev \
> + zlib-dev \
> + zlib-static
> +
> +RUN apk add $PACKAGES

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH 00/28] qemu-option, keyval, vl: switch -object/-M/-accel to keyval parsing

2021-01-18 Thread Kevin Wolf

Am 17.01.2021 um 17:48 hat Paolo Bonzini geschrieben:
> On 02/12/20 10:02, Paolo Bonzini wrote:
> > This series switches -object, -M and -accel from QemuOpts to keyval.
> > Monitor commands device_add and netdev_add are also switched to keyval,
> > though -device and -netdev for now are not.
> > 
> > Along the way, the syntax of keyval and QemuOpts becomes more consistent
> > and support for keyval-based options is added to -readconfig.  -writeconfig
> > instead is removed (see patch 13 for rationale).

> Ping?  It's been over a month (even if with the Christmas vacation).
> 
> Patches 1-2 were already reviewed so I have included them already.  I would
> like at least the next 14 patches to go in as soon as possible. (The rest
> can be routed through maintainer trees or I can post them together with the
> softmmu/vl.c cleanups).

On which commit is this based? It doesn't seem to apply to master (maybe
no surprise), but also not to 5.2.0-rc4, which was the state of master
when this was posted.

Kevin

Re: [PATCH v2 9/9] gitlab-ci: Add alpine to pipeline

2021-01-18 Thread Daniel P . Berrangé

On Mon, Jan 18, 2021 at 11:22:47AM +0100, Thomas Huth wrote:
> On 18/01/2021 11.11, Daniel P. Berrangé wrote:
> > On Mon, Jan 18, 2021 at 02:38:08PM +0800, Jiaxun Yang wrote:
> > > We only run build test and check-acceptance as their are too many
> > > failures in checks due to minor string mismatch.
> > 
> > Can you give real examples of what's broken here, as that sounds
> > rather suspicious, and I'm not convinced it should be ignored.
> 
> I haven't tried, but I guess it's the "check-block" iotests that are likely
> failing with a different libc, since they do string comparison on the
> textual output of the tests. If that's the case, it would maybe be still ok
> to run "check-qtest" and "check-union" on Alpine instead of the whole
> "check" test suite.

Perhaps errno strings are different due to libc hitting block tests. I
would expect this to be explained in the commit message though with
example, so we have a clear record of why we needed to disable this.

It might indicate a need to change our test suite to be more robust
in this area, because that would also suggest the tests might fail
on  non-Linux.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH v2 9/9] gitlab-ci: Add alpine to pipeline

2021-01-18 Thread Thomas Huth


On 18/01/2021 11.11, Daniel P. Berrangé wrote:

On Mon, Jan 18, 2021 at 02:38:08PM +0800, Jiaxun Yang wrote:

We only run build test and check-acceptance as their are too many
failures in checks due to minor string mismatch.


Can you give real examples of what's broken here, as that sounds
rather suspicious, and I'm not convinced it should be ignored.


I haven't tried, but I guess it's the "check-block" iotests that are likely 
failing with a different libc, since they do string comparison on the 
textual output of the tests. If that's the case, it would maybe be still ok 
to run "check-qtest" and "check-union" on Alpine instead of the whole 
"check" test suite.


 Thomas

Re: [RFC PATCH 0/2] Allow changing bs->file on reopen

2021-01-18 Thread Vladimir Sementsov-Ogievskiy


15.01.2021 16:02, Alberto Garcia wrote:

Hi,

during the past months we talked about making x-blockdev-reopen stable
API, and one of the missing things was having support for changing
bs->file. See here for the discusssion (I can't find the message from
Kashyap that started the thread in the web archives):

https://lists.gnu.org/archive/html/qemu-block/2020-10/msg00922.html

I was testing this and one of the problems that I found was that
removing a filter node using this command is tricky because of the
permission system, see here for details:

https://lists.gnu.org/archive/html/qemu-block/2020-12/msg00092.html

The good news is that Vladimir posted a set of patches that changes
the way that permissions are updated on reopen:

https://lists.gnu.org/archive/html/qemu-block/2020-11/msg00745.html

I was testing if this would be useful to solve the problem that I
mentioned earlier and it seems to be the case so I wrote a patch to
add support for changing bs->file, along with a couple of test cases.

This is still an RFC but you can see the idea.


Good idea and I glad to see that my patches help:)

Hmm, still, removing a filter which want to unshare WRITE even when doesn't 
have any parents will be a problem anyway, so we'll need a new command to drop 
filter with a logic like in bdrv_drop_filter in my series.

Or, we can introduce multiple reopen.. So that x-blockdev-reopen will take a 
list of BlockdevOptions, and do all modifications in one transaction. Than 
we'll be able to drop filter by transactional update of top node child and 
removing filter child link.



These patches apply on top of Vladimir's branch:

git: https://src.openvz.org/scm/~vsementsov/qemu.git
tag: up-block-topologic-perm-v2

Opinions are very welcome!

Berto

Alberto Garcia (2):
   block: Allow changing bs->file on reopen
   iotests: Update 245 to support replacing files with x-blockdev-reopen

  include/block/block.h  |  1 +
  block.c| 61 ++
  tests/qemu-iotests/245 | 61 +++---
  tests/qemu-iotests/245.out |  4 +--
  4 files changed, 121 insertions(+), 6 deletions(-)




--
Best regards,
Vladimir

Re: [RFC PATCH 1/2] block: Allow changing bs->file on reopen

2021-01-18 Thread Vladimir Sementsov-Ogievskiy


15.01.2021 16:02, Alberto Garcia wrote:

When the x-blockdev-reopen was added it allowed reconfiguring the
graph by replacing backing files, but changing the 'file' option was
forbidden. Because of this restriction some operations are not
possible, notably inserting and removing block filters.

This patch adds support for replacing the 'file' option. This is
similar to replacing the backing file and the user is likewise
responsible for the correctness of the resulting graph, otherwise this
can lead to data corruption.

Signed-off-by: Alberto Garcia   
---
  include/block/block.h  |  1 +
  block.c| 61 ++
  tests/qemu-iotests/245 |  7 ++---
  3 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 82271d9ccd..6dd687a69e 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -196,6 +196,7 @@ typedef struct BDRVReopenState {
  bool backing_missing;
  bool replace_backing_bs;  /* new_backing_bs is ignored if this is false */
  BlockDriverState *old_backing_bs; /* keep pointer for permissions update 
*/
+BlockDriverState *old_file_bs;/* keep pointer for permissions update */
  uint64_t perm, shared_perm;
  QDict *options;
  QDict *explicit_options;
diff --git a/block.c b/block.c
index 576b145cbf..114788e58e 100644
--- a/block.c
+++ b/block.c
@@ -3978,6 +3978,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, 
Error **errp)
  refresh_list = bdrv_topological_dfs(refresh_list, found,
  state->old_backing_bs);
  }
+if (state->old_file_bs) {
+refresh_list = bdrv_topological_dfs(refresh_list, found,
+state->old_file_bs);
+}
  }
  
  ret = bdrv_list_refresh_perms(refresh_list, bs_queue, , errp);

@@ -4196,6 +4200,57 @@ static int bdrv_reopen_parse_backing(BDRVReopenState 
*reopen_state,
  return 0;
  }
  
+static int bdrv_reopen_parse_file(BDRVReopenState *reopen_state,

+  GSList **tran,
+  Error **errp)
+{
+BlockDriverState *bs = reopen_state->bs;
+BlockDriverState *new_file_bs;
+QObject *value;
+const char *str;
+
+value = qdict_get(reopen_state->options, "file");
+if (value == NULL) {
+return 0;
+}
+
+/* The 'file' option only allows strings */
+assert(qobject_type(value) == QTYPE_QSTRING);
+
+str = qobject_get_try_str(value);
+new_file_bs = bdrv_lookup_bs(NULL, str, errp);
+if (new_file_bs == NULL) {
+return -EINVAL;
+} else if (bdrv_recurse_has_child(new_file_bs, bs)) {
+error_setg(errp, "Making '%s' a file of '%s' "
+   "would create a cycle", str, bs->node_name);
+return -EINVAL;
+}
+
+assert(bs->file && bs->file->bs);


why are we sure at this point? Probably, we should just return an error..


+
+/* If 'file' points to the current child then there's nothing to do */
+if (bs->file->bs == new_file_bs) {
+return 0;
+}
+
+/* Check AioContext compatibility */
+if (!bdrv_reopen_can_attach(bs, bs->file, new_file_bs, errp)) {
+return -EINVAL;
+}
+
+/* At the moment only backing links are frozen */
+assert(!bs->file->frozen);


I think it can: file-child based filters can be a part of frozen backing chain 
currently.


+
+/* Store the old file bs because we'll need to refresh its permissions */
+reopen_state->old_file_bs = bs->file->bs;
+
+/* And finally replace the child */
+bdrv_replace_child(bs->file, new_file_bs, tran);
+
+return 0;
+}
+
  /*
   * Prepares a BlockDriverState for reopen. All changes are staged in the
   * 'opaque' field of the BDRVReopenState, which is used and allocated by
@@ -4347,6 +4402,12 @@ static int bdrv_reopen_prepare(BDRVReopenState 
*reopen_state,
  }
  qdict_del(reopen_state->options, "backing");
  
+ret = bdrv_reopen_parse_file(reopen_state, set_backings_tran, errp);

+if (ret < 0) {
+goto error;
+}
+qdict_del(reopen_state->options, "file");
+
  /* Options that are not handled are only okay if they are unchanged
   * compared to the old state. It is expected that some options are only
   * used for the initial open, but not reopen (e.g. filename) */
diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245
index e60c8326d3..f9d68b3958 100755
--- a/tests/qemu-iotests/245
+++ b/tests/qemu-iotests/245
@@ -145,8 +145,8 @@ class TestBlockdevReopen(iotests.QMPTestCase):
  self.reopen(opts, {'driver': 'raw'}, "Cannot change the option 
'driver'")
  self.reopen(opts, {'driver': ''}, "Invalid parameter ''")
  self.reopen(opts, {'driver': None}, "Invalid parameter type for 'driver', 
expected: string")
-self.reopen(opts, {'file': 'not-found'}, "Cannot change the

[PATCH v2 12/12] hw/block/nvme: lift cmb restrictions

2021-01-18 Thread Klaus Jensen

From: Klaus Jensen 

The controller now implements v1.4 and we can lift the restrictions on
CMB Data Pointer and Command Independent Locations Support (CDPCILS) and
CMB Data Pointer Mixed Locations Support (CDPMLS) since the device
really does not care about mixed host/cmb pointers in those cases.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 33 ++---
 1 file changed, 2 insertions(+), 31 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 992665376a71..23a836a343e2 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -509,7 +509,6 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, 
uint64_t prp2,
 trans_len = MIN(len, trans_len);
 int num_prps = (len >> n->page_bits) + 1;
 uint16_t status;
-bool prp_list_in_cmb = false;
 int ret;
 
 QEMUSGList *qsg = >qsg;
@@ -535,10 +534,6 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, 
uint64_t prp2,
 uint32_t nents, prp_trans;
 int i = 0;
 
-if (nvme_addr_is_cmb(n, prp2)) {
-prp_list_in_cmb = true;
-}
-
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
 ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
@@ -555,10 +550,6 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, 
uint64_t prp2,
 return NVME_INVALID_PRP_OFFSET | NVME_DNR;
 }
 
-if (prp_list_in_cmb != nvme_addr_is_cmb(n, prp_ent)) {
-return NVME_INVALID_USE_OF_CMB | NVME_DNR;
-}
-
 i = 0;
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
@@ -692,7 +683,6 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 uint64_t nsgld;
 uint32_t seg_len;
 uint16_t status;
-bool sgl_in_cmb = false;
 hwaddr addr;
 int ret;
 
@@ -714,18 +704,6 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 goto out;
 }
 
-/*
- * If the segment is located in the CMB, the submission queue of the
- * request must also reside there.
- */
-if (nvme_addr_is_cmb(n, addr)) {
-if (!nvme_addr_is_cmb(n, req->sq->dma_addr)) {
-return NVME_INVALID_USE_OF_CMB | NVME_DNR;
-}
-
-sgl_in_cmb = true;
-}
-
 for (;;) {
 switch (NVME_SGL_TYPE(sgld->type)) {
 case NVME_SGL_DESCR_TYPE_SEGMENT:
@@ -814,15 +792,6 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 if (status) {
 goto unmap;
 }
-
-/*
- * If the next segment is in the CMB, make sure that the sgl was
- * already located there.
- */
-if (sgl_in_cmb != nvme_addr_is_cmb(n, addr)) {
-status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
-goto unmap;
-}
 }
 
 out:
@@ -3739,6 +3708,8 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 
 static void nvme_cmb_enable_regs(NvmeCtrl *n)
 {
+NVME_CMBLOC_SET_CDPCILS(n->bar.cmbloc, 1);
+NVME_CMBLOC_SET_CDPMLS(n->bar.cmbloc, 1);
 NVME_CMBLOC_SET_BIR(n->bar.cmbloc, NVME_CMB_BIR);
 
 NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
-- 
2.30.0

Re: [PATCH v2 6/9] tests: Rename PAGE_SIZE definitions

2021-01-18 Thread Philippe Mathieu-Daudé

On 1/18/21 7:38 AM, Jiaxun Yang wrote:
> As per POSIX specification of limits.h [1], OS libc may define
> PAGE_SIZE in limits.h.
> 
> Self defined PAGE_SIZE is frequently used in tests, to prevent
> collosion of definition, we give PAGE_SIZE definitons reasonable
> prefixs.
> 
> [1]: https://pubs.opengroup.org/onlinepubs/7908799/xsh/limits.h.html
> 
> Signed-off-by: Jiaxun Yang 
> Reviewed-by: Thomas Huth 
> ---
>  tests/migration/stress.c| 10 ++---
>  tests/qtest/libqos/malloc-pc.c  |  4 +-
>  tests/qtest/libqos/malloc-spapr.c   |  4 +-
>  tests/qtest/m25p80-test.c   | 54 +++---
>  tests/tcg/multiarch/system/memory.c |  6 +--
>  tests/test-xbzrle.c | 70 ++---
>  6 files changed, 74 insertions(+), 74 deletions(-)
...

> diff --git a/tests/tcg/multiarch/system/memory.c 
> b/tests/tcg/multiarch/system/memory.c
> index d124502d73..eb0ec6f8eb 100644
> --- a/tests/tcg/multiarch/system/memory.c
> +++ b/tests/tcg/multiarch/system/memory.c
> @@ -20,12 +20,12 @@
>  # error "Target does not specify CHECK_UNALIGNED"
>  #endif
>  
> -#define PAGE_SIZE 4096 /* nominal 4k "pages" */
> -#define TEST_SIZE (PAGE_SIZE * 4)  /* 4 pages */
> +#define MEM_PAGE_SIZE 4096 /* nominal 4k "pages" */
> +#define TEST_SIZE (MEM_PAGE_SIZE * 4)  /* 4 pages */

Clearer renaming TEST_PAGE_SIZE and TEST_MEM_SIZE.

If possible using TEST_PAGE_SIZE / TEST_MEM_SIZE:
Reviewed-by: Philippe Mathieu-Daudé 

>  
>  #define ARRAY_SIZE(x) ((sizeof(x) / sizeof((x)[0])))
>  
> -__attribute__((aligned(PAGE_SIZE)))
> +__attribute__((aligned(MEM_PAGE_SIZE)))
>  static uint8_t test_data[TEST_SIZE];

[PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Klaus Jensen

From: Padmakar Kalghatgi 

Implement v1.4 logic for configuring the Controller Memory Buffer. This
is not backward compatible with v1.3, so drivers that only support v1.3
will not be able to use the CMB anymore.

Signed-off-by: Padmakar Kalghatgi 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.h   |   1 +
 include/block/nvme.h  | 107 +-
 hw/block/nvme.c   |  78 +++---
 hw/block/trace-events |   2 +
 4 files changed, 159 insertions(+), 29 deletions(-)

diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 5988d9b36e12..de9164dd52e6 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -145,6 +145,7 @@ typedef struct NvmeCtrl {
 uint32_tmax_q_ents;
 uint8_t outstanding_aers;
 uint8_t *cmbuf;
+boolcmb_cmse;
 uint32_tirq_status;
 uint64_thost_timestamp; /* Timestamp sent by the host 
*/
 uint64_ttimestamp_set_qemu_clock_ms;/* QEMU clock time */
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 183dc5c0ecf6..7dcd8f9b4e78 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -15,14 +15,19 @@ typedef struct QEMU_PACKED NvmeBar {
 uint64_tacq;
 uint32_tcmbloc;
 uint32_tcmbsz;
-uint8_t padding[3520]; /* not used by QEMU */
+uint32_tbpinfo;
+uint32_tbprsel;
+uint64_tbpmbl;
+uint64_tcmbmsc;
+uint32_tcmbsts;
+uint8_t rsvd92[3492];
 uint32_tpmrcap;
 uint32_tpmrctl;
 uint32_tpmrsts;
 uint32_tpmrebs;
 uint32_tpmrswtp;
 uint64_tpmrmsc;
-uint8_t reserved[484];
+uint8_t css[484];
 } NvmeBar;
 
 enum NvmeCapShift {
@@ -63,6 +68,7 @@ enum NvmeCapMask {
 #define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK)
 #define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK)
 #define NVME_CAP_PMRS(cap)  (((cap) >> CAP_PMRS_SHIFT)   & CAP_PMRS_MASK)
+#define NVME_CAP_CMBS(cap)  (((cap) >> CAP_CMBS_SHIFT)   & CAP_CMBS_MASK)
 
 #define NVME_CAP_SET_MQES(cap, val)   (cap |= (uint64_t)(val & CAP_MQES_MASK)  
\
<< CAP_MQES_SHIFT)
@@ -184,25 +190,64 @@ enum NvmeAqaMask {
 #define NVME_AQA_ACQS(aqa) ((aqa >> AQA_ACQS_SHIFT) & AQA_ACQS_MASK)
 
 enum NvmeCmblocShift {
-CMBLOC_BIR_SHIFT  = 0,
-CMBLOC_OFST_SHIFT = 12,
+CMBLOC_BIR_SHIFT = 0,
+CMBLOC_CQMMS_SHIFT   = 3,
+CMBLOC_CQPDS_SHIFT   = 4,
+CMBLOC_CDPMLS_SHIFT  = 5,
+CMBLOC_CDPCILS_SHIFT = 6,
+CMBLOC_CDMMMS_SHIFT  = 7,
+CMBLOC_CQDA_SHIFT= 8,
+CMBLOC_OFST_SHIFT= 12,
 };
 
 enum NvmeCmblocMask {
-CMBLOC_BIR_MASK  = 0x7,
-CMBLOC_OFST_MASK = 0xf,
+CMBLOC_BIR_MASK = 0x7,
+CMBLOC_CQMMS_MASK   = 0x1,
+CMBLOC_CQPDS_MASK   = 0x1,
+CMBLOC_CDPMLS_MASK  = 0x1,
+CMBLOC_CDPCILS_MASK = 0x1,
+CMBLOC_CDMMMS_MASK  = 0x1,
+CMBLOC_CQDA_MASK= 0x1,
+CMBLOC_OFST_MASK= 0xf,
 };
 
-#define NVME_CMBLOC_BIR(cmbloc) ((cmbloc >> CMBLOC_BIR_SHIFT)  & \
- CMBLOC_BIR_MASK)
-#define NVME_CMBLOC_OFST(cmbloc)((cmbloc >> CMBLOC_OFST_SHIFT) & \
- CMBLOC_OFST_MASK)
+#define NVME_CMBLOC_BIR(cmbloc) \
+((cmbloc >> CMBLOC_BIR_SHIFT) & CMBLOC_BIR_MASK)
+#define NVME_CMBLOC_CQMMS(cmbloc) \
+((cmbloc >> CMBLOC_CQMMS_SHIFT) & CMBLOC_CQMMS_MASK)
+#define NVME_CMBLOC_CQPDS(cmbloc) \
+((cmbloc >> CMBLOC_CQPDS_SHIFT) & CMBLOC_CQPDS_MASK)
+#define NVME_CMBLOC_CDPMLS(cmbloc) \
+((cmbloc >> CMBLOC_CDPMLS_SHIFT) & CMBLOC_CDPMLS_MASK)
+#define NVME_CMBLOC_CDPCILS(cmbloc) \
+((cmbloc >> CMBLOC_CDPCILS_SHIFT) & CMBLOC_CDPCILS_MASK)
+#define NVME_CMBLOC_CDMMMS(cmbloc) \
+((cmbloc >> CMBLOC_CDMMMS_SHIFT) & CMBLOC_CDMMMS_MASK)
+#define NVME_CMBLOC_CQDA(cmbloc) \
+((cmbloc >> CMBLOC_CQDA_SHIFT) & CMBLOC_CQDA_MASK)
+#define NVME_CMBLOC_OFST(cmbloc) \
+((cmbloc >> CMBLOC_OFST_SHIFT) & CMBLOC_OFST_MASK)
 
-#define NVME_CMBLOC_SET_BIR(cmbloc, val)  \
+#define NVME_CMBLOC_SET_BIR(cmbloc, val) \
 (cmbloc |= (uint64_t)(val & CMBLOC_BIR_MASK) << CMBLOC_BIR_SHIFT)
+#define NVME_CMBLOC_SET_CQMMS(cmbloc, val) \
+(cmbloc |= (uint64_t)(val & CMBLOC_CQMMS_MASK) << CMBLOC_CQMMS_SHIFT)
+#define NVME_CMBLOC_SET_CQPDS(cmbloc, val) \
+(cmbloc |= (uint64_t)(val & CMBLOC_CQPDS_MASK) << CMBLOC_CQPDS_SHIFT)
+#define NVME_CMBLOC_SET_CDPMLS(cmbloc, val) \
+(cmbloc |= (uint64_t)(val & CMBLOC_CDPMLS_MASK) << CMBLOC_CDPMLS_SHIFT)
+#define NVME_CMBLOC_SET_CDPCILS(cmbloc, val) \
+(cmbloc |= (uint64_t)(val & CMBLOC_CDPCILS_MASK) << CMBLOC_CDPCILS_SHIFT)
+#define NVME_CMBLOC_SET_CDMMMS(cmbloc, val) \
+(cmbloc |= (uint64_t)(val & CMBLOC_CDMMMS_MASK) << CMBLOC_CDMMMS_SHIFT)
+#define NVME_CMBLOC_SET_CQDA(cmbloc, val) \
+(cmbloc |= (uint64_t)(val & CMBLOC_CQDA_MASK) << CMBLOC_CQDA_SHIFT)
 #define NVME_CMBLOC_SET_OFST(cmbloc, val) \
 (cmbloc

Re: [PATCH v2 9/9] gitlab-ci: Add alpine to pipeline

2021-01-18 Thread Daniel P . Berrangé

On Mon, Jan 18, 2021 at 02:38:08PM +0800, Jiaxun Yang wrote:
> We only run build test and check-acceptance as their are too many
> failures in checks due to minor string mismatch.

Can you give real examples of what's broken here, as that sounds
rather suspicious, and I'm not convinced it should be ignored.

> 
> Signed-off-by: Jiaxun Yang 
> ---
>  .gitlab-ci.d/containers.yml |  5 +
>  .gitlab-ci.yml  | 23 +++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/.gitlab-ci.d/containers.yml b/.gitlab-ci.d/containers.yml
> index 910754a699..90fac85ce4 100644
> --- a/.gitlab-ci.d/containers.yml
> +++ b/.gitlab-ci.d/containers.yml
> @@ -28,6 +28,11 @@
>  - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
>  - if: '$CI_COMMIT_REF_NAME == "testing/next"'
>  
> +amd64-alpine-container:
> +  <<: *container_job_definition
> +  variables:
> +NAME: alpine
> +
>  amd64-centos7-container:
><<: *container_job_definition
>variables:
> diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
> index 4532f1718a..6cc922aedb 100644
> --- a/.gitlab-ci.yml
> +++ b/.gitlab-ci.yml
> @@ -72,6 +72,29 @@ include:
>  - cd build
>  - du -chs ${CI_PROJECT_DIR}/avocado-cache
>  
> +build-system-alpine:
> +  <<: *native_build_job_definition
> +  variables:
> +IMAGE: alpine
> +TARGETS: aarch64-softmmu alpha-softmmu cris-softmmu hppa-softmmu
> +  moxie-softmmu microblazeel-softmmu mips64el-softmmu
> +MAKE_CHECK_ARGS: check-build
> +CONFIGURE_ARGS: --enable-docs
> +  artifacts:
> +expire_in: 2 days
> +paths:
> +  - build
> +
> +acceptance-system-alpine:
> +  <<: *native_test_job_definition
> +  needs:
> +- job: build-system-alpine
> +  artifacts: true
> +  variables:
> +IMAGE: alpine
> +MAKE_CHECK_ARGS: check-acceptance
> +  <<: *acceptance_definition
> +
>  build-system-ubuntu:
><<: *native_build_job_definition
>variables:
> -- 
> 2.30.0
> 
> 

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH v4 02/10] iotests/297: Rewrite in Python and extend reach

2021-01-18 Thread Max Reitz


On 15.01.21 20:27, Willian Rampazzo wrote:

On Fri, Jan 15, 2021 at 2:43 PM Max Reitz  wrote:


Instead of checking iotests.py only, check all Python files in the
qemu-iotests/ directory.  Of course, most of them do not pass, so there
is an extensive skip list for now.  (The only files that do pass are
209, 254, 283, and iotests.py.)

(Alternatively, we could have the opposite, i.e. an explicit list of
files that we do want to check, but I think it is better to check files
by default.)

Unless started in debug mode (./check -d), the output has no information
on which files are tested, so we will not have a problem e.g. with
backports, where some files may be missing when compared to upstream.

Besides the technical rewrite, some more things are changed:

- For the pylint invocation, PYTHONPATH is adjusted.  This mirrors
   setting MYPYPATH for mypy.

- Also, MYPYPATH is now derived from PYTHONPATH, so that we include
   paths set by the environment.  Maybe at some point we want to let the
   check script add '../../python/' to PYTHONPATH so that iotests.py does
   not need to do that.

- Passing --notes=FIXME,XXX to pylint suppresses warnings for TODO
   comments.  TODO is fine, we do not need 297 to complain about such
   comments.

- The "Success" line from mypy's output is suppressed, because (A) it
   does not add useful information, and (B) it would leak information
   about the files having been tested to the reference output, which we
   decidedly do not want.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
  tests/qemu-iotests/297 | 110 +
  tests/qemu-iotests/297.out |   5 +-
  2 files changed, 90 insertions(+), 25 deletions(-)

diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
index 5c5420712b..fa9e2cac78 100755
--- a/tests/qemu-iotests/297
+++ b/tests/qemu-iotests/297
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/usr/bin/env python3
  #
  # Copyright (C) 2020 Red Hat, Inc.
  #
@@ -15,30 +15,96 @@
  # You should have received a copy of the GNU General Public License
  # along with this program.  If not, see .

-seq=$(basename $0)
-echo "QA output created by $seq"
+import os
+import re
+import shutil
+import subprocess
+import sys

-status=1   # failure is the default!
+import iotests

-# get standard environment
-. ./common.rc

-if ! type -p "pylint-3" > /dev/null; then
-_notrun "pylint-3 not found"
-fi
-if ! type -p "mypy" > /dev/null; then
-_notrun "mypy not found"
-fi
+# TODO: Empty this list!
+SKIP_FILES = (
+'030', '040', '041', '044', '045', '055', '056', '057', '065', '093',
+'096', '118', '124', '129', '132', '136', '139', '147', '148', '149',
+'151', '152', '155', '163', '165', '169', '194', '196', '199', '202',
+'203', '205', '206', '207', '208', '210', '211', '212', '213', '216',
+'218', '219', '222', '224', '228', '234', '235', '236', '237', '238',
+'240', '242', '245', '246', '248', '255', '256', '257', '258', '260',
+'262', '264', '266', '274', '277', '280', '281', '295', '296', '298',
+'299', '300', '302', '303', '304', '307',
+'nbd-fault-injector.py', 'qcow2.py', 'qcow2_format.py', 'qed.py'
+)

-pylint-3 --score=n iotests.py

-MYPYPATH=../../python/ mypy --warn-unused-configs --disallow-subclassing-any \
---disallow-any-generics --disallow-incomplete-defs \
---disallow-untyped-decorators --no-implicit-optional \
---warn-redundant-casts --warn-unused-ignores \
---no-implicit-reexport iotests.py
+def is_python_file(filename):
+if not os.path.isfile(filename):
+return False

-# success, all done
-echo "*** done"
-rm -f $seq.full
-status=0
+if filename.endswith('.py'):
+return True
+
+with open(filename) as f:
+try:
+first_line = f.readline()
+return re.match('^#!.*python', first_line) is not None
+except UnicodeDecodeError:  # Ignore binary files
+return False
+
+
+def run_linters():
+files = [filename for filename in (set(os.listdir('.')) - set(SKIP_FILES))
+ if is_python_file(filename)]
+
+iotests.logger.debug('Files to be checked:')
+iotests.logger.debug(', '.join(sorted(files)))
+
+print('=== pylint ===')
+sys.stdout.flush()
+
+# Todo notes are fine, but fixme's or xxx's should probably just be
+# fixed (in tests, at least)
+env = os.environ.copy()
+try:
+env['PYTHONPATH'] += ':../../python/'


Do you have any objection to using os.path.dirname and os.path.join
here? This would make the code more pythonic.


Intuitively, I felt a bit uneasy about os.path.join here, because it 
would make it look like this was platform-independent, when it is not: 
The colon as a PATH separator is probably more platform-dependent than 
the slashes.


So turns out there is os.pathsep, which yields a colon on e.g. Linux and 
a semicolon on e.g. Windows.


I

Re: [PATCH 0/9] hw/block: m25p80: Fix the mess of dummy bytes needed for fast read commands

2021-01-18 Thread Francisco Iglesias

Hi Bin,

On [2021 Jan 15] Fri 22:38:18, Bin Meng wrote:
> Hi Francisco,
> 
> On Fri, Jan 15, 2021 at 8:26 PM Francisco Iglesias
>  wrote:
> >
> > Hi Bin,
> >
> > On [2021 Jan 15] Fri 10:07:52, Bin Meng wrote:
> > > Hi Francisco,
> > >
> > > On Fri, Jan 15, 2021 at 2:13 AM Francisco Iglesias
> > >  wrote:
> > > >
> > > > Hi Bin,
> > > >
> > > > On [2021 Jan 14] Thu 23:08:53, Bin Meng wrote:
> > > > > From: Bin Meng 
> > > > >
> > > > > The m25p80 model uses s->needed_bytes to indicate how many follow-up
> > > > > bytes are expected to be received after it receives a command. For
> > > > > example, depending on the address mode, either 3-byte address or
> > > > > 4-byte address is needed.
> > > > >
> > > > > For fast read family commands, some dummy cycles are required after
> > > > > sending the address bytes, and the dummy cycles need to be counted
> > > > > in s->needed_bytes. This is where the mess began.
> > > > >
> > > > > As the variable name (needed_bytes) indicates, the unit is in byte.
> > > > > It is not in bit, or cycle. However for some reason the model has
> > > > > been using the number of dummy cycles for s->needed_bytes. The right
> > > > > approach is to convert the number of dummy cycles to bytes based on
> > > > > the SPI protocol, for example, 6 dummy cycles for the Fast Read Quad
> > > > > I/O (EBh) should be converted to 3 bytes per the formula (6 * 4 / 8).
> > > >
> > > > While not being the original implementor I must assume that above 
> > > > solution was
> > > > considered but not chosen by the developers due to it is inaccuracy (it
> > > > wouldn't be possible to model exacly 6 dummy cycles, only a multiple of 
> > > > 8,
> > > > meaning that if the controller is wrongly programmed to generate 7 the 
> > > > error
> > > > wouldn't be caught and the controller will still be considered 
> > > > "correct"). Now
> > > > that we have this detail in the implementation I'm in favor of keeping 
> > > > it, this
> > > > also because the detail is already in use for catching exactly above 
> > > > error.
> > > >
> > >
> > > I found no clue from the commit message that my proposed solution here
> > > was ever considered, otherwise all SPI controller models supporting
> > > software generation should have been found out seriously broken long
> > > time ago!
> >
> >
> > The controllers you are referring to might lack support for commands 
> > requiring
> > dummy clock cycles but I really hope they work with the other commands? If 
> > so I
> 
> I am not sure why you view dummy clock cycles as something special
> that needs some special support from the SPI controller. For the case
> 1 controller, it's nothing special from the controller perspective,
> just like sending out a command, or address bytes, or data. The
> controller just shifts data bit by bit from its tx fifo and that's it.
> In the Xilinx GQSPI controller case, the dummy cycles can either be
> sent via a regular data (the case 1 controller) in the tx fifo, or
> automatically generated (case 2 controller) by the hardware.

Ok, I'll try to explain my view point a little differently. For that we also
need to keep in mind that QEMU models HW, and any binary that runs on a HW
board supported in QEMU should ideally run on that board inside QEMU aswell
(this can be a bare metal application equaly well as a modified u-boot/Linux
using SPI commands with a non multiple of 8 number of dummy clock cycles).

Once functionality has been introduced into QEMU it is not easy to know which
intentional or untentional features provided by the functionality are being
used by users. One of the (perhaps not well known) features I'm aware of that
is in use and is provided by the accurate dummy clock cycle modeling inside
m25p80 is the be ability to test drivers accurately regarding the dummy clock
cycles (even when using commands with a non-multiple of 8 number of dummy clock
cycles), but there might be others aswell. So by removing this functionality
above use case will brake, this since those test will not be reliable.
Furthermore, since users tend to be creative it is not possible to know if
there are other use cases that will be affected. This means that in case [1]
needs to be followed the safe path is to add functionality instead of removing.
Luckily it also easier in this case, see below.

> 
> > don't think it is fair to call them 'seriously broken' (and else we should
> > probably let the maintainers know about it). Most likely the lack of support
> 
> I called it "seriously broken" because current implementation only
> considered one type of SPI controllers while completely ignoring the
> other type.

If we change view and see this from the perspective of m25p80, it models the
commands a certain way and provides an API that the SPI controllers need to
implement for interacting with it. It is true that there are SPI controllers
referred to above that do not support the portion of that API that corresponds
to commands with dummy clock cycles,

[PATCH v2 08/12] hw/block/nvme: disable PMR at boot up

2021-01-18 Thread Klaus Jensen

From: Klaus Jensen 

The PMR should not be enabled at boot up. Disable the PMR MemoryRegion
initially and implement MMIO for PMRCTL, allowing the host to enable the
PMR explicitly.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 9ee9570bb65c..937a6ed0a70d 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -3810,8 +3810,16 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly,
"invalid write to PMRCAP register, ignored");
 return;
-case 0xE04: /* TODO PMRCTL */
-break;
+case 0xE04: /* PMRCTL */
+n->bar.pmrctl = data;
+if (NVME_PMRCTL_EN(data)) {
+memory_region_set_enabled(>pmrdev->mr, true);
+n->bar.pmrsts = 0;
+} else {
+memory_region_set_enabled(>pmrdev->mr, false);
+NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 1);
+}
+return;
 case 0xE08: /* PMRSTS */
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly,
"invalid write to PMRSTS register, ignored");
@@ -4186,6 +4194,8 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
  PCI_BASE_ADDRESS_SPACE_MEMORY |
  PCI_BASE_ADDRESS_MEM_TYPE_64 |
  PCI_BASE_ADDRESS_MEM_PREFETCH, >pmrdev->mr);
+
+memory_region_set_enabled(>pmrdev->mr, false);
 }
 
 static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
-- 
2.30.0

[PATCH v2 09/12] hw/block/nvme: add PMR RDS/WDS support

2021-01-18 Thread Klaus Jensen

From: Naveen Nagar 

Add support for the PMRMSCL and PMRMSCU MMIO registers. This allows
adding RDS/WDS support for PMR as well.

Signed-off-by: Naveen Nagar 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.h  |  1 +
 include/block/nvme.h |  1 +
 hw/block/nvme.c  | 89 ++--
 3 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 2a25bc84f3f9..5988d9b36e12 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -152,6 +152,7 @@ typedef struct NvmeCtrl {
 uint16_ttemperature;
 
 HostMemoryBackend *pmrdev;
+bool  pmr_cmse;
 
 uint8_t aer_mask;
 NvmeRequest **aer_reqs;
diff --git a/include/block/nvme.h b/include/block/nvme.h
index f3cbe17d0971..183dc5c0ecf6 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -62,6 +62,7 @@ enum NvmeCapMask {
 #define NVME_CAP_CSS(cap)   (((cap) >> CAP_CSS_SHIFT)& CAP_CSS_MASK)
 #define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK)
 #define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK)
+#define NVME_CAP_PMRS(cap)  (((cap) >> CAP_PMRS_SHIFT)   & CAP_PMRS_MASK)
 
 #define NVME_CAP_SET_MQES(cap, val)   (cap |= (uint64_t)(val & CAP_MQES_MASK)  
\
<< CAP_MQES_SHIFT)
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 937a6ed0a70d..cbc2b32f7c87 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -273,6 +273,26 @@ static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr 
addr)
 return >cmbuf[addr - n->ctrl_mem.addr];
 }
 
+static bool nvme_addr_is_pmr(NvmeCtrl *n, hwaddr addr)
+{
+hwaddr hi, low;
+
+if (!n->pmr_cmse) {
+return false;
+}
+
+low = NVME_PMRMSC_CBA(n->bar.pmrmsc) << PMRMSC_CBA_SHIFT;
+hi  = low + int128_get64(n->pmrdev->mr.size);
+
+return addr >= low && addr < hi;
+}
+
+static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr addr)
+{
+hwaddr cba = NVME_PMRMSC_CBA(n->bar.pmrmsc) << PMRMSC_CBA_SHIFT;
+return memory_region_get_ram_ptr(>pmrdev->mr) + (addr - cba);
+}
+
 static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
 hwaddr hi = addr + size - 1;
@@ -285,6 +305,11 @@ static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void 
*buf, int size)
 return 0;
 }
 
+if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) {
+memcpy(buf, nvme_addr_to_pmr(n, addr), size);
+return 0;
+}
+
 return pci_dma_read(>parent_obj, addr, buf, size);
 }
 
@@ -406,9 +431,27 @@ static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, 
QEMUIOVector *iov, hwaddr addr,
 return NVME_SUCCESS;
 }
 
+static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
+size_t len)
+{
+if (!len) {
+return NVME_SUCCESS;
+}
+
+if (!nvme_addr_is_pmr(n, addr) || !nvme_addr_is_pmr(n, addr + len - 1)) {
+return NVME_DATA_TRAS_ERROR;
+}
+
+qemu_iovec_add(iov, nvme_addr_to_pmr(n, addr), len);
+
+return NVME_SUCCESS;
+}
+
 static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
   hwaddr addr, size_t len)
 {
+bool cmb = false, pmr = false;
+
 if (!len) {
 return NVME_SUCCESS;
 }
@@ -416,6 +459,12 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 trace_pci_nvme_map_addr(addr, len);
 
 if (nvme_addr_is_cmb(n, addr)) {
+cmb = true;
+} else if (nvme_addr_is_pmr(n, addr)) {
+pmr = true;
+}
+
+if (cmb || pmr) {
 if (qsg && qsg->sg) {
 return NVME_INVALID_USE_OF_CMB | NVME_DNR;
 }
@@ -426,7 +475,11 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 qemu_iovec_init(iov, 1);
 }
 
-return nvme_map_addr_cmb(n, iov, addr, len);
+if (cmb) {
+return nvme_map_addr_cmb(n, iov, addr, len);
+} else {
+return nvme_map_addr_pmr(n, iov, addr, len);
+}
 }
 
 if (iov && iov->iov) {
@@ -459,7 +512,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, 
uint64_t prp2,
 
 trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps);
 
-if (nvme_addr_is_cmb(n, prp1)) {
+if (nvme_addr_is_cmb(n, prp1) || (nvme_addr_is_pmr(n, prp1))) {
 qemu_iovec_init(iov, num_prps);
 } else {
 pci_dma_sglist_init(qsg, >parent_obj, num_prps);
@@ -3818,6 +3871,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 } else {
 memory_region_set_enabled(>pmrdev->mr, false);
 NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 1);
+n->pmr_cmse = false;
 }
 return;
 case 0xE08: /* PMRSTS */
@@ -3832,8 +3886,32 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly,

[PATCH v2 06/12] hw/block/nvme: rename PMR/CMB shift/mask fields

2021-01-18 Thread Klaus Jensen

From: Klaus Jensen 

Use the correct field names.

Signed-off-by: Klaus Jensen 
---
 include/block/nvme.h | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 86d7fc2f905c..f3cbe17d0971 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -35,8 +35,8 @@ enum NvmeCapShift {
 CAP_CSS_SHIFT  = 37,
 CAP_MPSMIN_SHIFT   = 48,
 CAP_MPSMAX_SHIFT   = 52,
-CAP_PMR_SHIFT  = 56,
-CAP_CMB_SHIFT  = 57,
+CAP_PMRS_SHIFT = 56,
+CAP_CMBS_SHIFT = 57,
 };
 
 enum NvmeCapMask {
@@ -49,8 +49,8 @@ enum NvmeCapMask {
 CAP_CSS_MASK   = 0xff,
 CAP_MPSMIN_MASK= 0xf,
 CAP_MPSMAX_MASK= 0xf,
-CAP_PMR_MASK   = 0x1,
-CAP_CMB_MASK   = 0x1,
+CAP_PMRS_MASK  = 0x1,
+CAP_CMBS_MASK  = 0x1,
 };
 
 #define NVME_CAP_MQES(cap)  (((cap) >> CAP_MQES_SHIFT)   & CAP_MQES_MASK)
@@ -81,10 +81,10 @@ enum NvmeCapMask {
<< CAP_MPSMIN_SHIFT)
 #define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & 
CAP_MPSMAX_MASK)\
<< CAP_MPSMAX_SHIFT)
-#define NVME_CAP_SET_PMRS(cap, val)   (cap |= (uint64_t)(val & CAP_PMR_MASK)   
\
-   << CAP_PMR_SHIFT)
-#define NVME_CAP_SET_CMBS(cap, val)   (cap |= (uint64_t)(val & CAP_CMB_MASK)   
\
-   << CAP_CMB_SHIFT)
+#define NVME_CAP_SET_PMRS(cap, val)   (cap |= (uint64_t)(val & CAP_PMRS_MASK)  
\
+   << CAP_PMRS_SHIFT)
+#define NVME_CAP_SET_CMBS(cap, val)   (cap |= (uint64_t)(val & CAP_CMBS_MASK)  
\
+   << CAP_CMBS_SHIFT)
 
 enum NvmeCapCss {
 NVME_CAP_CSS_NVM= 1 << 0,
-- 
2.30.0

1 2 >

1 - 100 of 115 matches

Mail list logo