from:"\"Maxim Levitsky\""

Re: [PATCH v6 12/42] nvme: add support for the get log page command

2020-03-25 Thread Maxim Levitsky

 %"PRIu16" lid 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off 
> %"PRIu64""
>  nvme_dev_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, 
> interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
>  nvme_dev_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, 
> interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
>  nvme_dev_mmio_cfg(uint64_t data) "wrote MMIO, config controller 
> config=0x%"PRIx64""
> @@ -85,6 +86,7 @@ nvme_dev_err_invalid_create_cq_qflags(uint16_t qflags) 
> "failed creating completi
>  nvme_dev_err_invalid_identify_cns(uint16_t cns) "identify, invalid 
> cns=0x%"PRIx16""
>  nvme_dev_err_invalid_getfeat(int dw10) "invalid get features, 
> dw10=0x%"PRIx32""
>  nvme_dev_err_invalid_setfeat(uint32_t dw10) "invalid set features, 
> dw10=0x%"PRIx32""
> +nvme_dev_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 
> 0x%"PRIx16""
>  nvme_dev_err_startfail_cq(void) "nvme_start_ctrl failed because there are 
> non-admin completion queues"
>  nvme_dev_err_startfail_sq(void) "nvme_start_ctrl failed because there are 
> non-admin submission queues"
>  nvme_dev_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the 
> admin submission queue address is null"


Best regards,
Maxim Levitsky

Re: [PATCH v6 11/42] nvme: add temperature threshold feature

2020-03-25 Thread Maxim Levitsky

it a/include/block/nvme.h b/include/block/nvme.h
> index a083c1b3a613..91fc4738a3e0 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -688,7 +688,13 @@ enum NvmeIdCtrlOncs {
>  typedef struct NvmeFeatureVal {
>  uint32_tarbitration;
>  uint32_tpower_mgmt;
> -uint32_ttemp_thresh;
> +union {
> +struct {
> +uint16_t temp_thresh_hi;
> +uint16_t temp_thresh_low;
> +};
> +uint32_t temp_thresh;
> +};
>  uint32_terr_rec;
>  uint32_tvolatile_wc;
>  uint32_tnum_queues;

With the 'temperature' field removed from the header:

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v6 06/42] nvme: add identify cns values in header

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 07:28 -0700, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f716f690a594..b38d7e548a60 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -709,11 +709,11 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
>  NvmeIdentify *c = (NvmeIdentify *)cmd;
>  
>  switch (le32_to_cpu(c->cns)) {
> -case 0x00:
> +case NVME_ID_CNS_NS:
>  return nvme_identify_ns(n, c);
> -case 0x01:
> +case NVME_ID_CNS_CTRL:
>  return nvme_identify_ctrl(n, c);
> -case 0x02:
> +case NVME_ID_CNS_NS_ACTIVE_LIST:
>  return nvme_identify_nslist(n, c);
>  default:
>  trace_nvme_dev_err_invalid_identify_cns(le32_to_cpu(c->cns));

This is a very good candidate to be squashed with patch 5 IMHO,
but you can leave this as is as well. I don't mind.

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v6 10/42] nvme: refactor device realization

2020-03-25 Thread Maxim Levitsky

BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
> -PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
> +/* no thin provisioning */
> +id_ns->ncap = id_ns->nsze;
> +id_ns->nuse = id_ns->ncap;
>  
> +return 0;
> +}
> +
> +static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> +{
> +NvmeCtrl *n = NVME(pci_dev);
> +int i;
> +
> +if (nvme_check_constraints(n, errp)) {
> +return;
> +}
> +
> +nvme_init_state(n);
> +
> +if (nvme_init_blk(n, errp)) {
> +return;
>  }
>  
>  for (i = 0; i < n->num_namespaces; i++) {
> -NvmeNamespace *ns = &n->namespaces[i];
> -NvmeIdNs *id_ns = &ns->id_ns;
> -id_ns->nsfeat = 0;
> -id_ns->nlbaf = 0;
> -id_ns->flbas = 0;
> -id_ns->mc = 0;
> -id_ns->dpc = 0;
> -id_ns->dps = 0;
> -id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> -id_ns->ncap  = id_ns->nuse = id_ns->nsze =
> -cpu_to_le64(n->ns_size >>
> -id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
> +if (nvme_init_namespace(n, &n->namespaces[i], errp)) {
> +return;
> +}
>  }
> +
> +nvme_init_pci(n, pci_dev);
> +nvme_init_ctrl(n);
>  }
>  
>  static void nvme_exit(PCIDevice *pci_dev)
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 98f5b9479244..b7c465560eea 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -67,6 +67,22 @@ typedef struct NvmeNamespace {
>  NvmeIdNsid_ns;
>  } NvmeNamespace;
>  
> +static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
> +{
> +NvmeIdNs *id_ns = &ns->id_ns;
> +return &id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
> +}
> +
> +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
> +{
> +return nvme_ns_lbaf(ns)->ds;
> +}
> +
> +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
> +{
> +return 1 << nvme_ns_lbads(ns);
> +}
> +
>  #define TYPE_NVME "nvme"
>  #define NVME(obj) \
>  OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
> @@ -88,8 +104,6 @@ typedef struct NvmeCtrl {
>  uint32_tnum_namespaces;
>  uint32_tmax_q_ents;
>  uint64_tns_size;
> -uint32_tcmbsz;
> -uint32_tcmbloc;
>  uint8_t *cmbuf;
>  uint64_tirq_status;
>  uint64_thost_timestamp; /* Timestamp sent by the 
> host */
> @@ -103,4 +117,9 @@ typedef struct NvmeCtrl {
>  NvmeIdCtrl  id_ctrl;
>  } NvmeCtrl;
>  
> +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +return n->ns_size >> nvme_ns_lbads(ns);
> +}
> +
>  #endif /* HW_NVME_H */

Small nitpick: To be honest, this is not only a refactoring of the device
realization, since you also (rightfully) removed the duplicated cmbsz/cmbloc,
so I would add a mention of this in the commit message.
But that doesn't matter that much, so

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v6 08/42] nvme: add support for the abort command

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 07:28 -0700, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1,
> Section 5.1 ("Abort command").
> 
> The Abort command is a best effort command; for now, the device always
> fails to abort the given command.
> 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> ---
>  hw/block/nvme.c | 27 +++
>  1 file changed, 27 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 08a83d449de3..7cf7cf55143e 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -729,6 +729,18 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
>  }
>  }
>  
> +static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +{
> +uint16_t sqid = le32_to_cpu(cmd->cdw10) & 0x;
> +
> +req->cqe.result = 1;
> +if (nvme_check_sqid(n, sqid)) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +return NVME_SUCCESS;
> +}
> +
>  static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
>  {
>  trace_nvme_dev_setfeat_timestamp(ts);
> @@ -863,6 +875,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, 
> NvmeRequest *req)
>  return nvme_create_cq(n, cmd);
>  case NVME_ADM_CMD_IDENTIFY:
>  return nvme_identify(n, cmd);
> +case NVME_ADM_CMD_ABORT:
> +return nvme_abort(n, cmd, req);
>  case NVME_ADM_CMD_SET_FEATURES:
>  return nvme_set_feature(n, cmd, req);
>  case NVME_ADM_CMD_GET_FEATURES:
> @@ -1375,6 +1389,19 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  id->ieee[1] = 0x02;
>  id->ieee[2] = 0xb3;
>  id->oacs = cpu_to_le16(0);
> +
> +/*
> + * Because the controller always completes the Abort command immediately,
> + * there can never be more than one concurrently executing Abort command,
> + * so this value is never used for anything. Note that there can easily 
> be
> + * many Abort commands in the queues, but they are not considered
> + * "executing" until processed by nvme_abort.
> + *
> + * The specification recommends a value of 3 for Abort Command Limit 
> (four
> + * concurrently outstanding Abort commands), so lets use that though it 
> is
> + * inconsequential.
> + */
> +id->acl = 3;
>  id->frmw = 7 << 1;
>  id->lpa = 1 << 0;
>  id->sqes = (0x6 << 4) | 0x6;

You forgot to carry over my Reviewed-by from the previous version.
I see that you also fixed the white space problem, thanks!
So,

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v6 07/42] nvme: refactor nvme_addr_read

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 07:28 -0700, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Pull the controller memory buffer check to its own function. The check
> will be used on its own in later patches.
> 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> ---
>  hw/block/nvme.c | 16 
>  1 file changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index b38d7e548a60..08a83d449de3 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -52,14 +52,22 @@
>  
>  static void nvme_process_sq(void *opaque);
>  
> +static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
> +{
> +hwaddr low = n->ctrl_mem.addr;
> +hwaddr hi  = n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size);
> +
> +return addr >= low && addr < hi;
> +}
> +
>  static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
>  {
> -if (n->cmbsz && addr >= n->ctrl_mem.addr &&
> -addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) {
> +if (n->cmbsz && nvme_addr_is_cmb(n, addr)) {
>  memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size);
> -} else {
> -pci_dma_read(&n->parent_obj, addr, buf, size);
> +return;
>  }
> +
> +pci_dma_read(&n->parent_obj, addr, buf, size);
>  }
>  
>  static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)

Note that this patch still contains a bug: it removes the check against the
accessed size, which you fix in a later patch.
I would prefer not to add a bug in the first place.
However, if you have a reason for this, I won't mind.

Best regards,
Maxim Levitsky

Re: [PATCH v6 01/42] nvme: rename trace events to nvme_dev

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 07:28 -0700, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Change the prefix of all nvme device related trace events to 'nvme_dev'
> to not clash with trace events from the nvme block driver.
> 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c   | 188 +-
>  hw/block/trace-events | 172 +++---
>  2 files changed, 180 insertions(+), 180 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index d28335cbf377..3e4b18956ed2 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -112,16 +112,16 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
>  {
>  if (cq->irq_enabled) {
>  if (msix_enabled(&(n->parent_obj))) {
> -trace_nvme_irq_msix(cq->vector);
> +trace_nvme_dev_irq_msix(cq->vector);
>  msix_notify(&(n->parent_obj), cq->vector);
>  } else {
> -trace_nvme_irq_pin();
> +trace_nvme_dev_irq_pin();
>  assert(cq->cqid < 64);
>  n->irq_status |= 1 << cq->cqid;
>  nvme_irq_check(n);
>  }
>  } else {
> -trace_nvme_irq_masked();
> +trace_nvme_dev_irq_masked();
>  }
>  }
>  
> @@ -146,7 +146,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
> QEMUIOVector *iov, uint64_t prp1,
>  int num_prps = (len >> n->page_bits) + 1;
>  
>  if (unlikely(!prp1)) {
> -trace_nvme_err_invalid_prp();
> +trace_nvme_dev_err_invalid_prp();
>  return NVME_INVALID_FIELD | NVME_DNR;
>  } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
> prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
> @@ -160,7 +160,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
> QEMUIOVector *iov, uint64_t prp1,
>  len -= trans_len;
>  if (len) {
>  if (unlikely(!prp2)) {
> -trace_nvme_err_invalid_prp2_missing();
> +trace_nvme_dev_err_invalid_prp2_missing();
>  goto unmap;
>  }
>  if (len > n->page_size) {
> @@ -176,7 +176,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
> QEMUIOVector *iov, uint64_t prp1,
>  
>  if (i == n->max_prp_ents - 1 && len > n->page_size) {
>  if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
> -trace_nvme_err_invalid_prplist_ent(prp_ent);
> +trace_nvme_dev_err_invalid_prplist_ent(prp_ent);
>  goto unmap;
>  }
>  
> @@ -189,7 +189,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
> QEMUIOVector *iov, uint64_t prp1,
>  }
>  
>  if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
> -trace_nvme_err_invalid_prplist_ent(prp_ent);
> +trace_nvme_dev_err_invalid_prplist_ent(prp_ent);
>  goto unmap;
>  }
>  
> @@ -204,7 +204,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
> QEMUIOVector *iov, uint64_t prp1,
>  }
>  } else {
>  if (unlikely(prp2 & (n->page_size - 1))) {
> -trace_nvme_err_invalid_prp2_align(prp2);
> +trace_nvme_dev_err_invalid_prp2_align(prp2);
>  goto unmap;
>  }
>  if (qsg->nsg) {
> @@ -252,20 +252,20 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t 
> *ptr, uint32_t len,
>  QEMUIOVector iov;
>  uint16_t status = NVME_SUCCESS;
>  
> -trace_nvme_dma_read(prp1, prp2);
> +trace_nvme_dev_dma_read(prp1, prp2);
>  
>  if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
>  if (qsg.nsg > 0) {
>  if (unlikely(dma_buf_read(ptr, len, &qsg))) {
> -trace_nvme_err_invalid_dma();
> +trace_nvme_dev_err_invalid_dma();
>  status = NVME_INVALID_FIELD | NVME_DNR;
>  }
>  qemu_sglist_destroy(&qsg);
>  } else {
>  if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) {
> -trace_nvme_err_invalid_dma();
> +trace_nvme_dev_err_invalid_dma();
>  status = NVME_INVALID_FIELD | NVME_DNR;
>  }
>  qemu_iovec_destroy(&iov);
> @@ -354,7 +354,7 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, 
> NvmeNamespace *ns, Nvme

Re: [PATCH v6 05/42] nvme: use constant for identify data size

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 07:28 -0700, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 40cb176dea3c..f716f690a594 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -679,7 +679,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
> NvmeIdentify *c)
>  
>  static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
>  {
> -static const int data_len = 4 * KiB;
> +static const int data_len = NVME_IDENTIFY_DATA_SIZE;
>  uint32_t min_nsid = le32_to_cpu(c->nsid);
>  uint64_t prp1 = le64_to_cpu(c->prp1);
>  uint64_t prp2 = le64_to_cpu(c->prp2);

I'd probably squash this with some other refactoring patch,
but I absolutely don't mind leaving this as is.
Fine-grained patches never cause any harm.

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v6 04/42] nvme: bump spec data structures to v1.3

2020-03-25 Thread Maxim Levitsky

 +uint16_tmtfa;
> +uint32_thmpre;
> +uint32_thmmin;
> +uint8_t tnvmcap[16];
> +uint8_t unvmcap[16];
> +uint32_trpmbs;
> +uint16_tedstt;
> +uint8_t dsto;
> +uint8_t fwug;
> +uint16_tkas;
> +uint16_thctma;
> +uint16_tmntmt;
> +uint16_tmxtmt;
> +uint32_tsanicap;
> +uint8_t rsvd332[180];
>  uint8_t sqes;
>  uint8_t cqes;
> -uint16_trsvd515;
> +uint16_tmaxcmd;
>  uint32_tnn;
>  uint16_toncs;
>  uint16_tfuses;
> @@ -562,8 +642,14 @@ typedef struct NvmeIdCtrl {
>  uint8_t vwc;
>  uint16_tawun;
>  uint16_tawupf;
> -uint8_t rsvd703[174];
> -uint8_t rsvd2047[1344];
> +uint8_t nvscc;
> +uint8_t rsvd531;
> +uint16_tacwu;
> +uint8_t rsvd534[2];
> +uint32_tsgls;
> +uint8_t rsvd540[228];
> +uint8_t subnqn[256];
> +uint8_t rsvd1024[1024];
>  NvmePSD psd[32];
>  uint8_t vs[1024];
>  } NvmeIdCtrl;
I checked the diff versus V5, cross referenced the spec and it looks correct 
now,
plus you documented even more fields which is welcome.


> @@ -589,6 +675,16 @@ enum NvmeIdCtrlOncs {
>  #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)
>  #define NVME_CTRL_CQES_MAX(cqes) (((cqes) >> 4) & 0xf)
>  
> +#define NVME_CTRL_SGLS_SUPPORTED_MASK(0x3 <<  0)
> +#define NVME_CTRL_SGLS_SUPPORTED_NO_ALIGNMENT(0x1 <<  0)
> +#define NVME_CTRL_SGLS_SUPPORTED_DWORD_ALIGNMENT (0x1 <<  1)
> +#define NVME_CTRL_SGLS_KEYED (0x1 <<  2)
> +#define NVME_CTRL_SGLS_BITBUCKET (0x1 << 16)
> +#define NVME_CTRL_SGLS_MPTR_CONTIGUOUS   (0x1 << 17)
> +#define NVME_CTRL_SGLS_EXCESS_LENGTH (0x1 << 18)
> +#define NVME_CTRL_SGLS_MPTR_SGL  (0x1 << 19)
> +#define NVME_CTRL_SGLS_ADDR_OFFSET   (0x1 << 20)
OK
> +
>  typedef struct NvmeFeatureVal {
>  uint32_tarbitration;
>  uint32_tpower_mgmt;
> @@ -611,6 +707,10 @@ typedef struct NvmeFeatureVal {
>  #define NVME_INTC_THR(intc) (intc & 0xff)
>  #define NVME_INTC_TIME(intc)((intc >> 8) & 0xff)
>  
> +#define NVME_TEMP_THSEL(temp)  ((temp >> 20) & 0x3)
Nitpick: If we are adding this, I'd add a #define for the values as well

> +#define NVME_TEMP_TMPSEL(temp) ((temp >> 16) & 0xf)
> +#define NVME_TEMP_TMPTH(temp)  ((temp >>  0) & 0x)
> +
>  enum NvmeFeatureIds {
>  NVME_ARBITRATION        = 0x1,
>  NVME_POWER_MANAGEMENT   = 0x2,
> @@ -653,18 +753,37 @@ typedef struct NvmeIdNs {
>  uint8_t mc;
>  uint8_t dpc;
>  uint8_t dps;
> -
>  uint8_t nmic;
>  uint8_t rescap;
>  uint8_t fpi;
>  uint8_t dlfeat;
> -
> -uint8_t res34[94];
> +uint16_tnawun;
> +uint16_tnawupf;
> +uint16_tnacwu;
> +uint16_tnabsn;
> +uint16_tnabo;
> +uint16_tnabspf;
> +uint16_tnoiob;
> +uint8_t nvmcap[16];
> +uint8_t rsvd64[40];
> +uint8_t nguid[16];
> +uint64_teui64;
>  NvmeLBAFlbaf[16];
> -uint8_t res192[192];
> +uint8_t rsvd192[192];
>  uint8_t vs[3712];
>  } NvmeIdNs;
Also checked this against V5, looks OK now

>  
> +typedef struct NvmeIdNsDescr {
> +uint8_t nidt;
> +uint8_t nidl;
> +uint8_t rsvd2[2];
> +} NvmeIdNsDescr;
OK



> +
> +#define NVME_NIDT_UUID_LEN 16
> +
> +enum {
> +NVME_NIDT_UUID = 0x3,
Very minor nitpick: I would add the others as well, just for the sake
of better understanding what this is

> +};
>  
>  /*Deallocate Logical Block Features*/
>  #define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat)   ((dlfeat) & 0x10)

Looks very good.

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v6 03/42] nvme: move device parameters to separate struct

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 07:28 -0700, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Move device configuration parameters to separate struct to make it
> explicit what is configurable and what is set internally.
> 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c | 44 ++--
>  hw/block/nvme.h | 16 +---
>  2 files changed, 35 insertions(+), 25 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 9740948b354a..b532818b4b76 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -64,12 +64,12 @@ static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void 
> *buf, int size)
>  
>  static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
>  {
> -return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1;
> +return sqid < n->params.num_queues && n->sq[sqid] != NULL ? 0 : -1;
>  }
>  
>  static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid)
>  {
> -return cqid < n->num_queues && n->cq[cqid] != NULL ? 0 : -1;
> +return cqid < n->params.num_queues && n->cq[cqid] != NULL ? 0 : -1;
>  }
>  
>  static void nvme_inc_cq_tail(NvmeCQueue *cq)
> @@ -631,7 +631,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
>  trace_nvme_dev_err_invalid_create_cq_addr(prp1);
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
> -if (unlikely(vector > n->num_queues)) {
> +if (unlikely(vector > n->params.num_queues)) {
>  trace_nvme_dev_err_invalid_create_cq_vector(vector);
>  return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
>  }
> @@ -783,7 +783,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  trace_nvme_dev_getfeat_vwcache(result ? "enabled" : "disabled");
>  break;
>  case NVME_NUMBER_OF_QUEUES:
> -result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 
> 16));
> +result = cpu_to_le32((n->params.num_queues - 2) |
> + ((n->params.num_queues - 2) << 16));
>  trace_nvme_dev_getfeat_numq(result);
>  break;
>  case NVME_TIMESTAMP:
> @@ -827,9 +828,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  case NVME_NUMBER_OF_QUEUES:
>  trace_nvme_dev_setfeat_numq((dw11 & 0x) + 1,
>  ((dw11 >> 16) & 0x) + 1,
> -n->num_queues - 1, n->num_queues - 1);
> -req->cqe.result =
> -cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
> +n->params.num_queues - 1,
> +n->params.num_queues - 1);
> +req->cqe.result = cpu_to_le32((n->params.num_queues - 2) |
> +  ((n->params.num_queues - 2) << 16));
>  break;
>  case NVME_TIMESTAMP:
>  return nvme_set_feature_timestamp(n, cmd);
> @@ -900,12 +902,12 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
>  
>  blk_drain(n->conf.blk);
>  
> -for (i = 0; i < n->num_queues; i++) {
> +for (i = 0; i < n->params.num_queues; i++) {
>  if (n->sq[i] != NULL) {
>  nvme_free_sq(n->sq[i], n);
>  }
>  }
> -for (i = 0; i < n->num_queues; i++) {
> +for (i = 0; i < n->params.num_queues; i++) {
>  if (n->cq[i] != NULL) {
>  nvme_free_cq(n->cq[i], n);
>  }
> @@ -1308,7 +1310,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  int64_t bs_size;
>  uint8_t *pci_conf;
>  
> -if (!n->num_queues) {
> +if (!n->params.num_queues) {
>  error_setg(errp, "num_queues can't be zero");
>  return;
>  }
> @@ -1324,7 +1326,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  return;
>  }
>  
> -if (!n->serial) {
> +if (!n->params.serial) {
>  error_setg(errp, "serial property not set");
>  return;
>  }
> @@ -1341,25 +1343,25 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  pcie_endpoint_cap_init(pci_dev, 0x80);
>  
>  n->num_namespaces = 1;
> -n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4);
> +n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
>  n->ns_size = bs_size / (uint64_

Re: [PATCH v6 00/42] nvme: support NVMe v1.3d, SGLs and multiple namespaces

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 07:28 -0700, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Hi,
> 
> So this patchset kinda blew up in size (wrt. number of patches) after
> Maxim's comments (26 -> 42), but Maxim's comments about splitting up a
> bunch of the patches made a lot of sense.
I don't think this is bad. 
You might actually have found the ultimate question of life, the universe, and
everything.
;-)

Best regards,
Maxim Levitsky

> 
> v6 primarily splits up the big nasty patches into more digestible parts.
> Specifically the 'nvme: refactor prp mapping' and 'nvme: allow multiple
> aios per command' patches has been split up according to Maxim's
> comments. Most additions to the shared include/block/nvme.h has also
> been consolidated into a single patch (also according to Maxim's
> comments). A lot of the patches still carries a 'Reviewed-By', but
> git-backport-diff reports some changes due to changes/additions in some
> of the early patches.
> 
> The only real "addition" is a new "max_ioqpairs" parameter for the
> device. This is to fix some confusion about the current "num_queues"
> parameter. See "nvme: add max_ioqpairs device parameter".
> 
> Maxim, I responded to your comments in the original thread and I believe
> that all your comments has been adressed.
> 
> Also, I *did* change the line indentation style - I hope I caught 'em
> all :)
> 
> 
> Klaus Jensen (42):
>   nvme: rename trace events to nvme_dev
>   nvme: remove superfluous breaks
>   nvme: move device parameters to separate struct
>   nvme: bump spec data structures to v1.3
>   nvme: use constant for identify data size
>   nvme: add identify cns values in header
>   nvme: refactor nvme_addr_read
>   nvme: add support for the abort command
>   nvme: add max_ioqpairs device parameter
>   nvme: refactor device realization
>   nvme: add temperature threshold feature
>   nvme: add support for the get log page command
>   nvme: add support for the asynchronous event request command
>   nvme: add missing mandatory features
>   nvme: additional tracing
>   nvme: make sure ncqr and nsqr is valid
>   nvme: add log specific field to trace events
>   nvme: support identify namespace descriptor list
>   nvme: enforce valid queue creation sequence
>   nvme: provide the mandatory subnqn field
>   nvme: bump supported version to v1.3
>   nvme: memset preallocated requests structures
>   nvme: add mapping helpers
>   nvme: remove redundant has_sg member
>   nvme: refactor dma read/write
>   nvme: pass request along for tracing
>   nvme: add request mapping helper
>   nvme: verify validity of prp lists in the cmb
>   nvme: refactor request bounds checking
>   nvme: add check for mdts
>   nvme: add check for prinfo
>   nvme: allow multiple aios per command
>   nvme: use preallocated qsg/iov in nvme_dma_prp
>   pci: pass along the return value of dma_memory_rw
>   nvme: handle dma errors
>   nvme: add support for scatter gather lists
>   nvme: refactor identify active namespace id list
>   nvme: support multiple namespaces
>   pci: allocate pci id for nvme
>   nvme: change controller pci id
>   nvme: remove redundant NvmeCmd pointer parameter
>   nvme: make lba data size configurable
> 
>  MAINTAINERS|1 +
>  block/nvme.c   |   18 +-
>  docs/specs/nvme.txt|   25 +
>  docs/specs/pci-ids.txt |1 +
>  hw/block/Makefile.objs |2 +-
>  hw/block/nvme-ns.c |  162 
>  hw/block/nvme-ns.h |   62 ++
>  hw/block/nvme.c| 2041 
>  hw/block/nvme.h|  205 +++-
>  hw/block/trace-events  |  206 ++--
>  hw/core/machine.c  |1 +
>  include/block/nvme.h   |  178 +++-
>  include/hw/pci/pci.h   |4 +-
>  13 files changed, 2347 insertions(+), 559 deletions(-)
>  create mode 100644 docs/specs/nvme.txt
>  create mode 100644 hw/block/nvme-ns.c
>  create mode 100644 hw/block/nvme-ns.h
>

Re: [PATCH v5 10/26] nvme: add support for the get log page command

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:45 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 11:35, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > Add support for the Get Log Page command and basic implementations of
> > > the mandatory Error Information, SMART / Health Information and Firmware
> > > Slot Information log pages.
> > > 
> > > In violation of the specification, the SMART / Health Information log
> > > page does not persist information over the lifetime of the controller
> > > because the device has no place to store such persistent state.
> > 
> > Yea, not the end of the world.
> > > 
> > > Note that the LPA field in the Identify Controller data structure
> > > intentionally has bit 0 cleared because there is no namespace specific
> > > information in the SMART / Health information log page.
> > 
> > Makes sense.
> > > 
> > > Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1,
> > > Section 5.10 ("Get Log Page command").
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >  hw/block/nvme.c   | 122 +-
> > >  hw/block/nvme.h   |  10 
> > >  hw/block/trace-events |   2 +
> > >  include/block/nvme.h  |   2 +-
> > >  4 files changed, 134 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index f72348344832..468c36918042 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -569,6 +569,123 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd 
> > > *cmd)
> > >  return NVME_SUCCESS;
> > >  }
> > >  
> > > +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t 
> > > buf_len,
> > > +uint64_t off, NvmeRequest *req)
> > > +{
> > > +uint64_t prp1 = le64_to_cpu(cmd->prp1);
> > > +uint64_t prp2 = le64_to_cpu(cmd->prp2);
> > > +uint32_t nsid = le32_to_cpu(cmd->nsid);
> > > +
> > > +uint32_t trans_len;
> > > +time_t current_ms;
> > > +uint64_t units_read = 0, units_written = 0, read_commands = 0,
> > > +write_commands = 0;
> > > +NvmeSmartLog smart;
> > > +BlockAcctStats *s;
> > > +
> > > +if (nsid && nsid != 0x) {
> > > +return NVME_INVALID_FIELD | NVME_DNR;
> > > +}
> > > +
> > > +s = blk_get_stats(n->conf.blk);
> > > +
> > > +units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> > > +units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> > > +read_commands = s->nr_ops[BLOCK_ACCT_READ];
> > > +write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> > > +
> > > +if (off > sizeof(smart)) {
> > > +return NVME_INVALID_FIELD | NVME_DNR;
> > > +}
> > > +
> > > +trans_len = MIN(sizeof(smart) - off, buf_len);
> > > +
> > > +memset(&smart, 0x0, sizeof(smart));
> > > +
> > > +smart.data_units_read[0] = cpu_to_le64(units_read / 1000);
> > > +smart.data_units_written[0] = cpu_to_le64(units_written / 1000);
> > > +smart.host_read_commands[0] = cpu_to_le64(read_commands);
> > > +smart.host_write_commands[0] = cpu_to_le64(write_commands);
> > > +
> > > +smart.temperature[0] = n->temperature & 0xff;
> > > +smart.temperature[1] = (n->temperature >> 8) & 0xff;
> > > +
> > > +if ((n->temperature > n->features.temp_thresh_hi) ||
> > > +(n->temperature < n->features.temp_thresh_low)) {
> > > +smart.critical_warning |= NVME_SMART_TEMPERATURE;
> > > +}
> > > +
> > > +current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > +smart.power_on_hours[0] = cpu_to_le64(
> > > +(((current_ms - n->starttime_ms) / 1000) / 60) / 60);
> > > +
> > > +return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, 
> > > prp1,
> > > +prp2);
> > > +}
> > 
> > Looks OK.
> > > +
> > > +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t 
> > > buf_len,
> > > +uint64_t off, NvmeRequest *req)
> > > +{
> > > +uint32_t trans_len;
> > > +uint64_t

Re: [PATCH v5 21/26] nvme: add support for scatter gather lists

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:54 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 14:07, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> > > For now, support the Data Block, Segment and Last Segment descriptor
> > > types.
> > > 
> > > See NVM Express 1.3d, Section 4.4 ("Scatter Gather List (SGL)").
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > Acked-by: Fam Zheng 
> > > ---
> > >  block/nvme.c  |  18 +-
> > >  hw/block/nvme.c   | 375 +++---
> > >  hw/block/trace-events |   4 +
> > >  include/block/nvme.h  |  62 ++-
> > >  4 files changed, 389 insertions(+), 70 deletions(-)
> > > 
> > > diff --git a/block/nvme.c b/block/nvme.c
> > > index d41c4bda6e39..521f521054d5 100644
> > > --- a/block/nvme.c
> > > +++ b/block/nvme.c
> > > @@ -446,7 +446,7 @@ static void nvme_identify(BlockDriverState *bs, int 
> > > namespace, Error **errp)
> > >  error_setg(errp, "Cannot map buffer for DMA");
> > >  goto out;
> > >  }
> > > -cmd.prp1 = cpu_to_le64(iova);
> > > +cmd.dptr.prp.prp1 = cpu_to_le64(iova);
> > >  
> > >  if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
> > >  error_setg(errp, "Failed to identify controller");
> > > @@ -545,7 +545,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, 
> > > Error **errp)
> > >  }
> > >  cmd = (NvmeCmd) {
> > >  .opcode = NVME_ADM_CMD_CREATE_CQ,
> > > -.prp1 = cpu_to_le64(q->cq.iova),
> > > +.dptr.prp.prp1 = cpu_to_le64(q->cq.iova),
> > >  .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0x)),
> > >  .cdw11 = cpu_to_le32(0x3),
> > >  };
> > > @@ -556,7 +556,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, 
> > > Error **errp)
> > >  }
> > >  cmd = (NvmeCmd) {
> > >  .opcode = NVME_ADM_CMD_CREATE_SQ,
> > > -.prp1 = cpu_to_le64(q->sq.iova),
> > > +.dptr.prp.prp1 = cpu_to_le64(q->sq.iova),
> > >  .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0x)),
> > >  .cdw11 = cpu_to_le32(0x1 | (n << 16)),
> > >  };
> > > @@ -906,16 +906,16 @@ try_map:
> > >  case 0:
> > >  abort();
> > >  case 1:
> > > -cmd->prp1 = pagelist[0];
> > > -cmd->prp2 = 0;
> > > +cmd->dptr.prp.prp1 = pagelist[0];
> > > +cmd->dptr.prp.prp2 = 0;
> > >  break;
> > >  case 2:
> > > -cmd->prp1 = pagelist[0];
> > > -cmd->prp2 = pagelist[1];
> > > +cmd->dptr.prp.prp1 = pagelist[0];
> > > +cmd->dptr.prp.prp2 = pagelist[1];
> > >  break;
> > >  default:
> > > -cmd->prp1 = pagelist[0];
> > > -cmd->prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t));
> > > +cmd->dptr.prp.prp1 = pagelist[0];
> > > +cmd->dptr.prp.prp2 = cpu_to_le64(req->prp_list_iova + 
> > > sizeof(uint64_t));
> > >  break;
> > >  }
> > >  trace_nvme_cmd_map_qiov(s, cmd, req, qiov, entries);
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index 204ae1d33234..a91c60fdc111 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -75,8 +75,10 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, 
> > > hwaddr addr)
> > >  
> > >  static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
> > >  {
> > > -if (n->cmbsz && nvme_addr_is_cmb(n, addr)) {
> > > -memcpy(buf, (void *) &n->cmbuf[addr - n->ctrl_mem.addr], size);
> > > +hwaddr hi = addr + size;
> > 
> > Are you sure you don't want to check for overflow here?
> > It's a theoretical issue since addr has to be almost full 64 bit
> > but still for those things I check this very defensively.
> > 
> 
> The use of nvme_addr_read in map_prp simply cannot overflow due to how
> the size is calculated, but for SGLs it's different. But the overflow is
> checked in map_sgl because we have to return a special error code in
> that case.
> 
> On the other hand there may be other callers of nvme_add

Re: [PATCH v5 22/26] nvme: support multiple namespaces

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:55 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 14:34, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> > > This adds support for multiple namespaces by introducing a new 'nvme-ns'
> > > device model. The nvme device creates a bus named from the device name
> > > > ('id'). The nvme-ns devices then connect to this and register
> > > themselves with the nvme device.
> > > 
> > > This changes how an nvme device is created. Example with two namespaces:
> > > 
> > >   -drive file=nvme0n1.img,if=none,id=disk1
> > >   -drive file=nvme0n2.img,if=none,id=disk2
> > >   -device nvme,serial=deadbeef,id=nvme0
> > >   -device nvme-ns,drive=disk1,bus=nvme0,nsid=1
> > >   -device nvme-ns,drive=disk2,bus=nvme0,nsid=2
> > > 
> > > The drive property is kept on the nvme device to keep the change
> > > backward compatible, but the property is now optional. Specifying a
> > > drive for the nvme device will always create the namespace with nsid 1.
> > 
> > Very reasonable way to do it. 
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >  hw/block/Makefile.objs |   2 +-
> > >  hw/block/nvme-ns.c | 158 +++
> > >  hw/block/nvme-ns.h |  60 +++
> > >  hw/block/nvme.c| 235 +
> > >  hw/block/nvme.h|  47 -
> > >  hw/block/trace-events  |   6 +-
> > >  6 files changed, 389 insertions(+), 119 deletions(-)
> > >  create mode 100644 hw/block/nvme-ns.c
> > >  create mode 100644 hw/block/nvme-ns.h
> > > 
> > > diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
> > > index 28c2495a00dc..45f463462f1e 100644
> > > --- a/hw/block/Makefile.objs
> > > +++ b/hw/block/Makefile.objs
> > > @@ -7,7 +7,7 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
> > >  common-obj-$(CONFIG_XEN) += xen-block.o
> > >  common-obj-$(CONFIG_ECC) += ecc.o
> > >  common-obj-$(CONFIG_ONENAND) += onenand.o
> > > -common-obj-$(CONFIG_NVME_PCI) += nvme.o
> > > +common-obj-$(CONFIG_NVME_PCI) += nvme.o nvme-ns.o
> > >  common-obj-$(CONFIG_SWIM) += swim.o
> > >  
> > >  obj-$(CONFIG_SH4) += tc58128.o
> > > diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
> > > new file mode 100644
> > > index ..0e5be44486f4
> > > --- /dev/null
> > > +++ b/hw/block/nvme-ns.c
> > > @@ -0,0 +1,158 @@
> > > +#include "qemu/osdep.h"
> > > +#include "qemu/units.h"
> > > +#include "qemu/cutils.h"
> > > +#include "qemu/log.h"
> > > +#include "hw/block/block.h"
> > > +#include "hw/pci/msix.h"
> > 
> > Do you need this include?
> 
> No, I needed hw/pci/pci.h instead :)
I think it compiled without that include,
but including pci.h for a pci device is the right thing
anyway.

> 
> > > +#include "sysemu/sysemu.h"
> > > +#include "sysemu/block-backend.h"
> > > +#include "qapi/error.h"
> > > +
> > > +#include "hw/qdev-properties.h"
> > > +#include "hw/qdev-core.h"
> > > +
> > > +#include "nvme.h"
> > > +#include "nvme-ns.h"
> > > +
> > > +static int nvme_ns_init(NvmeNamespace *ns)
> > > +{
> > > +NvmeIdNs *id_ns = &ns->id_ns;
> > > +
> > > +id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> > > +id_ns->nuse = id_ns->ncap = id_ns->nsze =
> > > +cpu_to_le64(nvme_ns_nlbas(ns));
> > 
> > Nitpick: To be honest I don't really like that chain assignment, 
> > especially since it forces to wrap the line, but that is just my
> > personal taste.
> 
> Fixed, and also added a comment as to why they are the same.
> 
> > > +
> > > +return 0;
> > > +}
> > > +
> > > +static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, NvmeIdCtrl 
> > > *id,
> > > +Error **errp)
> > > +{
> > > +uint64_t perm, shared_perm;
> > > +
> > > +Error *local_err = NULL;
> > > +int ret;
> > > +
> > > +perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
> > > +shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
> > > +BLK_PERM_GRAPH

Re: [PATCH v5 15/26] nvme: bump supported specification to 1.3

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:50 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 12:35, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > Add new fields to the Identify Controller and Identify Namespace data
> > > > structures according to NVM Express 1.3d.
> > > 
> > > NVM Express 1.3d requires the following additional features:
> > >   - addition of the Namespace Identification Descriptor List (CNS 03h)
> > > for the Identify command
> > >   - support for returning Command Sequence Error if a Set Features
> > > command is submitted for the Number of Queues feature after any I/O
> > > queues have been created.
> > >   - The addition of the Log Specific Field (LSP) in the Get Log Page
> > > command.
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >  hw/block/nvme.c   | 57 ---
> > >  hw/block/nvme.h   |  1 +
> > >  hw/block/trace-events |  3 ++-
> > >  include/block/nvme.h  | 20 ++-
> > >  4 files changed, 71 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index 900732bb2f38..4acfc85b56a2 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -9,7 +9,7 @@
> > >   */
> > >  
> > >  /**
> > > - * Reference Specification: NVM Express 1.2.1
> > > + * Reference Specification: NVM Express 1.3d
> > >   *
> > >   *   https://nvmexpress.org/resources/specifications/
> > >   */
> > > @@ -43,7 +43,7 @@
> > >  #include "trace.h"
> > >  #include "nvme.h"
> > >  
> > > -#define NVME_SPEC_VER 0x00010201
> > > +#define NVME_SPEC_VER 0x00010300
> > >  #define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
> > >  #define NVME_TEMPERATURE 0x143
> > >  #define NVME_TEMPERATURE_WARNING 0x157
> > > @@ -735,6 +735,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd 
> > > *cmd, NvmeRequest *req)
> > >  uint32_t dw12 = le32_to_cpu(cmd->cdw12);
> > >  uint32_t dw13 = le32_to_cpu(cmd->cdw13);
> > >  uint8_t  lid = dw10 & 0xff;
> > > +uint8_t  lsp = (dw10 >> 8) & 0xf;
> > >  uint8_t  rae = (dw10 >> 15) & 0x1;
> > >  uint32_t numdl, numdu;
> > >  uint64_t off, lpol, lpou;
> > > @@ -752,7 +753,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd 
> > > *cmd, NvmeRequest *req)
> > >  return NVME_INVALID_FIELD | NVME_DNR;
> > >  }
> > >  
> > > -trace_nvme_dev_get_log(nvme_cid(req), lid, rae, len, off);
> > > +trace_nvme_dev_get_log(nvme_cid(req), lid, lsp, rae, len, off);
> > >  
> > >  switch (lid) {
> > >  case NVME_LOG_ERROR_INFO:
> > > @@ -863,6 +864,8 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd 
> > > *cmd)
> > >  cq = g_malloc0(sizeof(*cq));
> > >  nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1,
> > >  NVME_CQ_FLAGS_IEN(qflags));
> > 
> > Code alignment on that '('
> > > +
> > > +n->qs_created = true;
> > 
> > Should be done also at nvme_create_sq
> 
> No, because you can't create a SQ without a matching CQ:
True, I missed that.

> 
> if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
> trace_nvme_dev_err_invalid_create_sq_cqid(cqid);
> return NVME_INVALID_CQID | NVME_DNR;
> }
> 
> 
> So if there is a matching cq, then qs_created = true.
> 
> > >  return NVME_SUCCESS;
> > >  }
> > >  
> > > @@ -924,6 +927,47 @@ static uint16_t nvme_identify_ns_list(NvmeCtrl *n, 
> > > NvmeIdentify *c)
> > >  return ret;
> > >  }
> > >  
> > > +static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *c)
> > > +{
> > > +static const int len = 4096;
> > 
> > The spec caps the Identify payload size to 4K,
> > thus this should go to nvme.h
> 
> Done.
> 
> > > +
> > > +struct ns_descr {
> > > +uint8_t nidt;
> > > +uint8_t nidl;
> > > +uint8_t rsvd2[2];
> > > +uint8_t nid[16];
> > > +};
> > 
> > This is also part of the spec, thus should
> > move to nvme.h
> > 
> 
> Done - and cleaned up.
Perfect, thanks!
> 
> > > +
> > > +uint32_t nsid =

Re: [PATCH v5 16/26] nvme: refactor prp mapping

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:51 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 13:44, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > Refactor nvme_map_prp and allow PRPs to be located in the CMB. The logic
> > > ensures that if some of the PRP is in the CMB, all of it must be located
> > > there, as per the specification.
> > 
> > To be honest this looks like not refactoring but a bugfix
> > (old code was just assuming that if first prp entry is in cmb, the rest 
> > also is)
> 
> I split it up into a separate bugfix patch.
> 
> > > 
> > > Also combine nvme_dma_{read,write}_prp into a single nvme_dma_prp that
> > > takes an additional DMADirection parameter.
> > 
> > To be honest 'nvme_dma_prp' was not a clear function name to me at first 
> > glance.
> > > Could you rename this to nvme_dma_prp_rw or so? (Although even that does not
> > > clearly convey
> > > the meaning of reading/writing the data to/from the guest memory areas
> > > defined by the prp list.)
> > Also could you split this change into a new patch?
> > 
> 
> Splitting into new patch.
> 
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > Signed-off-by: Klaus Jensen 
> > 
> > > Now you even use both your addresses :-)
> > 
> > > ---
> > >  hw/block/nvme.c   | 245 +++---
> > >  hw/block/nvme.h   |   2 +-
> > >  hw/block/trace-events |   1 +
> > >  include/block/nvme.h  |   1 +
> > >  4 files changed, 160 insertions(+), 89 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index 4acfc85b56a2..334265efb21e 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -58,6 +58,11 @@
> > >  
> > >  static void nvme_process_sq(void *opaque);
> > >  
> > > +static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
> > > +{
> > > +return &n->cmbuf[addr - n->ctrl_mem.addr];
> > > +}
> > 
> > To my taste I would put this together with the patch that
> > added nvme_addr_is_cmb. I know that some people are against
> > this citing the fact that you should use the code you add
> > in the same patch. Your call.
> > 
> > Regardless of this I also prefer to put refactoring patches first in the 
> > series.
Thanks!
> > 
> > > +
> > >  static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
> > >  {
> > >  hwaddr low = n->ctrl_mem.addr;
> > > @@ -152,138 +157,187 @@ static void nvme_irq_deassert(NvmeCtrl *n, 
> > > NvmeCQueue *cq)
> > >  }
> > >  }
> > >  
> > > -static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, 
> > > uint64_t prp1,
> > > - uint64_t prp2, uint32_t len, NvmeCtrl *n)
> > > +static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector 
> > > *iov,
> > > +uint64_t prp1, uint64_t prp2, uint32_t len, NvmeRequest *req)
> > 
> > Split line alignment (it was correct before).
> > > Also while at the refactoring, it would be great to add some documentation
> > > to this and a few more functions, since it's not immediately clear what this 
> > > does.
> > 
> > 
> > >  {
> > >  hwaddr trans_len = n->page_size - (prp1 % n->page_size);
> > >  trans_len = MIN(len, trans_len);
> > >  int num_prps = (len >> n->page_bits) + 1;
> > > +uint16_t status = NVME_SUCCESS;
> > > +bool is_cmb = false;
> > > +bool prp_list_in_cmb = false;
> > > +
> > > +trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, 
> > > len,
> > > +prp1, prp2, num_prps);
> > >  
> > >  if (unlikely(!prp1)) {
> > >  trace_nvme_dev_err_invalid_prp();
> > >  return NVME_INVALID_FIELD | NVME_DNR;
> > > -} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
> > > -   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) 
> > > {
> > > -qsg->nsg = 0;
> > > +}
> > > +
> > > +if (nvme_addr_is_cmb(n, prp1)) {
> > > +is_cmb = true;
> > > +
> > >  qemu_iovec_init(iov, num_prps);
> > > -qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr

Re: [PATCH v5 14/26] nvme: make sure ncqr and nsqr is valid

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:48 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 12:30, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > > 0xffff is not an allowed value for NCQR and NSQR in Set Features on
> > > Number of Queues.
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >  hw/block/nvme.c | 4 
> > >  1 file changed, 4 insertions(+)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index 30c5b3e7a67d..900732bb2f38 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -1133,6 +1133,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, 
> > > NvmeCmd *cmd, NvmeRequest *req)
> > >  blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
> > >  break;
> > >  case NVME_NUMBER_OF_QUEUES:
> > > +if ((dw11 & 0x) == 0x || ((dw11 >> 16) & 0x) == 
> > > 0x) {
> > > +return NVME_INVALID_FIELD | NVME_DNR;
> > > +}
> > 
> > Very minor nitpick: since this spec requirement is not obvious, a 
> > quote/reference to the spec
> > would be nice to have here. 
> > 
> 
> Added.
Thanks!
> 
> > > +
> > >  trace_nvme_dev_setfeat_numq((dw11 & 0x) + 1,
> > >  ((dw11 >> 16) & 0x) + 1, n->params.num_queues - 1,
> > >  n->params.num_queues - 1);
> > 
> > Reviewed-by: Maxim Levitsky 
> > 
> > Best regards,
> > Maxim Levitsky
> > 
> 
> 

Best regards,
Maxim Levitsky

Re: [PATCH v5 17/26] nvme: allow multiple aios per command

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:53 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 13:48, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > This refactors how the device issues asynchronous block backend
> > > requests. The NvmeRequest now holds a queue of NvmeAIOs that are
> > > associated with the command. This allows multiple aios to be issued for
> > > a command. Only when all requests have been completed will the device
> > > post a completion queue entry.
> > > 
> > > Because the device is currently guaranteed to only issue a single aio
> > > request per command, the benefit is not immediately obvious. But this
> > > functionality is required to support metadata, the dataset management
> > > command and other features.
> > 
> > > I don't know what strategy will be chosen for supporting metadata
> > (qemu doesn't have any notion of metadata in the block layer), but for 
> > dataset management
> > you are right. Dataset management command can contain a table of areas to 
> > discard
> > > (although in reality I have seen no driver putting there more than one 
> > entry).
> > 
> 
> The strategy is different depending on how the metadata is transferred
> between host and device. For the "separate buffer" case, metadata is
> transferred using a separate memory pointer in the nvme command (MPTR).
> In this case the metadata is kept separately on a new blockdev attached
> to the namespace.
Looks reasonable.
> 


> In the other case, metadata is transferred as part of an extended lba
> (say 512 + 8 bytes) and kept inline on the main namespace blockdev. This
> is challenging for QEMU as it breaks interoperability of the image with
> other devices. But that is a discussion for fresh RFC ;)

Yes, this one is quite problematic. IMHO even the kernel opted not to
support this kind of metadata (I know that since I played with one of Intel's 
enterprise
SSDs when I developed nvme-mdev, and sadly this is the only kind of metadata it 
supports).
I guess if we have to support this format (for the sake of making our nvme 
virtual device
as feature complete as possible for driver development), I would emulate this 
with a
separate drive as well.

> 
> Note that the support for multiple AIOs is also used for DULBE support
Is this a typo? I don't recall anything like that from the spec.

> down the line when I get around to posting those patches. So this is
> > preparatory for a lot of features that require persistent state across
> device power off.
All right. Thanks again for your work. I wish I had all these features
when I developed nvme-mdev, it would make my life much easier.

> 
> > 
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >  hw/block/nvme.c   | 449 +-
> > >  hw/block/nvme.h   | 134 +++--
> > >  hw/block/trace-events |   8 +
> > >  3 files changed, 480 insertions(+), 111 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index 334265efb21e..e97da35c4ca1 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -19,7 +19,8 @@
> > >   *  -drive file=,if=none,id=
> > >   *  -device nvme,drive=,serial=,id=, 
> > > \
> > >   *  cmb_size_mb=, \
> > > - *  num_queues=
> > > + *  num_queues=, \
> > > + *  mdts=
> > 
> > Could you split mdts checks into a separate patch? This is not related to 
> > the series.
> 
> Absolutely. Done.
Perfect, thanks!
> 
> > 
> > >   *
> > >   * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
> > >   * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
> > > @@ -57,6 +58,7 @@
> > >  } while (0)
> > >  
> > >  static void nvme_process_sq(void *opaque);
> > > +static void nvme_aio_cb(void *opaque, int ret);
> > >  
> > >  static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
> > >  {
> > > @@ -341,6 +343,107 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t 
> > > *ptr, uint32_t len,
> > >  return status;
> > >  }
> > >  
> > > +static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> > > +{
> > > +NvmeNamespace *ns = req->ns;
> > > +
> > > +uint32_t len = req->nlb << nvme_ns_lbads(ns);
> > > +uint64_t prp1 = le64_to_cpu(cm

Re: [PATCH v5 20/26] nvme: handle dma errors

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:53 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 13:52, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> > > Handling DMA errors gracefully is required for the device to pass the
> > > block/011 test ("disable PCI device while doing I/O") in the blktests
> > > suite.
> > > 
> > > With this patch the device passes the test by retrying "critical"
> > > transfers (posting of completion entries and processing of submission
> > > queue entries).
> > > 
> > > If DMA errors occur at any other point in the execution of the command
> > > (say, while mapping the PRPs), the command is aborted with a Data
> > > Transfer Error status code.
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >  hw/block/nvme.c   | 42 +-
> > >  hw/block/trace-events |  2 ++
> > >  include/block/nvme.h  |  2 +-
> > >  3 files changed, 36 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index f8c81b9e2202..204ae1d33234 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -73,14 +73,14 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, 
> > > hwaddr addr)
> > >  return addr >= low && addr < hi;
> > >  }
> > >  
> > > -static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
> > > +static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
> > >  {
> > >  if (n->cmbsz && nvme_addr_is_cmb(n, addr)) {
> > >  memcpy(buf, (void *) &n->cmbuf[addr - n->ctrl_mem.addr], size);
> > > -return;
> > > +return 0;
> > >  }
> > >  
> > > -pci_dma_read(&n->parent_obj, addr, buf, size);
> > > +return pci_dma_read(&n->parent_obj, addr, buf, size);
> > >  }
> > >  
> > >  static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
> > > @@ -168,6 +168,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
> > > *qsg, QEMUIOVector *iov,
> > >  uint16_t status = NVME_SUCCESS;
> > >  bool is_cmb = false;
> > >  bool prp_list_in_cmb = false;
> > > +int ret;
> > >  
> > >  trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, 
> > > len,
> > >  prp1, prp2, num_prps);
> > > @@ -218,7 +219,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
> > > *qsg, QEMUIOVector *iov,
> > >  
> > >  nents = (len + n->page_size - 1) >> n->page_bits;
> > >  prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
> > > -nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
> > > +ret = nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
> > > +if (ret) {
> > > +trace_nvme_dev_err_addr_read(prp2);
> > > +status = NVME_DATA_TRANSFER_ERROR;
> > > +goto unmap;
> > > +}
> > >  while (len != 0) {
> > >  uint64_t prp_ent = le64_to_cpu(prp_list[i]);
> > >  
> > > @@ -237,7 +243,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
> > > *qsg, QEMUIOVector *iov,
> > >  i = 0;
> > >  nents = (len + n->page_size - 1) >> n->page_bits;
> > >  prp_trans = MIN(n->max_prp_ents, nents) * 
> > > sizeof(uint64_t);
> > > -nvme_addr_read(n, prp_ent, (void *) prp_list, 
> > > prp_trans);
> > > +ret = nvme_addr_read(n, prp_ent, (void *) prp_list,
> > > +prp_trans);
> > > +if (ret) {
> > > +trace_nvme_dev_err_addr_read(prp_ent);
> > > +status = NVME_DATA_TRANSFER_ERROR;
> > > +goto unmap;
> > > +}
> > >  prp_ent = le64_to_cpu(prp_list[i]);
> > >  }
> > >  
> > > @@ -443,6 +455,7 @@ static void nvme_post_cqes(void *opaque)
> > >  NvmeCQueue *cq = opaque;
> > >  NvmeCtrl *n = cq->ctrl;
> > >  NvmeRequest *req, *next;
> > > +int ret;
> > >

Re: [PATCH v5 10/26] nvme: add support for the get log page command

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:45 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 11:35, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > Add support for the Get Log Page command and basic implementations of
> > > the mandatory Error Information, SMART / Health Information and Firmware
> > > Slot Information log pages.
> > > 
> > > In violation of the specification, the SMART / Health Information log
> > > page does not persist information over the lifetime of the controller
> > > because the device has no place to store such persistent state.
> > 
> > Yea, not the end of the world.
> > > 
> > > Note that the LPA field in the Identify Controller data structure
> > > intentionally has bit 0 cleared because there is no namespace specific
> > > information in the SMART / Health information log page.
> > 
> > Makes sense.
> > > 
> > > Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1,
> > > Section 5.10 ("Get Log Page command").
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >  hw/block/nvme.c   | 122 +-
> > >  hw/block/nvme.h   |  10 
> > >  hw/block/trace-events |   2 +
> > >  include/block/nvme.h  |   2 +-
> > >  4 files changed, 134 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index f72348344832..468c36918042 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -569,6 +569,123 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd 
> > > *cmd)
> > >  return NVME_SUCCESS;
> > >  }
> > >  
> > > +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t 
> > > buf_len,
> > > +uint64_t off, NvmeRequest *req)
> > > +{
> > > +uint64_t prp1 = le64_to_cpu(cmd->prp1);
> > > +uint64_t prp2 = le64_to_cpu(cmd->prp2);
> > > +uint32_t nsid = le32_to_cpu(cmd->nsid);
> > > +
> > > +uint32_t trans_len;
> > > +time_t current_ms;
> > > +uint64_t units_read = 0, units_written = 0, read_commands = 0,
> > > +write_commands = 0;
> > > +NvmeSmartLog smart;
> > > +BlockAcctStats *s;
> > > +
> > > +if (nsid && nsid != 0x) {
> > > +return NVME_INVALID_FIELD | NVME_DNR;
> > > +}
> > > +
> > > +s = blk_get_stats(n->conf.blk);
> > > +
> > > +units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> > > +units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> > > +read_commands = s->nr_ops[BLOCK_ACCT_READ];
> > > +write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> > > +
> > > +if (off > sizeof(smart)) {
> > > +return NVME_INVALID_FIELD | NVME_DNR;
> > > +}
> > > +
> > > +trans_len = MIN(sizeof(smart) - off, buf_len);
> > > +
> > > +memset(&smart, 0x0, sizeof(smart));
> > > +
> > > +smart.data_units_read[0] = cpu_to_le64(units_read / 1000);
> > > +smart.data_units_written[0] = cpu_to_le64(units_written / 1000);
> > > +smart.host_read_commands[0] = cpu_to_le64(read_commands);
> > > +smart.host_write_commands[0] = cpu_to_le64(write_commands);
> > > +
> > > +smart.temperature[0] = n->temperature & 0xff;
> > > +smart.temperature[1] = (n->temperature >> 8) & 0xff;
> > > +
> > > +if ((n->temperature > n->features.temp_thresh_hi) ||
> > > +(n->temperature < n->features.temp_thresh_low)) {
> > > +smart.critical_warning |= NVME_SMART_TEMPERATURE;
> > > +}
> > > +
> > > +current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > +smart.power_on_hours[0] = cpu_to_le64(
> > > +(((current_ms - n->starttime_ms) / 1000) / 60) / 60);
> > > +
> > > +return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, 
> > > prp1,
> > > +prp2);
> > > +}
> > 
> > Looks OK.
> > > +
> > > +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t 
> > > buf_len,
> > > +uint64_t off, NvmeRequest *req)
> > > +{
> > > +uint32_t trans_len;
> > > +uint64_t

Re: [PATCH v5 12/26] nvme: add missing mandatory features

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:47 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 12:27, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > > Add support for returning a reasonable response to Get/Set Features of
> > > mandatory features.
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >  hw/block/nvme.c   | 57 ---
> > >  hw/block/trace-events |  2 ++
> > >  include/block/nvme.h  |  3 ++-
> > >  3 files changed, 58 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index a186d95df020..3267ee2de47a 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -1008,7 +1008,15 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, 
> > > NvmeCmd *cmd, NvmeRequest *req)
> > >  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> > >  uint32_t result;
> > >  
> > > +trace_nvme_dev_getfeat(nvme_cid(req), dw10);
> > > +
> > >  switch (dw10) {
> > > +case NVME_ARBITRATION:
> > > +result = cpu_to_le32(n->features.arbitration);
> > > +break;
> > > +case NVME_POWER_MANAGEMENT:
> > > +result = cpu_to_le32(n->features.power_mgmt);
> > > +break;
> > >  case NVME_TEMPERATURE_THRESHOLD:
> > >  result = 0;
> > >  
> > > @@ -1029,6 +1037,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, 
> > > NvmeCmd *cmd, NvmeRequest *req)
> > >  break;
> > >  }
> > >  
> > > +break;
> > > +case NVME_ERROR_RECOVERY:
> > > +result = cpu_to_le32(n->features.err_rec);
> > >  break;
> > >  case NVME_VOLATILE_WRITE_CACHE:
> > >  result = blk_enable_write_cache(n->conf.blk);
> > 
> > > This is existing code, but I'd still like to point out that endianness conversion 
> > > is missing.
> 
> Fixed.
> 
> > Also we need to think if we need to do some flush if the write cache is 
> > disabled.
> > > I don't know that area well enough yet.
> > 
> 
> Looking at the block layer code it just sets a flag when disabling, but
> subsequent requests will have BDRV_REQ_FUA set. So to make sure that
> stuff in the cache is flushed, let's do a flush.
Good to know!

> 
> > > @@ -1041,6 +1052,19 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, 
> > > NvmeCmd *cmd, NvmeRequest *req)
> > >  break;
> > >  case NVME_TIMESTAMP:
> > >  return nvme_get_feature_timestamp(n, cmd);
> > > +case NVME_INTERRUPT_COALESCING:
> > > +result = cpu_to_le32(n->features.int_coalescing);
> > > +break;
> > > +case NVME_INTERRUPT_VECTOR_CONF:
> > > +if ((dw11 & 0x) > n->params.num_queues) {
> > 
> > Looks like it should be >= since interrupt vector is not zero based.
> 
> Fixed in other patch.
> 
> > > +return NVME_INVALID_FIELD | NVME_DNR;
> > > +}
> > > +
> > > +result = cpu_to_le32(n->features.int_vector_config[dw11 & 
> > > 0x]);
> > > +break;
> > > +case NVME_WRITE_ATOMICITY:
> > > +result = cpu_to_le32(n->features.write_atomicity);
> > > +break;
> > >  case NVME_ASYNCHRONOUS_EVENT_CONF:
> > >  result = cpu_to_le32(n->features.async_config);
> > >  break;
> > > @@ -1076,6 +1100,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, 
> > > NvmeCmd *cmd, NvmeRequest *req)
> > >  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> > >  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> > >  
> > > +trace_nvme_dev_setfeat(nvme_cid(req), dw10, dw11);
> > > +
> > >  switch (dw10) {
> > >  case NVME_TEMPERATURE_THRESHOLD:
> > >  if (NVME_TEMP_TMPSEL(dw11)) {
> > > @@ -1116,6 +1142,13 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, 
> > > NvmeCmd *cmd, NvmeRequest *req)
> > >  case NVME_ASYNCHRONOUS_EVENT_CONF:
> > >  n->features.async_config = dw11;
> > >  break;
> > > +case NVME_ARBITRATION:
> > > +case NVME_POWER_MANAGEMENT:
> > > +case NVME_ERROR_RECOVERY:
> > > +case NVME_INTERRUPT_COALESCING:
> > > +case NVME_INTERRUPT_VECTOR_CONF:
> > > +

Re: [PATCH v5 09/26] nvme: add temperature threshold feature

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:44 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 11:31, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > > It might seem weird to implement this feature for an emulated device,
> > > but it is mandatory to support and the feature is useful for testing
> > > asynchronous event request support, which will be added in a later
> > > patch.
> > 
> > > Absolutely, but as the old saying goes, rules are rules.
> > At least, to the defense of the spec, making this mandatory
> > forced the vendors to actually report some statistics about
> > the device in neutral format as opposed to yet another
> > vendor proprietary thing (I am talking about SMART log page).
> > 
> > > 
> > > Signed-off-by: Klaus Jensen 
> > 
> > I noticed that you sign off some patches with your @samsung.com email,
> > and some with @cnexlabs.com
> > Is there a reason for that?
> 
> Yeah. Some of this code was made while I was at CNEX Labs. I've since
> moved to Samsung. But credit where credit's due.
I suspected something like that, but I just wanted to be sure that this is 
intentional,
and it looks all right to me now.

> 
> > 
> > 
> > > ---
> > >  hw/block/nvme.c  | 50 
> > >  hw/block/nvme.h  |  2 ++
> > >  include/block/nvme.h |  7 ++-
> > >  3 files changed, 58 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index 81514eaef63a..f72348344832 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -45,6 +45,9 @@
> > >  
> > >  #define NVME_SPEC_VER 0x00010201
> > >  #define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
> > > +#define NVME_TEMPERATURE 0x143
> > > +#define NVME_TEMPERATURE_WARNING 0x157
> > > +#define NVME_TEMPERATURE_CRITICAL 0x175
> > >  
> > >  #define NVME_GUEST_ERR(trace, fmt, ...) \
> > >  do { \
> > > @@ -798,9 +801,31 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl 
> > > *n, NvmeCmd *cmd)
> > >  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest 
> > > *req)
> > >  {
> > >  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> > > +uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> > >  uint32_t result;
> > >  
> > >  switch (dw10) {
> > > +case NVME_TEMPERATURE_THRESHOLD:
> > > +result = 0;
> > > +
> > > +/*
> > > + * The controller only implements the Composite Temperature 
> > > sensor, so
> > > + * return 0 for all other sensors.
> > > + */
> > > +if (NVME_TEMP_TMPSEL(dw11)) {
> > > +break;
> > > +}
> > > +
> > > +switch (NVME_TEMP_THSEL(dw11)) {
> > > +case 0x0:
> > > +result = cpu_to_le16(n->features.temp_thresh_hi);
> > > +break;
> > > +case 0x1:
> > > +result = cpu_to_le16(n->features.temp_thresh_low);
> > > +break;
> > > +}
> > > +
> > > +break;
> > >  case NVME_VOLATILE_WRITE_CACHE:
> > >  result = blk_enable_write_cache(n->conf.blk);
> > >  trace_nvme_dev_getfeat_vwcache(result ? "enabled" : "disabled");
> > > @@ -845,6 +870,23 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, 
> > > NvmeCmd *cmd, NvmeRequest *req)
> > >  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> > >  
> > >  switch (dw10) {
> > > +case NVME_TEMPERATURE_THRESHOLD:
> > > +if (NVME_TEMP_TMPSEL(dw11)) {
> > > +break;
> > > +}
> > > +
> > > +switch (NVME_TEMP_THSEL(dw11)) {
> > > +case 0x0:
> > > +n->features.temp_thresh_hi = NVME_TEMP_TMPTH(dw11);
> > > +break;
> > > +case 0x1:
> > > +n->features.temp_thresh_low = NVME_TEMP_TMPTH(dw11);
> > > +break;
> > > +default:
> > > +return NVME_INVALID_FIELD | NVME_DNR;
> > > +}
> > > +
> > > +break;
> > >  case NVME_VOLATILE_WRITE_CACHE:
> > >  blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
> > >  break;
> > > @@ -1366,6 +1408,9 @@

Re: [PATCH v5 08/26] nvme: refactor device realization

2020-03-25 Thread Maxim Levitsky

On Mon, 2020-03-16 at 00:43 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 11:27, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > This patch splits up nvme_realize into multiple individual functions,
> > > each initializing a different subset of the device.
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >  hw/block/nvme.c | 175 +++-
> > >  hw/block/nvme.h |  21 ++
> > >  2 files changed, 133 insertions(+), 63 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index e1810260d40b..81514eaef63a 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -44,6 +44,7 @@
> > >  #include "nvme.h"
> > >  
> > >  #define NVME_SPEC_VER 0x00010201
> > > +#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
> > >  
> > >  #define NVME_GUEST_ERR(trace, fmt, ...) \
> > >  do { \
> > > @@ -1325,67 +1326,106 @@ static const MemoryRegionOps nvme_cmb_ops = {
> > >  },
> > >  };
> > >  
> > > -static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> > > +static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
> > >  {
> > > -NvmeCtrl *n = NVME(pci_dev);
> > > -NvmeIdCtrl *id = &n->id_ctrl;
> > > -
> > > -int i;
> > > -int64_t bs_size;
> > > -uint8_t *pci_conf;
> > > -
> > > -if (!n->params.num_queues) {
> > > -error_setg(errp, "num_queues can't be zero");
> > > -return;
> > > -}
> > > +NvmeParams *params = &n->params;
> > >  
> > >  if (!n->conf.blk) {
> > > -error_setg(errp, "drive property not set");
> > > -return;
> > > +error_setg(errp, "nvme: block backend not configured");
> > > +return 1;
> > 
> > As a matter of taste, negative values indicate error, and 0 is the success 
> > value.
> > In Linux kernel this is even an official rule.
> > >  }
> 
> Fixed.
> 
> > >  
> > > -bs_size = blk_getlength(n->conf.blk);
> > > -if (bs_size < 0) {
> > > -error_setg(errp, "could not get backing file size");
> > > -return;
> > > +if (!params->serial) {
> > > +error_setg(errp, "nvme: serial not configured");
> > > +return 1;
> > >  }
> > >  
> > > -if (!n->params.serial) {
> > > -error_setg(errp, "serial property not set");
> > > -return;
> > > +if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) {
> > > +error_setg(errp, "nvme: invalid queue configuration");
> > 
> > Maybe something like "nvme: invalid queue count specified, should be 
> > between 1 and ..."?
> > > +return 1;
> > >  }
> 
> Fixed.
Thanks
> 
> > > +
> > > +return 0;
> > > +}
> > > +
> > > +static int nvme_init_blk(NvmeCtrl *n, Error **errp)
> > > +{
> > >  blkconf_blocksizes(&n->conf);
> > >  if (!blkconf_apply_backend_options(&n->conf, 
> > > blk_is_read_only(n->conf.blk),
> > > -   false, errp)) {
> > > -return;
> > > +false, errp)) {
> > > +return 1;
> > >  }
> > >  
> > > -pci_conf = pci_dev->config;
> > > -pci_conf[PCI_INTERRUPT_PIN] = 1;
> > > -pci_config_set_prog_interface(pci_dev->config, 0x2);
> > > -pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
> > > -pcie_endpoint_cap_init(pci_dev, 0x80);
> > > +return 0;
> > > +}
> > >  
> > > +static void nvme_init_state(NvmeCtrl *n)
> > > +{
> > >  n->num_namespaces = 1;
> > >  n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
> > 
> > Isn't that wrong?
> > First 4K of mmio (0x1000) is the registers, and that is followed by the 
> > doorbells,
> > and each doorbell takes 8 bytes (assuming regular doorbell stride).
> > so n->params.num_queues + 1 should be total number of queues, thus the 
> > 0x1004 should be 0x1000 IMHO.
> > I might miss some r

Re: [PATCH v2 00/14] LUKS: encryption slot management using amend interface

2020-03-12 Thread Maxim Levitsky

On Thu, 2020-03-12 at 06:56 -0500, Eric Blake wrote:
> On 3/8/20 10:18 AM, Maxim Levitsky wrote:
> > Hi!
> > Here is the updated series of my patches, incorporating all the feedback I 
> > received.
> > 
> > Patches are strictly divided by topic to 3 groups, and each group depends 
> > on former groups.
> > 
> > * Patches 1,2 implement qcrypto generic amend interface, including 
> > definition
> >of structs used in crypto.json and implement this in luks crypto driver
> >Nothing is exposed to the user at this stage
> > 
> > * Patches 3-9 use the code from patches 1,2 to implement qemu-img amend 
> > based encryption slot management
> >for luks and for qcow2, and add a bunch of iotests to cover that.
> > 
> > * Patches 10-13 add x-blockdev-amend (I'll drop the -x prefix if you like), 
> > and wire it
> >to luks and qcow2 driver to implement qmp based encryption slot 
> > management also using
> >the code from patches 1,2, and also add a bunch of iotests to cover this.
> >   tests/qemu-iotests/284.out   |   6 +-
> >   tests/qemu-iotests/300   | 207 
> 
> Any reason why you skipped straight to test 300, rather than using an 
> available slot like 290?  (Admittedly, our process for reserving slots 
> is not very high-tech: manually scan the list for what other patches out 
> there have claimed a slot, and be prepared to renumber when rebasing)
The only reason I used these slots is that, sadly, I know I'll have to resend and
rebase this patchset for a while, and every time a test with the number I use is added,
it causes a relatively hard-to-fix conflict (or at least I don't know how to
fix these conflicts effectively).

Thus I used safe numbers, but at the rate this task progresses I won't be 
surprised if, by the time this is merged,
these will be the next test numbers to use...

TL;DR - these are placeholders, and once the patch set is blessed for merging 
upstream I'll update them to the next
available numbers.

Best regards,
Maxim Levitsky

Re: [PATCH v2 02/14] qcrypto/luks: implement encryption key management

2020-03-11 Thread Maxim Levitsky

On Tue, 2020-03-10 at 14:02 +0200, Maxim Levitsky wrote:
> On Tue, 2020-03-10 at 12:59 +0100, Kevin Wolf wrote:
> > Am 10.03.2020 um 12:05 hat Maxim Levitsky geschrieben:
> > > On Tue, 2020-03-10 at 11:58 +0100, Max Reitz wrote:
> > > > On 08.03.20 16:18, Maxim Levitsky wrote:
> > > > > Next few patches will expose that functionality
> > > > > to the user.
> > > > > 
> > > > > Signed-off-by: Maxim Levitsky 
> > > > > ---
> > > > >  crypto/block-luks.c | 398 
> > > > > +++-
> > > > >  qapi/crypto.json|  61 ++-
> > > > >  2 files changed, 455 insertions(+), 4 deletions(-)
> > > > 
> > > > [...]
> > > > 
> > > > > +##
> > > > > +# @QCryptoBlockAmendOptionsLUKS:
> > > > > +#
> > > > > +# This struct defines the update parameters that 
> > > > > activate/de-activate set
> > > > > +# of keyslots
> > > > > +#
> > > > > +# @state: the desired state of the keyslots
> > > > > +#
> > > > > +# @new-secret:The ID of a QCryptoSecret object providing the 
> > > > > password to be
> > > > > +# written into added active keyslots
> > > > > +#
> > > > > +# @old-secret:Optional (for deactivation only)
> > > > > +# If given will deactive all keyslots that
> > > > > +# match password located in QCryptoSecret with this 
> > > > > ID
> > > > > +#
> > > > > +# @iter-time: Optional (for activation only)
> > > > > +# Number of milliseconds to spend in
> > > > > +# PBKDF passphrase processing for the newly 
> > > > > activated keyslot.
> > > > > +# Currently defaults to 2000.
> > > > > +#
> > > > > +# @keyslot:   Optional. ID of the keyslot to activate/deactivate.
> > > > > +# For keyslot activation, keyslot should not be 
> > > > > active already
> > > > > +# (this is unsafe to update an active keyslot),
> > > > > +# but possible if 'force' parameter is given.
> > > > > +# If keyslot is not given, first free keyslot will 
> > > > > be written.
> > > > > +#
> > > > > +# For keyslot deactivation, this parameter specifies 
> > > > > the exact
> > > > > +# keyslot to deactivate
> > > > > +#
> > > > > +# @unlock-secret: Optional. The ID of a QCryptoSecret object 
> > > > > providing the
> > > > > +# password to use to retrive current master key.
> > > > > +# Defaults to the same secret that was used to open 
> > > > > the image
> > > > 
> > > > So this matches Markus’ proposal except everything is flattened (because
> > > > we don’t support nested unions, AFAIU).  Sounds OK to me.  The only
> > > > difference is @unlock-secret, which did not appear in his proposal.  Why
> > > > do we need it again?
> > > 
> > > That a little undocumented hack that will disappear one day.
> > 
> > It is very much documented (just a few lines above this one), and even
> > if it weren't documented, that wouldn't make it an unstable ABI.
> > 
> > If you don't want to make it to become stable ABI, you either need to
> > drop it or it needs an x- prefix, and its documentation should specify
> > what prevents it from being a stable ABI.
> > 
> > > Its because the driver currently doesn't keep a copy of the master key,
> > > and instead only keeps ciper objects, often from outside libraries,
> > > and in theory these objects might even be implemented in hardware so that
> > > master key might be not in memory at all, so I kind of don't want yet
> > > to keep it in memory.
> > > Thus when doing the key management, I need to retrieve the master key 
> > > again,
> > > similar to how it is done on image opening. I use the same secret as was 
> > > used for opening,
> > > but in case the keys were changed already, that secret might not work 
> > > anymore.
> > > Thus I added this parameter to specify basically the old password, which 
> > > is reasonable
> > > when updating passwords.
> > > I usually omit this hack in the discussions as it is orthogonal to the 
> > > rest of the API.
> > 
> > How will this requirement disappear one day?
> 
> If I cave in and keep a copy of the master key in the memory :-)
> 
> Best regards,
>   Maxim Levitsky
> 
> > 
> > Kevin
> 
> 
OK folks, besides this hack (which I can remove if you insist, although I don't
think it matters), what else should I do to move forward to get this accepted?

Best regards,
Maxim Levitsky

Re: [PATCH v2 02/14] qcrypto/luks: implement encryption key management

2020-03-10 Thread Maxim Levitsky

On Tue, 2020-03-10 at 12:59 +0100, Kevin Wolf wrote:
> Am 10.03.2020 um 12:05 hat Maxim Levitsky geschrieben:
> > On Tue, 2020-03-10 at 11:58 +0100, Max Reitz wrote:
> > > On 08.03.20 16:18, Maxim Levitsky wrote:
> > > > Next few patches will expose that functionality
> > > > to the user.
> > > > 
> > > > Signed-off-by: Maxim Levitsky 
> > > > ---
> > > >  crypto/block-luks.c | 398 +++-
> > > >  qapi/crypto.json|  61 ++-
> > > >  2 files changed, 455 insertions(+), 4 deletions(-)
> > > 
> > > [...]
> > > 
> > > > +##
> > > > +# @QCryptoBlockAmendOptionsLUKS:
> > > > +#
> > > > +# This struct defines the update parameters that activate/de-activate 
> > > > set
> > > > +# of keyslots
> > > > +#
> > > > +# @state: the desired state of the keyslots
> > > > +#
> > > > +# @new-secret:The ID of a QCryptoSecret object providing the 
> > > > password to be
> > > > +# written into added active keyslots
> > > > +#
> > > > +# @old-secret:Optional (for deactivation only)
> > > > +# If given will deactive all keyslots that
> > > > +# match password located in QCryptoSecret with this ID
> > > > +#
> > > > +# @iter-time: Optional (for activation only)
> > > > +# Number of milliseconds to spend in
> > > > +# PBKDF passphrase processing for the newly activated 
> > > > keyslot.
> > > > +# Currently defaults to 2000.
> > > > +#
> > > > +# @keyslot:   Optional. ID of the keyslot to activate/deactivate.
> > > > +# For keyslot activation, keyslot should not be active 
> > > > already
> > > > +# (this is unsafe to update an active keyslot),
> > > > +# but possible if 'force' parameter is given.
> > > > +# If keyslot is not given, first free keyslot will be 
> > > > written.
> > > > +#
> > > > +# For keyslot deactivation, this parameter specifies 
> > > > the exact
> > > > +# keyslot to deactivate
> > > > +#
> > > > +# @unlock-secret: Optional. The ID of a QCryptoSecret object providing 
> > > > the
> > > > +# password to use to retrive current master key.
> > > > +# Defaults to the same secret that was used to open 
> > > > the image
> > > 
> > > So this matches Markus’ proposal except everything is flattened (because
> > > we don’t support nested unions, AFAIU).  Sounds OK to me.  The only
> > > difference is @unlock-secret, which did not appear in his proposal.  Why
> > > do we need it again?
> > 
> > That a little undocumented hack that will disappear one day.
> 
> It is very much documented (just a few lines above this one), and even
> if it weren't documented, that wouldn't make it an unstable ABI.
> 
> If you don't want to make it to become stable ABI, you either need to
> drop it or it needs an x- prefix, and its documentation should specify
> what prevents it from being a stable ABI.
> 
> > Its because the driver currently doesn't keep a copy of the master key,
> > and instead only keeps ciper objects, often from outside libraries,
> > and in theory these objects might even be implemented in hardware so that
> > master key might be not in memory at all, so I kind of don't want yet
> > to keep it in memory.
> > Thus when doing the key management, I need to retrieve the master key again,
> > similar to how it is done on image opening. I use the same secret as was 
> > used for opening,
> > but in case the keys were changed already, that secret might not work 
> > anymore.
> > Thus I added this parameter to specify basically the old password, which is 
> > reasonable
> > when updating passwords.
> > I usually omit this hack in the discussions as it is orthogonal to the rest 
> > of the API.
> 
> How will this requirement disappear one day?
If I cave in and keep a copy of the master key in the memory :-)

Best regards,
Maxim Levitsky

> 
> Kevin

Re: [PATCH v2 02/14] qcrypto/luks: implement encryption key management

2020-03-10 Thread Maxim Levitsky

On Tue, 2020-03-10 at 11:58 +0100, Max Reitz wrote:
> On 08.03.20 16:18, Maxim Levitsky wrote:
> > Next few patches will expose that functionality
> > to the user.
> > 
> > Signed-off-by: Maxim Levitsky 
> > ---
> >  crypto/block-luks.c | 398 +++-
> >  qapi/crypto.json|  61 ++-
> >  2 files changed, 455 insertions(+), 4 deletions(-)
> 
> [...]
> 
> > +##
> > +# @QCryptoBlockAmendOptionsLUKS:
> > +#
> > +# This struct defines the update parameters that activate/de-activate set
> > +# of keyslots
> > +#
> > +# @state: the desired state of the keyslots
> > +#
> > +# @new-secret:The ID of a QCryptoSecret object providing the password 
> > to be
> > +# written into added active keyslots
> > +#
> > +# @old-secret:Optional (for deactivation only)
> > +# If given will deactive all keyslots that
> > +# match password located in QCryptoSecret with this ID
> > +#
> > +# @iter-time: Optional (for activation only)
> > +# Number of milliseconds to spend in
> > +# PBKDF passphrase processing for the newly activated 
> > keyslot.
> > +# Currently defaults to 2000.
> > +#
> > +# @keyslot:   Optional. ID of the keyslot to activate/deactivate.
> > +# For keyslot activation, keyslot should not be active 
> > already
> > +# (this is unsafe to update an active keyslot),
> > +# but possible if 'force' parameter is given.
> > +# If keyslot is not given, first free keyslot will be 
> > written.
> > +#
> > +# For keyslot deactivation, this parameter specifies the 
> > exact
> > +# keyslot to deactivate
> > +#
> > +# @unlock-secret: Optional. The ID of a QCryptoSecret object providing the
> > +# password to use to retrive current master key.
> > +# Defaults to the same secret that was used to open the 
> > image
> 
> So this matches Markus’ proposal except everything is flattened (because
> we don’t support nested unions, AFAIU).  Sounds OK to me.  The only
> difference is @unlock-secret, which did not appear in his proposal.  Why
> do we need it again?

That's a little undocumented hack that will disappear one day.
It's because the driver currently doesn't keep a copy of the master key,
and instead only keeps cipher objects, often from outside libraries,
and in theory these objects might even be implemented in hardware so that
the master key might not be in memory at all, so I kind of don't want yet
to keep it in memory.
Thus when doing the key management, I need to retrieve the master key again,
similar to how it is done on image opening. I use the same secret as was used 
for opening,
but in case the keys were changed already, that secret might not work anymore.
Thus I added this parameter to specify basically the old password, which is 
reasonable
when updating passwords.
I usually omit this hack in the discussions as it is orthogonal to the rest of 
the API.

Best regards,
Maxim Levitsky


> 
> Max
>

Re: [PATCH v5 00/11] HMP monitor handlers refactoring

2020-03-09 Thread Maxim Levitsky

On Mon, 2020-03-09 at 18:30 +, Dr. David Alan Gilbert wrote:
> * Maxim Levitsky (mlevi...@redhat.com) wrote:
> > This patch series is bunch of cleanups to the hmp monitor code.
> > It mostly moves the blockdev related hmp handlers to its own file,
> > and does some minor refactoring.
> > 
> > No functional changes expected.
> 
> Queued for HMP, with the commit message fix up in 05.
Thanks a million!

Best regards,
Maxim Levitsky

> 
> Dave
> 
> > Changes from V1:
> >* move the handlers to block/monitor/block-hmp-cmds.c
> >* tiny cleanup for the commit messages
> > 
> > Changes from V2:
> >* Moved all the function prototypes to new header (blockdev-hmp-cmds.h)
> >* Set the license of blockdev-hmp-cmds.c to GPLv2+
> >* Moved hmp_snapshot_* functions to blockdev-hmp-cmds.c
> >* Moved hmp_drive_add_node to blockdev-hmp-cmds.c
> >  (this change needed some new exports, thus in separate new patch)
> >* Moved hmp_qemu_io and hmp_eject to blockdev-hmp-cmds.c
> >* Added 'error:' prefix to vreport, and updated the iotests
> >  This is invasive change, but really feels like the right one
> >* Added minor refactoring patch that drops an unused #include
> > 
> > Changes from V3:
> >* Dropped the error prefix patches for now due to fact that it seems
> >  that libvirt doesn't need that after all. Oh well...
> >  I'll send them in a separate series.
> > 
> >* Hopefully correctly merged the copyright info the new files
> >  Both files are GPLv2 now (due to code from hmp.h/hmp-cmds.c)
> > 
> >* Addressed review feedback
> >* Renamed the added header to block-hmp-cmds.h
> > 
> >* Got rid of checkpatch.pl warnings in the moved code
> >  (cosmetic code changes only)
> > 
> >* I kept the reviewed-by tags, since the changes I did are minor.
> >  I hope that this is right thing to do.
> > 
> > Changes from V4:
> >* Rebase with recent changes
> >* Fixed review feedback
> > 
> > Best regards,
> > Maxim Levitsky
> > 
> > Maxim Levitsky (11):
> >   usb/dev-storage: remove unused include
> >   monitor/hmp: inline add_init_drive
> >   monitor/hmp: rename device-hotplug.c to block/monitor/block-hmp-cmds.c
> >   monitor/hmp: move hmp_drive_del and hmp_commit to block-hmp-cmds.c
> >   monitor/hmp: move hmp_drive_mirror and hmp_drive_backup to
> > block-hmp-cmds.c Moved code was added after 2012-01-13, thus under
> > GPLv2+
> >   monitor/hmp: move hmp_block_job* to block-hmp-cmds.c
> >   monitor/hmp: move hmp_snapshot_* to block-hmp-cmds.c
> >   monitor/hmp: move hmp_nbd_server* to block-hmp-cmds.c
> >   monitor/hmp: move remaining hmp_block* functions to block-hmp-cmds.c
> >   monitor/hmp: move hmp_info_block* to block-hmp-cmds.c
> >   monitor/hmp: Move hmp_drive_add_node to block-hmp-cmds.c
> > 
> >  MAINTAINERS|1 +
> >  Makefile.objs  |2 +-
> >  block/Makefile.objs|1 +
> >  block/monitor/Makefile.objs|1 +
> >  block/monitor/block-hmp-cmds.c | 1015 
> >  blockdev.c |  137 +
> >  device-hotplug.c   |   91 ---
> >  hw/usb/dev-storage.c   |1 -
> >  include/block/block-hmp-cmds.h |   54 ++
> >  include/block/block_int.h  |5 +-
> >  include/monitor/hmp.h  |   24 -
> >  include/sysemu/blockdev.h  |4 -
> >  include/sysemu/sysemu.h|3 -
> >  monitor/hmp-cmds.c |  782 
> >  monitor/misc.c |1 +
> >  15 files changed, 1085 insertions(+), 1037 deletions(-)
> >  create mode 100644 block/monitor/Makefile.objs
> >  create mode 100644 block/monitor/block-hmp-cmds.c
> >  delete mode 100644 device-hotplug.c
> >  create mode 100644 include/block/block-hmp-cmds.h
> > 
> > -- 
> > 2.17.2
> > 
> 
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH v5 05/11] monitor/hmp: move hmp_drive_mirror and hmp_drive_backup to block-hmp-cmds.c Moved code was added after 2012-01-13, thus under GPLv2+

2020-03-09 Thread Maxim Levitsky

On Mon, 2020-03-09 at 16:31 +, Dr. David Alan Gilbert wrote:
> * Maxim Levitsky (mlevi...@redhat.com) wrote:
> > 
> > I see that I have the same issue of long subject line here.
> > Its because I forgot the space after first line, when adding this.
> > If I need to resend another version of this patchset I'll fix this,
> > but otherwise maybe that can be fixed when applying this to one of 
> > maintainer's
> > trees.
> > 
> > Sorry for noise.
> 
> I can just fix the commit message.


Thank you!!
Best regards,
    Maxim Levitsky
> 
> Dave
> 
> > Best regards,
> > Maxim Levitsky
> > 
> > On Sun, 2020-03-08 at 11:24 +0200, Maxim Levitsky wrote:
> > > Signed-off-by: Maxim Levitsky 
> > > Reviewed-by: Dr. David Alan Gilbert 
> > > ---
> > >  block/monitor/block-hmp-cmds.c | 60 ++
> > >  include/block/block-hmp-cmds.h | 12 +--
> > >  include/monitor/hmp.h  |  2 --
> > >  monitor/hmp-cmds.c | 58 
> > >  4 files changed, 69 insertions(+), 63 deletions(-)
> > > 
> > > diff --git a/block/monitor/block-hmp-cmds.c 
> > > b/block/monitor/block-hmp-cmds.c
> > > index ad727a6b08..d6dd5d97f7 100644
> > > --- a/block/monitor/block-hmp-cmds.c
> > > +++ b/block/monitor/block-hmp-cmds.c
> > > @@ -37,10 +37,12 @@
> > >  #include "qapi/qapi-commands-block.h"
> > >  #include "qapi/qmp/qdict.h"
> > >  #include "qapi/error.h"
> > > +#include "qapi/qmp/qerror.h"
> > >  #include "qemu/config-file.h"
> > >  #include "qemu/option.h"
> > >  #include "sysemu/sysemu.h"
> > >  #include "monitor/monitor.h"
> > > +#include "monitor/hmp.h"
> > >  #include "block/block_int.h"
> > >  #include "block/block-hmp-cmds.h"
> > >  
> > > @@ -187,3 +189,61 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
> > >  error_report("'commit' error for '%s': %s", device, 
> > > strerror(-ret));
> > >  }
> > >  }
> > > +
> > > +void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
> > > +{
> > > +const char *filename = qdict_get_str(qdict, "target");
> > > +const char *format = qdict_get_try_str(qdict, "format");
> > > +bool reuse = qdict_get_try_bool(qdict, "reuse", false);
> > > +bool full = qdict_get_try_bool(qdict, "full", false);
> > > +Error *err = NULL;
> > > +DriveMirror mirror = {
> > > +.device = (char *)qdict_get_str(qdict, "device"),
> > > +.target = (char *)filename,
> > > +.has_format = !!format,
> > > +.format = (char *)format,
> > > +.sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
> > > +.has_mode = true,
> > > +.mode = reuse ? NEW_IMAGE_MODE_EXISTING : 
> > > NEW_IMAGE_MODE_ABSOLUTE_PATHS,
> > > +.unmap = true,
> > > +};
> > > +
> > > +if (!filename) {
> > > +error_setg(&err, QERR_MISSING_PARAMETER, "target");
> > > +hmp_handle_error(mon, err);
> > > +return;
> > > +}
> > > +qmp_drive_mirror(&mirror, &err);
> > > +hmp_handle_error(mon, err);
> > > +}
> > > +
> > > +void hmp_drive_backup(Monitor *mon, const QDict *qdict)
> > > +{
> > > +const char *device = qdict_get_str(qdict, "device");
> > > +const char *filename = qdict_get_str(qdict, "target");
> > > +const char *format = qdict_get_try_str(qdict, "format");
> > > +bool reuse = qdict_get_try_bool(qdict, "reuse", false);
> > > +bool full = qdict_get_try_bool(qdict, "full", false);
> > > +bool compress = qdict_get_try_bool(qdict, "compress", false);
> > > +Error *err = NULL;
> > > +DriveBackup backup = {
> > > +.device = (char *)device,
> > > +.target = (char *)filename,
> > > +.has_format = !!format,
> > > +.format = (char *)format,
> > > +.sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
> > > +.has_mode = true,
> > > +.mode = reuse ? NEW_IMAGE_MODE_EXISTING : 
> > >

[PATCH v2 13/14] block/qcow2: implement blockdev-amend

2020-03-08 Thread Maxim Levitsky

Currently the implementation only supports amending the encryption
options, unlike the qemu-img version

Signed-off-by: Maxim Levitsky 
Reviewed-by: Daniel P. Berrangé 
---
 block/qcow2.c| 39 +++
 qapi/block-core.json | 16 +++-
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 10b22544f2..8fde20344d 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -5397,6 +5397,44 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts,
 return 0;
 }
 
+static int coroutine_fn qcow2_co_amend(BlockDriverState *bs,
+   BlockdevAmendOptions *opts,
+   bool force,
+   Error **errp)
+{
+BlockdevAmendOptionsQcow2 *qopts = &opts->u.qcow2;
+BDRVQcow2State *s = bs->opaque;
+int ret = 0;
+
+if (qopts->has_encrypt) {
+if (!s->crypto) {
+error_setg(errp, "image is not encrypted, can't amend");
+return -EOPNOTSUPP;
+}
+
+if (qopts->encrypt->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) {
+error_setg(errp,
+   "Amend can't be used to change the qcow2 encryption 
format");
+return -EOPNOTSUPP;
+}
+
+if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
+error_setg(errp,
+   "Only LUKS encryption options can be amended for qcow2 
with blockdev-amend");
+return -EOPNOTSUPP;
+}
+
+ret = qcrypto_block_amend_options(s->crypto,
+  qcow2_crypto_hdr_read_func,
+  qcow2_crypto_hdr_write_func,
+  bs,
+  qopts->encrypt,
+  force,
+  errp);
+}
+return ret;
+}
+
 /*
  * If offset or size are negative, respectively, they will not be included in
  * the BLOCK_IMAGE_CORRUPTED event emitted.
@@ -5606,6 +5644,7 @@ BlockDriver bdrv_qcow2 = {
 .mutable_opts= mutable_opts,
 .bdrv_co_check   = qcow2_co_check,
 .bdrv_amend_options  = qcow2_amend_options,
+.bdrv_co_amend   = qcow2_co_amend,
 
 .bdrv_detach_aio_context  = qcow2_detach_aio_context,
 .bdrv_attach_aio_context  = qcow2_attach_aio_context,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 967b5738c9..4b69b0e195 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4769,6 +4769,19 @@
   'data': { }
 }
 
+##
+# @BlockdevAmendOptionsQcow2:
+#
+# Driver specific image amend options for qcow2.
+# For now, only encryption options can be amended
+#
+# @encrypt  Encryption options to be amended
+#
+# Since: 5.0
+##
+{ 'struct': 'BlockdevAmendOptionsQcow2',
+  'data': { '*encrypt': 'QCryptoBlockAmendOptions' } }
+
 ##
 # @BlockdevAmendOptions:
 #
@@ -4783,7 +4796,8 @@
   'driver': 'BlockdevDriver' },
   'discriminator': 'driver',
   'data': {
-  'luks':   'BlockdevAmendOptionsLUKS' } }
+  'luks':   'BlockdevAmendOptionsLUKS',
+  'qcow2':  'BlockdevAmendOptionsQcow2' } }
 
 ##
 # @x-blockdev-amend:
-- 
2.17.2

[PATCH v2 12/14] block/crypto: implement blockdev-amend

2020-03-08 Thread Maxim Levitsky

Signed-off-by: Maxim Levitsky 
Reviewed-by: Daniel P. Berrangé 
---
 block/crypto.c   | 72 
 qapi/block-core.json | 14 -
 2 files changed, 66 insertions(+), 20 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index 727a3fde58..389586200f 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -694,32 +694,21 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, 
Error **errp)
 }
 
 static int
-block_crypto_amend_options_luks(BlockDriverState *bs,
-   QemuOpts *opts,
-   BlockDriverAmendStatusCB *status_cb,
-   void *cb_opaque,
-   bool force,
-   Error **errp)
+block_crypto_amend_options_generic_luks(BlockDriverState *bs,
+QCryptoBlockAmendOptions 
*amend_options,
+bool force,
+Error **errp)
 {
 BlockCrypto *crypto = bs->opaque;
-QDict *cryptoopts = NULL;
-QCryptoBlockAmendOptions *amend_options = NULL;
 int ret;
 
 assert(crypto);
 assert(crypto->block);
-crypto->updating_keys = true;
 
+/* apply for exclusive read/write permissions to the underlying file*/
+crypto->updating_keys = true;
 ret = bdrv_child_refresh_perms(bs, bs->file, errp);
-if (ret < 0) {
-goto cleanup;
-}
-
-cryptoopts = qemu_opts_to_qdict(opts, NULL);
-qdict_put_str(cryptoopts, "format", "luks");
-amend_options = block_crypto_amend_opts_init(cryptoopts, errp);
-if (!amend_options) {
-ret = -EINVAL;
+if (ret) {
 goto cleanup;
 }
 
@@ -731,13 +720,57 @@ block_crypto_amend_options_luks(BlockDriverState *bs,
   force,
   errp);
 cleanup:
+/* release exclusive read/write permissions to the underlying file*/
 crypto->updating_keys = false;
 bdrv_child_refresh_perms(bs, bs->file, errp);
-qapi_free_QCryptoBlockAmendOptions(amend_options);
+return ret;
+}
+
+static int
+block_crypto_amend_options_luks(BlockDriverState *bs,
+QemuOpts *opts,
+BlockDriverAmendStatusCB *status_cb,
+void *cb_opaque,
+bool force,
+Error **errp)
+{
+BlockCrypto *crypto = bs->opaque;
+QDict *cryptoopts = NULL;
+QCryptoBlockAmendOptions *amend_options = NULL;
+int ret = -EINVAL;
+
+assert(crypto);
+assert(crypto->block);
+
+cryptoopts = qemu_opts_to_qdict(opts, NULL);
+qdict_put_str(cryptoopts, "format", "luks");
+amend_options = block_crypto_amend_opts_init(cryptoopts, errp);
 qobject_unref(cryptoopts);
+if (!amend_options) {
+goto cleanup;
+}
+ret = block_crypto_amend_options_generic_luks(bs, amend_options,
+  force, errp);
+cleanup:
+qapi_free_QCryptoBlockAmendOptions(amend_options);
 return ret;
 }
 
+static int
+coroutine_fn block_crypto_co_amend_luks(BlockDriverState *bs,
+BlockdevAmendOptions *opts,
+bool force,
+Error **errp)
+{
+QCryptoBlockAmendOptions amend_opts;
+
+amend_opts = (QCryptoBlockAmendOptions) {
+.format = Q_CRYPTO_BLOCK_FORMAT_LUKS,
+.u.luks = *qapi_BlockdevAmendOptionsLUKS_base(&opts->u.luks),
+};
+return block_crypto_amend_options_generic_luks(bs, &amend_opts,
+   force, errp);
+}
 
 static void
 block_crypto_child_perms(BlockDriverState *bs, BdrvChild *c,
@@ -809,6 +842,7 @@ static BlockDriver bdrv_crypto_luks = {
 .bdrv_get_info  = block_crypto_get_info_luks,
 .bdrv_get_specific_info = block_crypto_get_specific_info_luks,
 .bdrv_amend_options = block_crypto_amend_options_luks,
+.bdrv_co_amend  = block_crypto_co_amend_luks,
 
 .strong_runtime_opts = block_crypto_strong_runtime_opts,
 };
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 192da75a10..967b5738c9 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4757,6 +4757,18 @@
   'data': { 'job-id': 'str',
 'options': 'BlockdevCreateOptions' } }
 
+##
+# @BlockdevAmendOptionsLUKS:
+#
+# Driver specific image amend options for LUKS.
+#
+# Since: 5.0
+##
+{ 'struct': 'BlockdevAmendOptionsLUKS',
+  'base': 'QCryptoBlockAmendOptionsLUKS',
+  'data': { }
+}
+
 ##
 # @BlockdevAmendOptions:
 #
@@ -4771,7 +4783,7 @@
   'driver': 'BlockdevDriver' },
   'discriminator': 'driver',
   'data': {
-  } }
+  'luks':   'BlockdevAmendOptionsLUKS' } }
 
 ##
 # @x-blockdev-amend:
-- 
2.17.2

[PATCH v2 09/14] iotests: filter few more luks specific create options

2020-03-08 Thread Maxim Levitsky

This allows more tests to have the same output on both qcow2 luks
encrypted images and raw luks images.

Signed-off-by: Maxim Levitsky 
---
 tests/qemu-iotests/087.out   | 6 +++---
 tests/qemu-iotests/134.out   | 2 +-
 tests/qemu-iotests/158.out   | 4 ++--
 tests/qemu-iotests/188.out   | 2 +-
 tests/qemu-iotests/189.out   | 4 ++--
 tests/qemu-iotests/198.out   | 4 ++--
 tests/qemu-iotests/263.out   | 4 ++--
 tests/qemu-iotests/284.out   | 6 +++---
 tests/qemu-iotests/common.filter | 6 --
 9 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out
index f23bffbbf1..d5ff53302e 100644
--- a/tests/qemu-iotests/087.out
+++ b/tests/qemu-iotests/087.out
@@ -34,7 +34,7 @@ QMP_VERSION
 
 === Encrypted image QCow ===
 
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encryption=on 
encrypt.key-secret=sec0 size=134217728
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encryption=on size=134217728
 Testing:
 QMP_VERSION
 {"return": {}}
@@ -46,7 +46,7 @@ QMP_VERSION
 
 === Encrypted image LUKS ===
 
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encrypt.format=luks 
encrypt.key-secret=sec0 size=134217728
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 Testing:
 QMP_VERSION
 {"return": {}}
@@ -58,7 +58,7 @@ QMP_VERSION
 
 === Missing driver ===
 
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encryption=on 
encrypt.key-secret=sec0 size=134217728
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encryption=on size=134217728
 Testing: -S
 QMP_VERSION
 {"return": {}}
diff --git a/tests/qemu-iotests/134.out b/tests/qemu-iotests/134.out
index f2878f5f3a..e4733c0b81 100644
--- a/tests/qemu-iotests/134.out
+++ b/tests/qemu-iotests/134.out
@@ -1,5 +1,5 @@
 QA output created by 134
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encryption=on 
encrypt.key-secret=sec0 size=134217728
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encryption=on size=134217728
 
 == reading whole image ==
 read 134217728/134217728 bytes at offset 0
diff --git a/tests/qemu-iotests/158.out b/tests/qemu-iotests/158.out
index fa2294bb85..52ea9a488f 100644
--- a/tests/qemu-iotests/158.out
+++ b/tests/qemu-iotests/158.out
@@ -1,6 +1,6 @@
 QA output created by 158
 == create base ==
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT encryption=on 
encrypt.key-secret=sec0 size=134217728
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT encryption=on size=134217728
 
 == writing whole image ==
 wrote 134217728/134217728 bytes at offset 0
@@ -10,7 +10,7 @@ wrote 134217728/134217728 bytes at offset 0
 read 134217728/134217728 bytes at offset 0
 128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == create overlay ==
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encryption=on 
encrypt.key-secret=sec0 size=134217728 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encryption=on size=134217728 
backing_file=TEST_DIR/t.IMGFMT.base
 
 == writing part of a cluster ==
 wrote 1024/1024 bytes at offset 0
diff --git a/tests/qemu-iotests/188.out b/tests/qemu-iotests/188.out
index 4b9aadd51c..5426861b18 100644
--- a/tests/qemu-iotests/188.out
+++ b/tests/qemu-iotests/188.out
@@ -1,5 +1,5 @@
 QA output created by 188
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encrypt.format=luks 
encrypt.key-secret=sec0 encrypt.iter-time=10 size=16777216
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=16777216
 
 == reading whole image ==
 read 16777216/16777216 bytes at offset 0
diff --git a/tests/qemu-iotests/189.out b/tests/qemu-iotests/189.out
index e536d95d53..bc213cbe14 100644
--- a/tests/qemu-iotests/189.out
+++ b/tests/qemu-iotests/189.out
@@ -1,6 +1,6 @@
 QA output created by 189
 == create base ==
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT encrypt.format=luks 
encrypt.key-secret=sec0 encrypt.iter-time=10 size=16777216
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=16777216
 
 == writing whole image ==
 wrote 16777216/16777216 bytes at offset 0
@@ -10,7 +10,7 @@ wrote 16777216/16777216 bytes at offset 0
 read 16777216/16777216 bytes at offset 0
 16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == create overlay ==
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT encrypt.format=luks 
encrypt.key-secret=sec1 encrypt.iter-time=10 size=16777216 
backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=16777216 
backing_file=TEST_DIR/t.IMGFMT.base
 
 == writing part of a cluster ==
 wrote 1024/1024 bytes at offset 0
diff --git a/tests/qemu-iotests/198.out b/tests/qemu-iotests/198.out
index b0f2d417af..acfdf96b0c 100644
--- a/tests/qemu-iotests/198.out
+++ b/tests/qemu-iotests/198.out
@@ -1,12 +1,12 @@
 QA output created by 198
 == create base ==
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT encrypt.fo

[PATCH v2 06/14] block/crypto: rename two functions

2020-03-08 Thread Maxim Levitsky

Rename write_func to create_write_func, and init_func to create_init_func.
This is preparation for another write_func that will be used to update the
encryption keys.

No functional changes

Signed-off-by: Maxim Levitsky 
Reviewed-by: Daniel P. Berrangé 
---
 block/crypto.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index ecf96a7a9b..0b37dae564 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -78,12 +78,12 @@ struct BlockCryptoCreateData {
 };
 
 
-static ssize_t block_crypto_write_func(QCryptoBlock *block,
-   size_t offset,
-   const uint8_t *buf,
-   size_t buflen,
-   void *opaque,
-   Error **errp)
+static ssize_t block_crypto_create_write_func(QCryptoBlock *block,
+  size_t offset,
+  const uint8_t *buf,
+  size_t buflen,
+  void *opaque,
+  Error **errp)
 {
 struct BlockCryptoCreateData *data = opaque;
 ssize_t ret;
@@ -96,11 +96,10 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
 return ret;
 }
 
-
-static ssize_t block_crypto_init_func(QCryptoBlock *block,
-  size_t headerlen,
-  void *opaque,
-  Error **errp)
+static ssize_t block_crypto_create_init_func(QCryptoBlock *block,
+ size_t headerlen,
+ void *opaque,
+ Error **errp)
 {
 struct BlockCryptoCreateData *data = opaque;
 
@@ -296,8 +295,8 @@ static int block_crypto_co_create_generic(BlockDriverState 
*bs,
 };
 
 crypto = qcrypto_block_create(opts, NULL,
-  block_crypto_init_func,
-  block_crypto_write_func,
+  block_crypto_create_init_func,
+  block_crypto_create_write_func,
   &data,
   errp);
 
-- 
2.17.2

[PATCH v2 05/14] block/amend: refactor qcow2 amend options

2020-03-08 Thread Maxim Levitsky

Some qcow2 create options can't be used for amend.
Remove them from the qcow2 create options and add generic logic to detect
such options in qemu-img

Signed-off-by: Maxim Levitsky 
---
 block/qcow2.c  | 108 ++---
 qemu-img.c |  18 +++-
 tests/qemu-iotests/049.out | 102 ++--
 tests/qemu-iotests/061.out |  12 ++-
 tests/qemu-iotests/079.out |  18 ++--
 tests/qemu-iotests/082.out | 149 
 tests/qemu-iotests/085.out |  38 
 tests/qemu-iotests/087.out |   6 +-
 tests/qemu-iotests/115.out |   2 +-
 tests/qemu-iotests/121.out |   4 +-
 tests/qemu-iotests/125.out | 192 ++---
 tests/qemu-iotests/134.out |   2 +-
 tests/qemu-iotests/144.out |   4 +-
 tests/qemu-iotests/158.out |   4 +-
 tests/qemu-iotests/182.out |   2 +-
 tests/qemu-iotests/185.out |   8 +-
 tests/qemu-iotests/188.out |   2 +-
 tests/qemu-iotests/189.out |   4 +-
 tests/qemu-iotests/198.out |   4 +-
 tests/qemu-iotests/243.out |  16 ++--
 tests/qemu-iotests/250.out |   2 +-
 tests/qemu-iotests/255.out |   8 +-
 tests/qemu-iotests/263.out |   4 +-
 tests/qemu-iotests/280.out |   2 +-
 24 files changed, 283 insertions(+), 428 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 9574085772..81e7895e7c 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2946,17 +2946,6 @@ static int qcow2_change_backing_file(BlockDriverState 
*bs,
 return qcow2_update_header(bs);
 }
 
-static int qcow2_crypt_method_from_format(const char *encryptfmt)
-{
-if (g_str_equal(encryptfmt, "luks")) {
-return QCOW_CRYPT_LUKS;
-} else if (g_str_equal(encryptfmt, "aes")) {
-return QCOW_CRYPT_AES;
-} else {
-return -EINVAL;
-}
-}
-
 static int qcow2_set_up_encryption(BlockDriverState *bs,
QCryptoBlockCreateOptions *cryptoopts,
Error **errp)
@@ -5155,9 +5144,6 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts,
 bool lazy_refcounts = s->use_lazy_refcounts;
 bool data_file_raw = data_file_is_raw(bs);
 const char *compat = NULL;
-uint64_t cluster_size = s->cluster_size;
-bool encrypt;
-int encformat;
 int refcount_bits = s->refcount_bits;
 int ret;
 QemuOptDesc *desc = opts->list->desc;
@@ -5182,44 +5168,12 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts,
 error_setg(errp, "Unknown compatibility level %s", compat);
 return -EINVAL;
 }
-} else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
-error_setg(errp, "Cannot change preallocation mode");
-return -ENOTSUP;
 } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
 new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
-} else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
-encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
-!!s->crypto);
-
-if (encrypt != !!s->crypto) {
-error_setg(errp,
-   "Changing the encryption flag is not supported");
-return -ENOTSUP;
-}
-} else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
-encformat = qcow2_crypt_method_from_format(
-qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));
-
-if (encformat != s->crypt_method_header) {
-error_setg(errp,
-   "Changing the encryption format is not supported");
-return -ENOTSUP;
-}
-} else if (g_str_has_prefix(desc->name, "encrypt.")) {
-error_setg(errp,
-   "Changing the encryption parameters is not supported");
-return -ENOTSUP;
-} else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
-cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
- cluster_size);
-if (cluster_size != s->cluster_size) {
-error_setg(errp, "Changing the cluster size is not supported");
-return -ENOTSUP;
-}
 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
lazy_refcounts);
@@ -5472,37 +5426,6 @@ void qcow2_signal_corruption(Block

[PATCH v2 14/14] iotests: add tests for blockdev-amend

2020-03-08 Thread Maxim Levitsky

This commit adds two tests that cover the new blockdev-amend
functionality of the luks and qcow2 drivers.

Signed-off-by: Maxim Levitsky 
---
 tests/qemu-iotests/302 | 278 +
 tests/qemu-iotests/302.out |  40 ++
 tests/qemu-iotests/303 | 233 +++
 tests/qemu-iotests/303.out |  33 +
 tests/qemu-iotests/group   |   3 +
 5 files changed, 587 insertions(+)
 create mode 100755 tests/qemu-iotests/302
 create mode 100644 tests/qemu-iotests/302.out
 create mode 100755 tests/qemu-iotests/303
 create mode 100644 tests/qemu-iotests/303.out

diff --git a/tests/qemu-iotests/302 b/tests/qemu-iotests/302
new file mode 100755
index 00..a6b1155c33
--- /dev/null
+++ b/tests/qemu-iotests/302
@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+#
+# Test case QMP's encrypted key management
+#
+# Copyright (C) 2019 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+import os
+import time
+import json
+
+test_img = os.path.join(iotests.test_dir, 'test.img')
+
+class Secret:
+def __init__(self, index):
+self._id = "keysec" + str(index)
+# you are not supposed to see the password...
+self._secret = "hunter" + str(index)
+
+def id(self):
+return self._id
+
+def secret(self):
+return self._secret
+
+def to_cmdline_object(self):
+return  [ "secret,id=" + self._id + ",data=" + self._secret]
+
+def to_qmp_object(self):
+return { "qom_type" : "secret", "id": self.id(),
+ "props": { "data": self.secret() } }
+
+
+class EncryptionSetupTestCase(iotests.QMPTestCase):
+
+# test case startup
+def setUp(self):
+# start the VM
+self.vm = iotests.VM()
+self.vm.launch()
+
+# create the secrets and load 'em into the VM
+self.secrets = [ Secret(i) for i in range(0, 6) ]
+for secret in self.secrets:
+result = self.vm.qmp("object-add", **secret.to_qmp_object())
+self.assert_qmp(result, 'return', {})
+
+if iotests.imgfmt == "qcow2":
+self.pfx = "encrypt."
+self.img_opts = [ '-o', "encrypt.format=luks" ]
+else:
+self.pfx = ""
+self.img_opts = []
+
+# test case shutdown
+def tearDown(self):
+# stop the VM
+self.vm.shutdown()
+
+###
+# create the encrypted block device
+def createImg(self, file, secret):
+
+iotests.qemu_img(
+'create',
+'--object', *secret.to_cmdline_object(),
+'-f', iotests.imgfmt,
+'-o', self.pfx + 'key-secret=' + secret.id(),
+'-o', self.pfx + 'iter-time=10',
+*self.img_opts,
+file,
+'1M')
+
+###
+# open an encrypted block device
+def openImageQmp(self, id, file, secret, read_only = False):
+
+encrypt_options = {
+'key-secret' : secret.id()
+}
+
+if iotests.imgfmt == "qcow2":
+encrypt_options = {
+'encrypt': {
+'format':'luks',
+**encrypt_options
+}
+}
+
+result = self.vm.qmp('blockdev-add', **
+{
+'driver': iotests.imgfmt,
+'node-name': id,
+'read-only': read_only,
+
+**encrypt_options,
+
+'file': {
+'driver': 'file',
+'filename': test_img,
+}
+}
+)
+self.assert_qmp(result, 'return', {})
+
+# close the encrypted block device
+def closeImageQmp(self, id):
+result = self.vm.qmp('blockdev-del', **{ 'node-n

[PATCH v2 03/14] block/amend: add 'force' option

2020-03-08 Thread Maxim Levitsky

The 'force' option will be used for some unsafe amend operations.

This includes things like erasing the last keyslot in luks based formats
(which destroys the data, unless the master key is backed up
by external means), but that _might_ be the desired result.

Signed-off-by: Maxim Levitsky 
Reviewed-by: Daniel P. Berrangé 
---
 block.c   | 4 +++-
 block/qcow2.c | 1 +
 docs/tools/qemu-img.rst   | 5 -
 include/block/block.h | 1 +
 include/block/block_int.h | 1 +
 qemu-img-cmds.hx  | 4 ++--
 qemu-img.c| 8 +++-
 7 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/block.c b/block.c
index 1bdb9c679d..3b6f51aa3e 100644
--- a/block.c
+++ b/block.c
@@ -6320,6 +6320,7 @@ void bdrv_remove_aio_context_notifier(BlockDriverState 
*bs,
 
 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+   bool force,
Error **errp)
 {
 if (!bs->drv) {
@@ -6331,7 +6332,8 @@ int bdrv_amend_options(BlockDriverState *bs, QemuOpts 
*opts,
bs->drv->format_name);
 return -ENOTSUP;
 }
-return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque, errp);
+return bs->drv->bdrv_amend_options(bs, opts, status_cb,
+   cb_opaque, force, errp);
 }
 
 /*
diff --git a/block/qcow2.c b/block/qcow2.c
index 3c754f616b..b55e5b7c1f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -5145,6 +5145,7 @@ static void qcow2_amend_helper_cb(BlockDriverState *bs,
 static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
BlockDriverAmendStatusCB *status_cb,
void *cb_opaque,
+   bool force,
Error **errp)
 {
 BDRVQcow2State *s = bs->opaque;
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 0080f83a76..fc2dca6649 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -249,11 +249,14 @@ Command description:
 
 .. program:: qemu-img-commands
 
-.. option:: amend [--object OBJECTDEF] [--image-opts] [-p] [-q] [-f FMT] [-t 
CACHE] -o OPTIONS FILENAME
+.. option:: amend [--object OBJECTDEF] [--image-opts] [-p] [-q] [-f FMT] [-t 
CACHE] [--force] -o OPTIONS FILENAME
 
   Amends the image format specific *OPTIONS* for the image file
   *FILENAME*. Not all file formats support this operation.
 
+  --force allows some unsafe operations. Currently for -f luks, it allows to
+  erase last encryption key, and to overwrite an active encryption key.
+
 .. option:: bench [-c COUNT] [-d DEPTH] [-f FMT] 
[--flush-interval=FLUSH_INTERVAL] [-i AIO] [-n] [--no-drain] [-o OFFSET] 
[--pattern=PATTERN] [-q] [-s BUFFER_SIZE] [-S STEP_SIZE] [-t CACHE] [-w] [-U] 
FILENAME
 
   Run a simple sequential I/O benchmark on the specified image. If ``-w`` is
diff --git a/include/block/block.h b/include/block/block.h
index cd6b5b95aa..dda18a3fa3 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -389,6 +389,7 @@ typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, 
int64_t offset,
   int64_t total_work_size, void *opaque);
 int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+   bool force,
Error **errp);
 
 /* check if a named node can be replaced when doing drive-mirror */
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 6f9fd5e20e..24d00fbf48 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -426,6 +426,7 @@ struct BlockDriver {
 int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts,
   BlockDriverAmendStatusCB *status_cb,
   void *cb_opaque,
+  bool force,
   Error **errp);
 
 void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index c9c54de1df..9920f1f9d4 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -10,9 +10,9 @@ HXCOMM When amending the rST sections, please remember to 
copy the usage
 HXCOMM over to the per-command sections in qemu-img.texi.
 
 DEF("amend", img_amend,
-"amend [--object objectdef] [--image-opts] [-p] [-q] [-f fmt] [-t cache] 
-o options filename")
+"amend [--object objectdef] [--image-opts] [-p] [-q] [-f fmt] [-t cache] 
[--force] -o options filename")
 SRST
-.. option:: amend [--object OBJECTDEF] [--image-opts] [-p] [-q] [-f FMT] [-t 
CACHE] -o OPTIONS FILENAME
+.. option:: amend [--object OBJECTDEF] [--image-opts] [-p] [-q] [-f FMT] [-t 
CACHE] [--force] -o OPTIONS FILENAME
 ERST
 
 DEF("bench", img_bench,
diff

[PATCH v2 11/14] block/core: add generic infrastructure for x-blockdev-amend qmp command

2020-03-08 Thread Maxim Levitsky

blockdev-amend will be used similarly to blockdev-create,
to allow on-the-fly changes to the structure of format-based block devices.

Current plan is to first support encryption keyslot management for luks
based formats (raw and embedded in qcow2)

Signed-off-by: Maxim Levitsky 
---
 block/Makefile.objs   |   2 +-
 block/amend.c | 108 ++
 include/block/block_int.h |  21 +---
 qapi/block-core.json  |  42 +++
 qapi/job.json |   4 +-
 5 files changed, 169 insertions(+), 8 deletions(-)
 create mode 100644 block/amend.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 3bcb35c81d..5f0e60e7b4 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -19,7 +19,7 @@ block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += file-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-$(CONFIG_LINUX_IO_URING) += io_uring.o
-block-obj-y += null.o mirror.o commit.o io.o create.o
+block-obj-y += null.o mirror.o commit.o io.o create.o amend.o
 block-obj-y += throttle-groups.o
 block-obj-$(CONFIG_LINUX) += nvme.o
 
diff --git a/block/amend.c b/block/amend.c
new file mode 100644
index 00..2db7b1eafc
--- /dev/null
+++ b/block/amend.c
@@ -0,0 +1,108 @@
+/*
+ * Block layer code related to image options amend
+ *
+ * Copyright (c) 2018 Kevin Wolf 
+ * Copyright (c) 2019 Maxim Levitsky 
+ *
+ * Heavily based on create.c
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+#include "qemu/job.h"
+#include "qemu/main-loop.h"
+#include "qapi/qapi-commands-block-core.h"
+#include "qapi/qapi-visit-block-core.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/error.h"
+
+typedef struct BlockdevAmendJob {
+Job common;
+BlockdevAmendOptions *opts;
+BlockDriverState *bs;
+bool force;
+} BlockdevAmendJob;
+
+static int coroutine_fn blockdev_amend_run(Job *job, Error **errp)
+{
+BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common);
+int ret;
+
+job_progress_set_remaining(&s->common, 1);
+ret = s->bs->drv->bdrv_co_amend(s->bs, s->opts, s->force, errp);
+job_progress_update(&s->common, 1);
+qapi_free_BlockdevAmendOptions(s->opts);
+return ret;
+}
+
+static const JobDriver blockdev_amend_job_driver = {
+.instance_size = sizeof(BlockdevAmendJob),
+.job_type  = JOB_TYPE_AMEND,
+.run   = blockdev_amend_run,
+};
+
+void qmp_x_blockdev_amend(const char *job_id,
+  const char *node_name,
+  BlockdevAmendOptions *options,
+  bool has_force,
+  bool force,
+  Error **errp)
+{
+BlockdevAmendJob *s;
+const char *fmt = BlockdevDriver_str(options->driver);
+BlockDriver *drv = bdrv_find_format(fmt);
+BlockDriverState *bs = bdrv_find_node(node_name);
+
+/*
+ * If the driver is in the schema, we know that it exists. But it may not
+ * be whitelisted.
+ */
+assert(drv);
+if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) {
+error_setg(errp, "Driver is not whitelisted");
+return;
+}
+
+if (bs->drv != drv) {
+error_setg(errp,
+   "x-blockdev-amend doesn't support changing the block 
driver");
+return;
+}
+
+/* Error out if the driver doesn't support .bdrv_co_amend */
+if (!drv->bdrv_co_amend) {
+error_setg(errp, "Driver does not support x-blockdev-amend");
+return;
+}
+
+/* Create the block job */
+s = job_create(job_id, &blockdev_amend_job_driver, NULL,
+   bdrv_get_aio_con

[PATCH v2 07/14] block/crypto: implement the encryption key management

2020-03-08 Thread Maxim Levitsky

This implements the encryption key management using the generic code in
qcrypto layer and exposes it to the user via qemu-img

This code adds another 'write_func' because the initialization
write_func works directly on the underlying file, and amend
works on an instance of the luks device.

This commit also adds a 'hack/workaround' that Kevin Wolf (thanks) and I
came up with to make the driver both support write sharing (to avoid
breaking users) and be safe against concurrent metadata updates
(the keyslots).

Eventually the write sharing for luks driver will be deprecated
and removed together with this hack.

The hack is that we ask (as a format driver) for BLK_PERM_CONSISTENT_READ
and then when we want to update the keys, we unshare that permission.
So if someone else has the image open, even readonly, encryption
key update will fail gracefully.

Also thanks to Daniel Berrange for the idea of
unsharing the read permission, rather than the write permission, which
also catches the case where another user has opened the image read-only.

Signed-off-by: Maxim Levitsky 
---
 block/crypto.c | 127 +++--
 block/crypto.h |  44 +++--
 2 files changed, 163 insertions(+), 8 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index 0b37dae564..727a3fde58 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -36,6 +36,7 @@ typedef struct BlockCrypto BlockCrypto;
 
 struct BlockCrypto {
 QCryptoBlock *block;
+bool updating_keys;
 };
 
 
@@ -70,6 +71,24 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
 return ret;
 }
 
+static ssize_t block_crypto_write_func(QCryptoBlock *block,
+   size_t offset,
+   const uint8_t *buf,
+   size_t buflen,
+   void *opaque,
+   Error **errp)
+{
+BlockDriverState *bs = opaque;
+ssize_t ret;
+
+ret = bdrv_pwrite(bs->file, offset, buf, buflen);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Could not write encryption header");
+return ret;
+}
+return ret;
+}
+
 
 struct BlockCryptoCreateData {
 BlockBackend *blk;
@@ -148,6 +167,19 @@ static QemuOptsList block_crypto_create_opts_luks = {
 };
 
 
+static QemuOptsList block_crypto_amend_opts_luks = {
+.name = "crypto",
+.head = QTAILQ_HEAD_INITIALIZER(block_crypto_create_opts_luks.head),
+.desc = {
+BLOCK_CRYPTO_OPT_DEF_LUKS_STATE(""),
+BLOCK_CRYPTO_OPT_DEF_LUKS_KEYSLOT(""),
+BLOCK_CRYPTO_OPT_DEF_LUKS_OLD_SECRET(""),
+BLOCK_CRYPTO_OPT_DEF_LUKS_NEW_SECRET(""),
+BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(""),
+{ /* end of list */ }
+},
+};
+
 QCryptoBlockOpenOptions *
 block_crypto_open_opts_init(QDict *opts, Error **errp)
 {
@@ -661,6 +693,95 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, 
Error **errp)
 return spec_info;
 }
 
+static int
+block_crypto_amend_options_luks(BlockDriverState *bs,
+   QemuOpts *opts,
+   BlockDriverAmendStatusCB *status_cb,
+   void *cb_opaque,
+   bool force,
+   Error **errp)
+{
+BlockCrypto *crypto = bs->opaque;
+QDict *cryptoopts = NULL;
+QCryptoBlockAmendOptions *amend_options = NULL;
+int ret;
+
+assert(crypto);
+assert(crypto->block);
+crypto->updating_keys = true;
+
+ret = bdrv_child_refresh_perms(bs, bs->file, errp);
+if (ret < 0) {
+goto cleanup;
+}
+
+cryptoopts = qemu_opts_to_qdict(opts, NULL);
+qdict_put_str(cryptoopts, "format", "luks");
+amend_options = block_crypto_amend_opts_init(cryptoopts, errp);
+if (!amend_options) {
+ret = -EINVAL;
+goto cleanup;
+}
+
+ret = qcrypto_block_amend_options(crypto->block,
+  block_crypto_read_func,
+  block_crypto_write_func,
+  bs,
+  amend_options,
+  force,
+  errp);
+cleanup:
+crypto->updating_keys = false;
+bdrv_child_refresh_perms(bs, bs->file, errp);
+qapi_free_QCryptoBlockAmendOptions(amend_options);
+qobject_unref(cryptoopts);
+return ret;
+}
+
+
+static void
+block_crypto_child_perms(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+
+BlockCrypto *crypto = bs->opaque;
+
+bdrv_filter_default_perms(bs, c, rol

[PATCH v2 02/14] qcrypto/luks: implement encryption key management

2020-03-08 Thread Maxim Levitsky

The next few patches will expose this functionality
to the user.

Signed-off-by: Maxim Levitsky 
---
 crypto/block-luks.c | 398 +++-
 qapi/crypto.json|  61 ++-
 2 files changed, 455 insertions(+), 4 deletions(-)

diff --git a/crypto/block-luks.c b/crypto/block-luks.c
index 4861db810c..b11ee08c6d 100644
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -32,6 +32,7 @@
 #include "qemu/uuid.h"
 
 #include "qemu/coroutine.h"
+#include "qemu/bitmap.h"
 
 /*
  * Reference for the LUKS format implemented here is
@@ -70,6 +71,9 @@ typedef struct QCryptoBlockLUKSKeySlot 
QCryptoBlockLUKSKeySlot;
 
 #define QCRYPTO_BLOCK_LUKS_SECTOR_SIZE 512LL
 
+#define QCRYPTO_BLOCK_LUKS_DEFAULT_ITER_TIME_MS 2000
+#define QCRYPTO_BLOCK_LUKS_ERASE_ITERATIONS 40
+
 static const char qcrypto_block_luks_magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN] = {
 'L', 'U', 'K', 'S', 0xBA, 0xBE
 };
@@ -219,6 +223,9 @@ struct QCryptoBlockLUKS {
 
 /* Hash algorithm used in pbkdf2 function */
 QCryptoHashAlgorithm hash_alg;
+
+/* Name of the secret that was used to open the image */
+char *secret;
 };
 
 
@@ -1069,6 +1076,108 @@ qcrypto_block_luks_find_key(QCryptoBlock *block,
 return -1;
 }
 
+/*
+ * Returns true if a slot i is marked as active
+ * (contains encrypted copy of the master key)
+ */
+static bool
+qcrypto_block_luks_slot_active(const QCryptoBlockLUKS *luks,
+   unsigned int slot_idx)
+{
+uint32_t val = luks->header.key_slots[slot_idx].active;
+return val ==  QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED;
+}
+
+/*
+ * Returns the number of slots that are marked as active
+ * (slots that contain encrypted copy of the master key)
+ */
+static unsigned int
+qcrypto_block_luks_count_active_slots(const QCryptoBlockLUKS *luks)
+{
+size_t i = 0;
+unsigned int ret = 0;
+
+for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
+if (qcrypto_block_luks_slot_active(luks, i)) {
+ret++;
+}
+}
+return ret;
+}
+
+/*
+ * Finds first key slot which is not active
+ * Returns the key slot index, or -1 if it doesn't exist
+ */
+static int
+qcrypto_block_luks_find_free_keyslot(const QCryptoBlockLUKS *luks)
+{
+size_t i;
+
+for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
+if (!qcrypto_block_luks_slot_active(luks, i)) {
+return i;
+}
+}
+return -1;
+}
+
+/*
+ * Erases an keyslot given its index
+ * Returns:
+ *0 if the keyslot was erased successfully
+ *   -1 if a error occurred while erasing the keyslot
+ *
+ */
+static int
+qcrypto_block_luks_erase_key(QCryptoBlock *block,
+ unsigned int slot_idx,
+ QCryptoBlockWriteFunc writefunc,
+ void *opaque,
+ Error **errp)
+{
+QCryptoBlockLUKS *luks = block->opaque;
+QCryptoBlockLUKSKeySlot *slot = &luks->header.key_slots[slot_idx];
+g_autofree uint8_t *garbagesplitkey = NULL;
+size_t splitkeylen = luks->header.master_key_len * slot->stripes;
+size_t i;
+
+assert(slot_idx < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS);
+assert(splitkeylen > 0);
+garbagesplitkey = g_new0(uint8_t, splitkeylen);
+
+/* Reset the key slot header */
+memset(slot->salt, 0, QCRYPTO_BLOCK_LUKS_SALT_LEN);
+slot->iterations = 0;
+slot->active = QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED;
+
+qcrypto_block_luks_store_header(block,  writefunc, opaque, errp);
+/*
+ * Now try to erase the key material, even if the header
+ * update failed
+ */
+for (i = 0; i < QCRYPTO_BLOCK_LUKS_ERASE_ITERATIONS; i++) {
+if (qcrypto_random_bytes(garbagesplitkey, splitkeylen, errp) < 0) {
+/*
+ * If we failed to get the random data, still write
+ * at least zeros to the key slot at least once
+ */
+if (i > 0) {
+return -1;
+}
+}
+if (writefunc(block,
+  slot->key_offset_sector * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
+  garbagesplitkey,
+  splitkeylen,
+  opaque,
+  errp) != splitkeylen) {
+return -1;
+}
+}
+return 0;
+}
 
 static int
 qcrypto_block_luks_open(QCryptoBlock *block,
@@ -1099,6 +1208,7 @@ qcrypto_block_luks_open(QCryptoBlock *block,
 
 luks = g_new0(QCryptoBlockLUKS, 1);
 block->opaque = luks;
+luks->secret = g_strdup(options->u.luks.key_secret);
 
 if (qcrypto_block_luks_load_header(block, readfunc, opaque, errp) < 0) {
 goto fail;
@@ -1164,6 +1274,7 @@ qcrypto_block_luks_open(QCryptoBlock *block,
  fail:
 qcrypto_block_free_cipher(block);
 qcrypto_ivgen_free(block->ivgen);
+g_free(luks

[PATCH v2 08/14] block/qcow2: extend qemu-img amend interface with crypto options

2020-03-08 Thread Maxim Levitsky

Now that we have all the infrastructure in place,
wire it in the qcow2 driver and expose this to the user.

Signed-off-by: Maxim Levitsky 
---
 block/qcow2.c  | 80 --
 tests/qemu-iotests/082.out | 45 +
 2 files changed, 114 insertions(+), 11 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 81e7895e7c..10b22544f2 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -176,6 +176,19 @@ static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock 
*block, size_t offset,
 return ret;
 }
 
+static QDict*
+qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, Error **errp)
+{
+QDict *cryptoopts_qdict;
+QDict *opts_qdict;
+
+/* Extract "encrypt." options into a qdict */
+opts_qdict = qemu_opts_to_qdict(opts, NULL);
+qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
+qobject_unref(opts_qdict);
+qdict_put_str(cryptoopts_qdict, "format", "luks");
+return cryptoopts_qdict;
+}
 
 /* 
  * read qcow2 extension and fill bs
@@ -4615,20 +4628,18 @@ static ssize_t 
qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block,
 static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len,
  Error **errp)
 {
-QDict *opts_qdict;
-QDict *cryptoopts_qdict;
 QCryptoBlockCreateOptions *cryptoopts;
+QDict *crypto_opts_dict;
 QCryptoBlock *crypto;
 
-/* Extract "encrypt." options into a qdict */
-opts_qdict = qemu_opts_to_qdict(opts, NULL);
-qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
-qobject_unref(opts_qdict);
+crypto_opts_dict = qcow2_extract_crypto_opts(opts, "luks", errp);
+if (!crypto_opts_dict) {
+return false;
+}
+
+cryptoopts = block_crypto_create_opts_init(crypto_opts_dict, errp);
+qobject_unref(crypto_opts_dict);
 
-/* Build QCryptoBlockCreateOptions object from qdict */
-qdict_put_str(cryptoopts_qdict, "format", "luks");
-cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp);
-qobject_unref(cryptoopts_qdict);
 if (!cryptoopts) {
 return false;
 }
@@ -5067,6 +5078,7 @@ typedef enum Qcow2AmendOperation {
 QCOW2_NO_OPERATION = 0,
 
 QCOW2_UPGRADING,
+QCOW2_UPDATING_ENCRYPTION,
 QCOW2_CHANGING_REFCOUNT_ORDER,
 QCOW2_DOWNGRADING,
 } Qcow2AmendOperation;
@@ -5148,6 +5160,7 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts,
 int ret;
 QemuOptDesc *desc = opts->list->desc;
 Qcow2AmendHelperCBInfo helper_cb_info;
+bool encryption_update = false;
 
 while (desc && desc->name) {
 if (!qemu_opt_find(opts, desc->name)) {
@@ -5174,6 +5187,18 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts,
 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
+} else if (g_str_has_prefix(desc->name, "encrypt.")) {
+if (!s->crypto) {
+error_setg(errp,
+   "Can't amend encryption options - encryption not 
present");
+return -EINVAL;
+}
+if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
+error_setg(errp,
+   "Only LUKS encryption options can be amended");
+return -ENOTSUP;
+}
+encryption_update = true;
 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
lazy_refcounts);
@@ -5216,7 +5241,8 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts,
 .original_status_cb = status_cb,
 .original_cb_opaque = cb_opaque,
 .total_operations = (new_version != old_version)
-  + (s->refcount_bits != refcount_bits)
+  + (s->refcount_bits != refcount_bits) +
+(encryption_update == true)
 };
 
 /* Upgrade first (some features may require compat=1.1) */
@@ -5229,6 +5255,33 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts,
 }
 }
 
+if (encryption_update) {
+QDict *amend_opts_dict;
+QCryptoBlockAmendOptions *amend_opts;
+
+helper_cb_info.current_operation = QCOW2_UPDATING_ENCRYPTION;
+amend_opts_dict = qcow2_extract_crypto_opts(opts, "luks", errp);
+if (!amend_opts_dict) {
+return -EINVAL;
+}
+amend_opts = block_crypto_amend_opts_init(a

[PATCH v2 10/14] iotests: qemu-img tests for luks key management

2020-03-08 Thread Maxim Levitsky

This commit adds two tests, which test the new amend interface
of both luks raw images and qcow2 luks encrypted images.

Signed-off-by: Maxim Levitsky 
---
 tests/qemu-iotests/300 | 207 +
 tests/qemu-iotests/300.out |  99 ++
 tests/qemu-iotests/301 |  90 
 tests/qemu-iotests/301.out |  30 ++
 tests/qemu-iotests/group   |   3 +
 5 files changed, 429 insertions(+)
 create mode 100755 tests/qemu-iotests/300
 create mode 100644 tests/qemu-iotests/300.out
 create mode 100755 tests/qemu-iotests/301
 create mode 100644 tests/qemu-iotests/301.out

diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300
new file mode 100755
index 00..aa1a77690f
--- /dev/null
+++ b/tests/qemu-iotests/300
@@ -0,0 +1,207 @@
+#!/usr/bin/env bash
+#
+# Test encryption key management with luks
+# Based on 134
+#
+# Copyright (C) 2019 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mlevi...@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1   # failure is the default!
+
+_cleanup()
+{
+   _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2 luks
+_supported_proto file #TODO
+
+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT
+
+if [ "$IMGFMT" = "qcow2" ] ; then
+   PR="encrypt."
+   EXTRA_IMG_ARGS="-o encrypt.format=luks"
+fi
+
+
+# secrets: you are supposed to see the password as ***, see :-)
+S0="--object secret,id=sec0,data=hunter0"
+S1="--object secret,id=sec1,data=hunter1"
+S2="--object secret,id=sec2,data=hunter2"
+S3="--object secret,id=sec3,data=hunter3"
+S4="--object secret,id=sec4,data=hunter4"
+SECRETS="$S0 $S1 $S2 $S3 $S4"
+
+# image with given secret
+IMGS0="--image-opts 
driver=$IMGFMT,file.filename=$TEST_IMG,${PR}key-secret=sec0"
+IMGS1="--image-opts 
driver=$IMGFMT,file.filename=$TEST_IMG,${PR}key-secret=sec1"
+IMGS2="--image-opts 
driver=$IMGFMT,file.filename=$TEST_IMG,${PR}key-secret=sec2"
+IMGS3="--image-opts 
driver=$IMGFMT,file.filename=$TEST_IMG,${PR}key-secret=sec3"
+IMGS4="--image-opts 
driver=$IMGFMT,file.filename=$TEST_IMG,${PR}key-secret=sec4"
+
+
+echo "== creating a test image =="
+_make_test_img $S0 $EXTRA_IMG_ARGS -o ${PR}key-secret=sec0,${PR}iter-time=10 
32M
+
+echo
+echo "== test that key 0 opens the image =="
+$QEMU_IO $S0 -c "read 0 4096" $IMGS0 | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== adding a password to slot 4 =="
+$QEMU_IMG amend $SECRETS $IMGS0 -o 
${PR}state=active,${PR}new-secret=sec4,${PR}iter-time=10,${PR}keyslot=4
+echo "== adding a password to slot 1 =="
+$QEMU_IMG amend $SECRETS $IMGS0 -o 
${PR}state=active,${PR}new-secret=sec1,${PR}iter-time=10
+echo "== adding a password to slot 3 =="
+$QEMU_IMG amend $SECRETS $IMGS1 -o 
${PR}state=active,${PR}new-secret=sec3,${PR}iter-time=10,${PR}keyslot=3
+
+echo "== adding a password to slot 2 =="
+$QEMU_IMG amend $SECRETS $IMGS3 -o 
${PR}state=active,${PR}new-secret=sec2,${PR}iter-time=10
+
+
+echo "== erase slot 4 =="
+$QEMU_IMG amend $SECRETS $IMGS1 -o ${PR}state=inactive,${PR}keyslot=4 | 
_filter_img_create
+
+
+echo
+echo "== all secrets should work =="
+for IMG in "$IMGS0" "$IMGS1" "$IMGS2" "$IMGS3"; do
+   $QEMU_IO $SECRETS -c "read 0 4096" $IMG | _filter_qemu_io | 
_filter_testdir
+done
+
+echo
+echo "== erase slot 0 and try it =="
+$QEMU_IMG amend $SECRETS $IMGS1 -o ${PR}state=inactive,${PR}old-secret=sec0 | 
_filter_img_create
+$QEMU_IO $SECRETS -c "read 0 4096" $IMGS0 | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== erase slot 2 and try it =="
+$QEMU_IMG amend $SECRETS $IMGS1 -o ${PR}state=inactive,${PR}keyslot=2 | 
_filter_img_create
+$QEMU_IO $SECRETS -c "read 0 4096" $IMGS2 | _filter_qemu_io | _filter_testdir
+
+
+# at this point slots 1 and 3 should be active
+
+echo
+echo "== filling  4 slots with secret 2 =="
+for i in $(seq 0 3) ; do
+   $QEMU_IMG amend $SECRETS $IMGS3 -o

[PATCH v2 01/14] qcrypto/core: add generic infrastructure for crypto options amendment

2020-03-08 Thread Maxim Levitsky

This will be used first to implement luks keyslot management.

block_crypto_amend_opts_init will be used to convert
qemu-img cmdline to QCryptoBlockAmendOptions

Signed-off-by: Maxim Levitsky 
Reviewed-by: Daniel P. Berrangé 
---
 block/crypto.c | 17 +
 block/crypto.h |  3 +++
 crypto/block.c | 31 +++
 crypto/blockpriv.h |  8 
 include/crypto/block.h | 22 ++
 qapi/crypto.json   | 16 
 6 files changed, 97 insertions(+)

diff --git a/block/crypto.c b/block/crypto.c
index 24823835c1..ecf96a7a9b 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -184,6 +184,23 @@ block_crypto_create_opts_init(QDict *opts, Error **errp)
 return ret;
 }
 
+QCryptoBlockAmendOptions *
+block_crypto_amend_opts_init(QDict *opts, Error **errp)
+{
+Visitor *v;
+QCryptoBlockAmendOptions *ret;
+
+v = qobject_input_visitor_new_flat_confused(opts, errp);
+if (!v) {
+return NULL;
+}
+
+visit_type_QCryptoBlockAmendOptions(v, NULL, &ret, errp);
+
+visit_free(v);
+return ret;
+}
+
 
 static int block_crypto_open_generic(QCryptoBlockFormat format,
  QemuOptsList *opts_spec,
diff --git a/block/crypto.h b/block/crypto.h
index b935695e79..06e044c9be 100644
--- a/block/crypto.h
+++ b/block/crypto.h
@@ -91,6 +91,9 @@
 QCryptoBlockCreateOptions *
 block_crypto_create_opts_init(QDict *opts, Error **errp);
 
+QCryptoBlockAmendOptions *
+block_crypto_amend_opts_init(QDict *opts, Error **errp);
+
 QCryptoBlockOpenOptions *
 block_crypto_open_opts_init(QDict *opts, Error **errp);
 
diff --git a/crypto/block.c b/crypto/block.c
index 325752871c..0ce67db641 100644
--- a/crypto/block.c
+++ b/crypto/block.c
@@ -115,6 +115,37 @@ QCryptoBlock 
*qcrypto_block_create(QCryptoBlockCreateOptions *options,
 }
 
 
+int qcrypto_block_amend_options(QCryptoBlock *block,
+QCryptoBlockReadFunc readfunc,
+QCryptoBlockWriteFunc writefunc,
+void *opaque,
+QCryptoBlockAmendOptions *options,
+bool force,
+Error **errp)
+{
+if (options->format != block->format) {
+error_setg(errp,
+   "Cannot amend encryption format");
+return -1;
+}
+
+if (!block->driver->amend) {
+error_setg(errp,
+   "Crypto format %s doesn't support format options amendment",
+   QCryptoBlockFormat_str(block->format));
+return -1;
+}
+
+return block->driver->amend(block,
+readfunc,
+writefunc,
+opaque,
+options,
+force,
+errp);
+}
+
+
 QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block,
  Error **errp)
 {
diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h
index 71c59cb542..3c7ccea504 100644
--- a/crypto/blockpriv.h
+++ b/crypto/blockpriv.h
@@ -62,6 +62,14 @@ struct QCryptoBlockDriver {
   void *opaque,
   Error **errp);
 
+int (*amend)(QCryptoBlock *block,
+ QCryptoBlockReadFunc readfunc,
+ QCryptoBlockWriteFunc writefunc,
+ void *opaque,
+ QCryptoBlockAmendOptions *options,
+ bool force,
+ Error **errp);
+
 int (*get_info)(QCryptoBlock *block,
 QCryptoBlockInfo *info,
 Error **errp);
diff --git a/include/crypto/block.h b/include/crypto/block.h
index d49d2c2da9..e4553cf33d 100644
--- a/include/crypto/block.h
+++ b/include/crypto/block.h
@@ -144,6 +144,28 @@ QCryptoBlock 
*qcrypto_block_create(QCryptoBlockCreateOptions *options,
void *opaque,
Error **errp);
 
+/**
+ * qcrypto_block_amend_options:
+ * @block: the block encryption object
+ *
+ * @readfunc: callback for reading data from the volume header
+ * @writefunc: callback for writing data to the volume header
+ * @opaque: data to pass to @readfunc and @writefunc
+ * @options: the new/amended encryption options
+ * @force: hint for the driver to allow unsafe operation
+ * @errp: error pointer
+ *
+ * Changes the crypto options of the encryption format
+ *
+ */
+int qcrypto_block_amend_options(QCryptoBlock *block,
+QCryptoBlockReadFunc readfunc,
+QCryptoBlockWriteFunc writefunc,
+void *opaque,
+QCryptoBlockAmendOptions *options,
+bool force,
+Error

[PATCH v2 04/14] block/amend: separate amend and create options for qemu-img

2020-03-08 Thread Maxim Levitsky

Some options are only useful for creation
(or are hard to amend, like cluster size for qcow2), while some other
options are only useful for amend, like the upcoming keyslot management
options for luks.

Since currently only qcow2 supports amend, move all its options
to a common macro and then include it in each action option list.

In the future it might be useful to remove from the amend list those
options which are not supported anyway and which currently
cause an error message if amended.

Signed-off-by: Maxim Levitsky 
---
 block/qcow2.c | 160 +-
 include/block/block_int.h |   4 +
 qemu-img.c|  18 ++---
 3 files changed, 100 insertions(+), 82 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index b55e5b7c1f..9574085772 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -5440,83 +5440,96 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool 
fatal, int64_t offset,
 s->signaled_corruption = true;
 }
 
+#define QCOW_COMMON_OPTIONS \
+{   \
+.name = BLOCK_OPT_SIZE, \
+.type = QEMU_OPT_SIZE,  \
+.help = "Virtual disk size" \
+},  \
+{   \
+.name = BLOCK_OPT_COMPAT_LEVEL, \
+.type = QEMU_OPT_STRING,\
+.help = "Compatibility level (v2 [0.10] or v3 [1.1])"   \
+},  \
+{   \
+.name = BLOCK_OPT_BACKING_FILE, \
+.type = QEMU_OPT_STRING,\
+.help = "File name of a base image" \
+},  \
+{   \
+.name = BLOCK_OPT_BACKING_FMT,  \
+.type = QEMU_OPT_STRING,\
+.help = "Image format of the base image"\
+},  \
+{   \
+.name = BLOCK_OPT_DATA_FILE,\
+.type = QEMU_OPT_STRING,\
+.help = "File name of an external data file"\
+},  \
+{   \
+.name = BLOCK_OPT_DATA_FILE_RAW,\
+.type = QEMU_OPT_BOOL,  \
+.help = "The external data file must stay valid "   \
+"as a raw image"\
+},  \
+{   \
+.name = BLOCK_OPT_ENCRYPT,  \
+.type = QEMU_OPT_BOOL,  \
+.help = "Encrypt the image with format 'aes'. (Deprecated " \
+"in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",\
+},  \
+{   \
+.name = BLOCK_OPT_ENCRYPT_FORMAT,   \
+.type = QEMU_OPT_STRING,\
+.help = "Encrypt the image, format choices: 'aes', 'luks'", \
+},  \
+BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", \
+"ID of secret providing qcow AES key or LUKS passphrase"),  \
+BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."),   \
+BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."),  \
+BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."),\
+BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."),   \
+BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), \
+BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),\
+{   \
+.name = BLOCK_OPT_CLUSTER_SIZE,

[PATCH v2 00/14] LUKS: encryption slot management using amend interface

2020-03-08 Thread Maxim Levitsky

Hi!
Here is the updated series of my patches, incorporating all the feedback I 
received.

This implements the API interface that we agreed upon, except that I merged the
LUKSKeyslotActive/LUKSKeyslotInactive union into a struct, because otherwise
I would need nested unions, which are not currently supported by the QAPI parser.
This didn't change the API, and thus once support for nested unions is there,
it can always be implemented in a backward-compatible way.

I hope that this series will finally be considered for merging, since I am
somewhat running out of time to finish this task.

Patches are strictly divided by topic into 3 groups, and each group depends on
the preceding groups.

* Patches 1,2 implement the generic qcrypto amend interface, including the
  definition of the structs used in crypto.json, and implement it in the luks
  crypto driver. Nothing is exposed to the user at this stage.

* Patches 3-10 use the code from patches 1,2 to implement qemu-img amend based
  encryption slot management for luks and for qcow2, and add a bunch of
  iotests to cover that.

* Patches 11-14 add x-blockdev-amend (I'll drop the x- prefix if you like), and
  wire it to the luks and qcow2 drivers to implement qmp based encryption slot
  management, also using the code from patches 1,2, and also add a bunch of
  iotests to cover this.

Tested with -raw,-qcow2 and -luks iotests and 'make check'

Best regards,
    Maxim Levitsky

clone of "luks-keymgmnt-v2"

Maxim Levitsky (14):
  qcrypto/core: add generic infrastructure for crypto options amendment
  qcrypto/luks: implement encryption key management
  block/amend: add 'force' option
  block/amend: separate amend and create options for qemu-img
  block/amend: refactor qcow2 amend options
  block/crypto: rename two functions
  block/crypto: implement the encryption key management
  block/qcow2: extend qemu-img amend interface with crypto options
  iotests: filter few more luks specific create options
  iotests: qemu-img tests for luks key management
  block/core: add generic infrastructure for x-blockdev-amend qmp
command
  block/crypto: implement blockdev-amend
  block/qcow2: implement blockdev-amend
  iotests: add tests for blockdev-amend

 block.c  |   4 +-
 block/Makefile.objs  |   2 +-
 block/amend.c| 108 +
 block/crypto.c   | 203 ++--
 block/crypto.h   |  47 +++-
 block/qcow2.c| 314 ++--
 crypto/block-luks.c  | 398 ++-
 crypto/block.c   |  31 +++
 crypto/blockpriv.h   |   8 +
 docs/tools/qemu-img.rst  |   5 +-
 include/block/block.h|   1 +
 include/block/block_int.h|  24 +-
 include/crypto/block.h   |  22 ++
 qapi/block-core.json |  68 ++
 qapi/crypto.json |  75 +-
 qapi/job.json|   4 +-
 qemu-img-cmds.hx |   4 +-
 qemu-img.c   |  44 +++-
 tests/qemu-iotests/049.out   | 102 
 tests/qemu-iotests/061.out   |  12 +-
 tests/qemu-iotests/079.out   |  18 +-
 tests/qemu-iotests/082.out   | 176 --
 tests/qemu-iotests/085.out   |  38 +--
 tests/qemu-iotests/087.out   |   6 +-
 tests/qemu-iotests/115.out   |   2 +-
 tests/qemu-iotests/121.out   |   4 +-
 tests/qemu-iotests/125.out   | 192 +++
 tests/qemu-iotests/134.out   |   2 +-
 tests/qemu-iotests/144.out   |   4 +-
 tests/qemu-iotests/158.out   |   4 +-
 tests/qemu-iotests/182.out   |   2 +-
 tests/qemu-iotests/185.out   |   8 +-
 tests/qemu-iotests/188.out   |   2 +-
 tests/qemu-iotests/189.out   |   4 +-
 tests/qemu-iotests/198.out   |   4 +-
 tests/qemu-iotests/243.out   |  16 +-
 tests/qemu-iotests/250.out   |   2 +-
 tests/qemu-iotests/255.out   |   8 +-
 tests/qemu-iotests/263.out   |   4 +-
 tests/qemu-iotests/280.out   |   2 +-
 tests/qemu-iotests/284.out   |   6 +-
 tests/qemu-iotests/300   | 207 
 tests/qemu-iotests/300.out   |  99 
 tests/qemu-iotests/301   |  90 +++
 tests/qemu-iotests/301.out   |  30 +++
 tests/qemu-iotests/302   | 278 +
 tests/qemu-iotests/302.out   |  40 
 tests/qemu-iotests/303   | 233 ++
 tests/qemu-iotests/303.out   |  33 +++
 tests/qemu-iotests/common.filter |   6 +-
 tests/qemu-iotests/group |   6 +
 51 files changed, 2486 insertions(+), 516 deletions(-)
 create mode 100644 block/amend.c
 create mode 100755 tests/qemu-iotests/300
 create mode 100644 tests/qemu-iotests/300.out
 create mode 100755 tests/qemu-iotests/301
 create mode 100644 tests/qemu-iotests/301.out
 create mode 100755 tests/qemu-iotests/302
 create mode 100644 tests/qemu-iotests/302.out
 create mode 100755 tests/qemu-iotests/303
 c

Re: [PATCH v5 05/11] monitor/hmp: move hmp_drive_mirror and hmp_drive_backup to block-hmp-cmds.c Moved code was added after 2012-01-13, thus under GPLv2+

2020-03-08 Thread Maxim Levitsky



I see that I have the same issue of a long subject line here.
It's because I forgot the blank line after the first line when adding this.
If I need to resend another version of this patchset, I'll fix this,
but otherwise maybe it can be fixed when applying this to one of the
maintainers' trees.

Sorry for the noise.

Best regards,
    Maxim Levitsky

On Sun, 2020-03-08 at 11:24 +0200, Maxim Levitsky wrote:
> Signed-off-by: Maxim Levitsky 
> Reviewed-by: Dr. David Alan Gilbert 
> ---
>  block/monitor/block-hmp-cmds.c | 60 ++
>  include/block/block-hmp-cmds.h | 12 +--
>  include/monitor/hmp.h  |  2 --
>  monitor/hmp-cmds.c | 58 
>  4 files changed, 69 insertions(+), 63 deletions(-)
> 
> diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
> index ad727a6b08..d6dd5d97f7 100644
> --- a/block/monitor/block-hmp-cmds.c
> +++ b/block/monitor/block-hmp-cmds.c
> @@ -37,10 +37,12 @@
>  #include "qapi/qapi-commands-block.h"
>  #include "qapi/qmp/qdict.h"
>  #include "qapi/error.h"
> +#include "qapi/qmp/qerror.h"
>  #include "qemu/config-file.h"
>  #include "qemu/option.h"
>  #include "sysemu/sysemu.h"
>  #include "monitor/monitor.h"
> +#include "monitor/hmp.h"
>  #include "block/block_int.h"
>  #include "block/block-hmp-cmds.h"
>  
> @@ -187,3 +189,61 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
>  error_report("'commit' error for '%s': %s", device, strerror(-ret));
>  }
>  }
> +
> +void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
> +{
> +const char *filename = qdict_get_str(qdict, "target");
> +const char *format = qdict_get_try_str(qdict, "format");
> +bool reuse = qdict_get_try_bool(qdict, "reuse", false);
> +bool full = qdict_get_try_bool(qdict, "full", false);
> +Error *err = NULL;
> +DriveMirror mirror = {
> +.device = (char *)qdict_get_str(qdict, "device"),
> +.target = (char *)filename,
> +.has_format = !!format,
> +.format = (char *)format,
> +.sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
> +.has_mode = true,
> +.mode = reuse ? NEW_IMAGE_MODE_EXISTING : 
> NEW_IMAGE_MODE_ABSOLUTE_PATHS,
> +.unmap = true,
> +};
> +
> +if (!filename) {
> +error_setg(&err, QERR_MISSING_PARAMETER, "target");
> +hmp_handle_error(mon, err);
> +return;
> +}
> +qmp_drive_mirror(&mirror, &err);
> +hmp_handle_error(mon, err);
> +}
> +
> +void hmp_drive_backup(Monitor *mon, const QDict *qdict)
> +{
> +const char *device = qdict_get_str(qdict, "device");
> +const char *filename = qdict_get_str(qdict, "target");
> +const char *format = qdict_get_try_str(qdict, "format");
> +bool reuse = qdict_get_try_bool(qdict, "reuse", false);
> +bool full = qdict_get_try_bool(qdict, "full", false);
> +bool compress = qdict_get_try_bool(qdict, "compress", false);
> +Error *err = NULL;
> +DriveBackup backup = {
> +.device = (char *)device,
> +.target = (char *)filename,
> +.has_format = !!format,
> +.format = (char *)format,
> +.sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
> +.has_mode = true,
> +.mode = reuse ? NEW_IMAGE_MODE_EXISTING : 
> NEW_IMAGE_MODE_ABSOLUTE_PATHS,
> +.has_compress = !!compress,
> +.compress = compress,
> +};
> +
> +if (!filename) {
> +error_setg(&err, QERR_MISSING_PARAMETER, "target");
> +hmp_handle_error(mon, err);
> +return;
> +}
> +
> +qmp_drive_backup(&backup, &err);
> +hmp_handle_error(mon, err);
> +}
> diff --git a/include/block/block-hmp-cmds.h b/include/block/block-hmp-cmds.h
> index 30b0f56415..a64b737b3a 100644
> --- a/include/block/block-hmp-cmds.h
> +++ b/include/block/block-hmp-cmds.h
> @@ -3,10 +3,13 @@
>   *
>   * Copyright (c) 2003-2008 Fabrice Bellard
>   * Copyright (c) 2020 Red Hat, Inc.
> + * Copyright IBM, Corp. 2011
>   *
> - * This work is licensed under the terms of the GNU GPL, version 2.
> - * or (at your option) any later version.
> - * See the COPYING file in the top-level directory.
> + * Authors:
> + *  Anthony Liguori   
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
&

[PATCH v5 11/11] monitor/hmp: Move hmp_drive_add_node to block-hmp-cmds.c

2020-03-08 Thread Maxim Levitsky

Signed-off-by: Maxim Levitsky 
Reviewed-by: Dr. David Alan Gilbert 
---
 block/monitor/block-hmp-cmds.c | 30 
 blockdev.c | 43 +++---
 include/block/block_int.h  |  5 ++--
 3 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index aebf1dce0d..c3a6368dfc 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -56,6 +56,36 @@
 #include "block/block-hmp-cmds.h"
 #include "qemu-io.h"
 
+static void hmp_drive_add_node(Monitor *mon, const char *optstr)
+{
+QemuOpts *opts;
+QDict *qdict;
+Error *local_err = NULL;
+
+opts = qemu_opts_parse_noisily(&qemu_drive_opts, optstr, false);
+if (!opts) {
+return;
+}
+
+qdict = qemu_opts_to_qdict(opts, NULL);
+
+if (!qdict_get_try_str(qdict, "node-name")) {
+qobject_unref(qdict);
+error_report("'node-name' needs to be specified");
+goto out;
+}
+
+BlockDriverState *bs = bds_tree_init(qdict, &local_err);
+if (!bs) {
+error_report_err(local_err);
+goto out;
+}
+
+bdrv_set_monitor_owned(bs);
+out:
+qemu_opts_del(opts);
+}
+
 void hmp_drive_add(Monitor *mon, const QDict *qdict)
 {
 Error *err = NULL;
diff --git a/blockdev.c b/blockdev.c
index b38c247cdc..257cb37682 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -64,9 +64,14 @@
 #include "qemu/main-loop.h"
 #include "qemu/throttle-options.h"
 
-static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
+QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
 QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
 
+void bdrv_set_monitor_owned(BlockDriverState *bs)
+{
+QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
+}
+
 static const char *const if_name[IF_COUNT] = {
 [IF_NONE] = "none",
 [IF_IDE] = "ide",
@@ -640,7 +645,7 @@ err_no_opts:
 }
 
 /* Takes the ownership of bs_opts */
-static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
+BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
 {
 int bdrv_flags = 0;
 
@@ -3719,38 +3724,6 @@ out:
 aio_context_release(aio_context);
 }
 
-
-void hmp_drive_add_node(Monitor *mon, const char *optstr)
-{
-QemuOpts *opts;
-QDict *qdict;
-Error *local_err = NULL;
-
-opts = qemu_opts_parse_noisily(&qemu_drive_opts, optstr, false);
-if (!opts) {
-return;
-}
-
-qdict = qemu_opts_to_qdict(opts, NULL);
-
-if (!qdict_get_try_str(qdict, "node-name")) {
-qobject_unref(qdict);
-error_report("'node-name' needs to be specified");
-goto out;
-}
-
-BlockDriverState *bs = bds_tree_init(qdict, &local_err);
-if (!bs) {
-error_report_err(local_err);
-goto out;
-}
-
-QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
-
-out:
-qemu_opts_del(opts);
-}
-
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
 BlockDriverState *bs;
@@ -3780,7 +3753,7 @@ void qmp_blockdev_add(BlockdevOptions *options, Error 
**errp)
 goto fail;
 }
 
-QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
+bdrv_set_monitor_owned(bs);
 
 fail:
 visit_free(v);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f422c0bff0..3f70a98b2d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1216,8 +1216,6 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 BlockCompletionFunc *cb, void *opaque,
 JobTxn *txn, Error **errp);
 
-void hmp_drive_add_node(Monitor *mon, const char *optstr);
-
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
   const char *child_name,
   const BdrvChildRole *child_role,
@@ -1322,4 +1320,7 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, 
uint64_t src_offset,
 
 int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
 
+void bdrv_set_monitor_owned(BlockDriverState *bs);
+BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp);
+
 #endif /* BLOCK_INT_H */
-- 
2.17.2

[PATCH v5 10/11] monitor/hmp: move hmp_info_block* to block-hmp-cmds.c

2020-03-08 Thread Maxim Levitsky

Signed-off-by: Maxim Levitsky 
Reviewed-by: Dr. David Alan Gilbert 
---
 block/monitor/block-hmp-cmds.c | 389 +
 include/block/block-hmp-cmds.h |   4 +
 include/monitor/hmp.h  |   4 -
 monitor/hmp-cmds.c | 388 
 4 files changed, 393 insertions(+), 392 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 5beb7df2f7..aebf1dce0d 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -46,10 +46,12 @@
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "qemu/sockets.h"
+#include "qemu/cutils.h"
 #include "sysemu/sysemu.h"
 #include "monitor/monitor.h"
 #include "monitor/hmp.h"
 #include "block/nbd.h"
+#include "block/qapi.h"
 #include "block/block_int.h"
 #include "block/block-hmp-cmds.h"
 #include "qemu-io.h"
@@ -594,3 +596,390 @@ fail:
 blk_unref(local_blk);
 hmp_handle_error(mon, err);
 }
+
+static void print_block_info(Monitor *mon, BlockInfo *info,
+ BlockDeviceInfo *inserted, bool verbose)
+{
+ImageInfo *image_info;
+
+assert(!info || !info->has_inserted || info->inserted == inserted);
+
+if (info && *info->device) {
+monitor_printf(mon, "%s", info->device);
+if (inserted && inserted->has_node_name) {
+monitor_printf(mon, " (%s)", inserted->node_name);
+}
+} else {
+assert(info || inserted);
+monitor_printf(mon, "%s",
+   inserted && inserted->has_node_name ? 
inserted->node_name
+   : info && info->has_qdev ? info->qdev
+   : "");
+}
+
+if (inserted) {
+monitor_printf(mon, ": %s (%s%s%s)\n",
+   inserted->file,
+   inserted->drv,
+   inserted->ro ? ", read-only" : "",
+   inserted->encrypted ? ", encrypted" : "");
+} else {
+monitor_printf(mon, ": [not inserted]\n");
+}
+
+if (info) {
+if (info->has_qdev) {
+monitor_printf(mon, "Attached to:  %s\n", info->qdev);
+}
+if (info->has_io_status && info->io_status != 
BLOCK_DEVICE_IO_STATUS_OK) {
+monitor_printf(mon, "I/O status:   %s\n",
+   BlockDeviceIoStatus_str(info->io_status));
+}
+
+if (info->removable) {
+monitor_printf(mon, "Removable device: %slocked, tray %s\n",
+   info->locked ? "" : "not ",
+   info->tray_open ? "open" : "closed");
+}
+}
+
+
+if (!inserted) {
+return;
+}
+
+monitor_printf(mon, "Cache mode:   %s%s%s\n",
+   inserted->cache->writeback ? "writeback" : "writethrough",
+   inserted->cache->direct ? ", direct" : "",
+   inserted->cache->no_flush ? ", ignore flushes" : "");
+
+if (inserted->has_backing_file) {
+monitor_printf(mon,
+   "Backing file: %s "
+   "(chain depth: %" PRId64 ")\n",
+   inserted->backing_file,
+   inserted->backing_file_depth);
+}
+
+if (inserted->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF) {
+monitor_printf(mon, "Detect zeroes:%s\n",
+BlockdevDetectZeroesOptions_str(inserted->detect_zeroes));
+}
+
+if (inserted->bps  || inserted->bps_rd  || inserted->bps_wr  ||
+inserted->iops || inserted->iops_rd || inserted->iops_wr)
+{
+monitor_printf(mon, "I/O throttling:   bps=%" PRId64
+" bps_rd=%" PRId64  " bps_wr=%" PRId64
+" bps_max=%" PRId64
+" bps_rd_max=%" PRId64
+" bps_wr_max=%" PRId64
+" iops=%" PRId64 " iops_rd=%" PRId64
+" iops_wr=%" PRId64
+" iops_max=%" PRId64
+" iops_rd_max=%" PRId64
+" iops_wr_max=%" PRId64
+" iops_size=%" PRId64
+" group=%s\n",
+inserted->

[PATCH v5 09/11] monitor/hmp: move remaining hmp_block* functions to block-hmp-cmds.c

2020-03-08 Thread Maxim Levitsky

Signed-off-by: Maxim Levitsky 
Reviewed-by: Dr. David Alan Gilbert 
---
 block/monitor/block-hmp-cmds.c | 140 +
 include/block/block-hmp-cmds.h |   9 +++
 include/monitor/hmp.h  |   6 --
 monitor/hmp-cmds.c | 137 
 4 files changed, 149 insertions(+), 143 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 188374abbc..5beb7df2f7 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -52,6 +52,7 @@
 #include "block/nbd.h"
 #include "block/block_int.h"
 #include "block/block-hmp-cmds.h"
+#include "qemu-io.h"
 
 void hmp_drive_add(Monitor *mon, const QDict *qdict)
 {
@@ -454,3 +455,142 @@ void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict)
 qmp_nbd_server_stop(&err);
 hmp_handle_error(mon, err);
 }
+
+void hmp_block_resize(Monitor *mon, const QDict *qdict)
+{
+const char *device = qdict_get_str(qdict, "device");
+int64_t size = qdict_get_int(qdict, "size");
+Error *err = NULL;
+
+qmp_block_resize(true, device, false, NULL, size, &err);
+hmp_handle_error(mon, err);
+}
+
+void hmp_block_stream(Monitor *mon, const QDict *qdict)
+{
+Error *error = NULL;
+const char *device = qdict_get_str(qdict, "device");
+const char *base = qdict_get_try_str(qdict, "base");
+int64_t speed = qdict_get_try_int(qdict, "speed", 0);
+
+qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
+ false, NULL, qdict_haskey(qdict, "speed"), speed, true,
+ BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
+ &error);
+
+hmp_handle_error(mon, error);
+}
+
+void hmp_block_passwd(Monitor *mon, const QDict *qdict)
+{
+const char *device = qdict_get_str(qdict, "device");
+const char *password = qdict_get_str(qdict, "password");
+Error *err = NULL;
+
+qmp_block_passwd(true, device, false, NULL, password, &err);
+hmp_handle_error(mon, err);
+}
+
+void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
+{
+Error *err = NULL;
+char *device = (char *) qdict_get_str(qdict, "device");
+BlockIOThrottle throttle = {
+.bps = qdict_get_int(qdict, "bps"),
+.bps_rd = qdict_get_int(qdict, "bps_rd"),
+.bps_wr = qdict_get_int(qdict, "bps_wr"),
+.iops = qdict_get_int(qdict, "iops"),
+.iops_rd = qdict_get_int(qdict, "iops_rd"),
+.iops_wr = qdict_get_int(qdict, "iops_wr"),
+};
+
+/*
+ * qmp_block_set_io_throttle has separate parameters for the
+ * (deprecated) block device name and the qdev ID but the HMP
+ * version has only one, so we must decide which one to pass.
+ */
+if (blk_by_name(device)) {
+throttle.has_device = true;
+throttle.device = device;
+} else {
+throttle.has_id = true;
+throttle.id = device;
+}
+
+qmp_block_set_io_throttle(&throttle, &err);
+hmp_handle_error(mon, err);
+}
+
+void hmp_eject(Monitor *mon, const QDict *qdict)
+{
+bool force = qdict_get_try_bool(qdict, "force", false);
+const char *device = qdict_get_str(qdict, "device");
+Error *err = NULL;
+
+qmp_eject(true, device, false, NULL, true, force, &err);
+hmp_handle_error(mon, err);
+}
+
+void hmp_qemu_io(Monitor *mon, const QDict *qdict)
+{
+BlockBackend *blk;
+BlockBackend *local_blk = NULL;
+bool qdev = qdict_get_try_bool(qdict, "qdev", false);
+const char *device = qdict_get_str(qdict, "device");
+const char *command = qdict_get_str(qdict, "command");
+Error *err = NULL;
+int ret;
+
+if (qdev) {
+blk = blk_by_qdev_id(device, &err);
+if (!blk) {
+goto fail;
+}
+} else {
+blk = blk_by_name(device);
+if (!blk) {
+BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err);
+if (bs) {
+blk = local_blk = blk_new(bdrv_get_aio_context(bs),
+  0, BLK_PERM_ALL);
+ret = blk_insert_bs(blk, bs, &err);
+if (ret < 0) {
+goto fail;
+}
+} else {
+goto fail;
+}
+}
+}
+
+/*
+ * Notably absent: Proper permission management. This is sad, but it seems
+ * almost impossible to achieve without changing the semantics and thereby
+ * limiting the use cases of the qemu-io HMP command.
+ *
+ * In an ideal world we would unconditionally create a new BlockBackend for
+ * qemuio_command(), but we have commands like 'reopen' and want th

[PATCH v5 05/11] monitor/hmp: move hmp_drive_mirror and hmp_drive_backup to block-hmp-cmds.c Moved code was added after 2012-01-13, thus under GPLv2+

2020-03-08 Thread Maxim Levitsky

Signed-off-by: Maxim Levitsky 
Reviewed-by: Dr. David Alan Gilbert 
---
 block/monitor/block-hmp-cmds.c | 60 ++
 include/block/block-hmp-cmds.h | 12 +--
 include/monitor/hmp.h  |  2 --
 monitor/hmp-cmds.c | 58 
 4 files changed, 69 insertions(+), 63 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index ad727a6b08..d6dd5d97f7 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -37,10 +37,12 @@
 #include "qapi/qapi-commands-block.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "sysemu/sysemu.h"
 #include "monitor/monitor.h"
+#include "monitor/hmp.h"
 #include "block/block_int.h"
 #include "block/block-hmp-cmds.h"
 
@@ -187,3 +189,61 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
 error_report("'commit' error for '%s': %s", device, strerror(-ret));
 }
 }
+
+void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
+{
+const char *filename = qdict_get_str(qdict, "target");
+const char *format = qdict_get_try_str(qdict, "format");
+bool reuse = qdict_get_try_bool(qdict, "reuse", false);
+bool full = qdict_get_try_bool(qdict, "full", false);
+Error *err = NULL;
+DriveMirror mirror = {
+.device = (char *)qdict_get_str(qdict, "device"),
+.target = (char *)filename,
+.has_format = !!format,
+.format = (char *)format,
+.sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
+.has_mode = true,
+.mode = reuse ? NEW_IMAGE_MODE_EXISTING : 
NEW_IMAGE_MODE_ABSOLUTE_PATHS,
+.unmap = true,
+};
+
+if (!filename) {
+error_setg(&err, QERR_MISSING_PARAMETER, "target");
+hmp_handle_error(mon, err);
+return;
+}
+qmp_drive_mirror(&mirror, &err);
+hmp_handle_error(mon, err);
+}
+
+void hmp_drive_backup(Monitor *mon, const QDict *qdict)
+{
+const char *device = qdict_get_str(qdict, "device");
+const char *filename = qdict_get_str(qdict, "target");
+const char *format = qdict_get_try_str(qdict, "format");
+bool reuse = qdict_get_try_bool(qdict, "reuse", false);
+bool full = qdict_get_try_bool(qdict, "full", false);
+bool compress = qdict_get_try_bool(qdict, "compress", false);
+Error *err = NULL;
+DriveBackup backup = {
+.device = (char *)device,
+.target = (char *)filename,
+.has_format = !!format,
+.format = (char *)format,
+.sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
+.has_mode = true,
+.mode = reuse ? NEW_IMAGE_MODE_EXISTING : 
NEW_IMAGE_MODE_ABSOLUTE_PATHS,
+.has_compress = !!compress,
+.compress = compress,
+};
+
+if (!filename) {
+error_setg(&err, QERR_MISSING_PARAMETER, "target");
+hmp_handle_error(mon, err);
+return;
+}
+
+qmp_drive_backup(&backup, &err);
+hmp_handle_error(mon, err);
+}
diff --git a/include/block/block-hmp-cmds.h b/include/block/block-hmp-cmds.h
index 30b0f56415..a64b737b3a 100644
--- a/include/block/block-hmp-cmds.h
+++ b/include/block/block-hmp-cmds.h
@@ -3,10 +3,13 @@
  *
  * Copyright (c) 2003-2008 Fabrice Bellard
  * Copyright (c) 2020 Red Hat, Inc.
+ * Copyright IBM, Corp. 2011
  *
- * This work is licensed under the terms of the GNU GPL, version 2.
- * or (at your option) any later version.
- * See the COPYING file in the top-level directory.
+ * Authors:
+ *  Anthony Liguori   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
  */
 
 #ifndef BLOCK_HMP_COMMANDS_H
@@ -17,4 +20,7 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict);
 void hmp_commit(Monitor *mon, const QDict *qdict);
 void hmp_drive_del(Monitor *mon, const QDict *qdict);
 
+void hmp_drive_mirror(Monitor *mon, const QDict *qdict);
+void hmp_drive_backup(Monitor *mon, const QDict *qdict);
+
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 3d329853b2..c1b363ee57 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -64,8 +64,6 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict);
 void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict);
 void hmp_snapshot_blkdev_internal(Monitor *mon, const QDict *qdict);
 void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict);
-void hmp_drive_mirror(Monitor *mon, const QDict *qdict);
-void hmp_drive_backup(Monitor *mon, const QDict *qdict);
 void hmp_loadvm(Monitor *mon, const QDict *qdict);
 void

[PATCH v5 07/11] monitor/hmp: move hmp_snapshot_* to block-hmp-cmds.c

2020-03-08 Thread Maxim Levitsky

hmp_snapshot_blkdev comes from the GPLv2 version of hmp-cmds.c, thus
the licence has to be changed to GPLv2.

Signed-off-by: Maxim Levitsky 
Reviewed-by: Dr. David Alan Gilbert 
---
 block/monitor/block-hmp-cmds.c | 58 --
 include/block/block-hmp-cmds.h |  4 +++
 include/monitor/hmp.h  |  3 --
 monitor/hmp-cmds.c | 47 ---
 4 files changed, 60 insertions(+), 52 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 8e8288c2f1..0131be8ecf 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1,10 +1,15 @@
 /*
  * Blockdev HMP commands
  *
+ *  Authors:
+ *  Anthony Liguori   
+ *
  * Copyright (c) 2003-2008 Fabrice Bellard
  *
- * This work is licensed under the terms of the GNU GPL, version 2 or
- * later.  See the COPYING file in the top-level directory.
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
  *
  * This file incorporates work covered by the following copyright and
  * permission notice:
@@ -299,3 +304,52 @@ void hmp_block_job_complete(Monitor *mon, const QDict 
*qdict)
 
 hmp_handle_error(mon, error);
 }
+
+void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
+{
+const char *device = qdict_get_str(qdict, "device");
+const char *filename = qdict_get_try_str(qdict, "snapshot-file");
+const char *format = qdict_get_try_str(qdict, "format");
+bool reuse = qdict_get_try_bool(qdict, "reuse", false);
+enum NewImageMode mode;
+Error *err = NULL;
+
+if (!filename) {
+/*
+ * In the future, if 'snapshot-file' is not specified, the snapshot
+ * will be taken internally. Today it's actually required.
+ */
+error_setg(&err, QERR_MISSING_PARAMETER, "snapshot-file");
+hmp_handle_error(mon, err);
+return;
+}
+
+mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+qmp_blockdev_snapshot_sync(true, device, false, NULL,
+   filename, false, NULL,
+   !!format, format,
+   true, mode, &err);
+hmp_handle_error(mon, err);
+}
+
+void hmp_snapshot_blkdev_internal(Monitor *mon, const QDict *qdict)
+{
+const char *device = qdict_get_str(qdict, "device");
+const char *name = qdict_get_str(qdict, "name");
+Error *err = NULL;
+
+qmp_blockdev_snapshot_internal_sync(device, name, &err);
+hmp_handle_error(mon, err);
+}
+
+void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict)
+{
+const char *device = qdict_get_str(qdict, "device");
+const char *name = qdict_get_str(qdict, "name");
+const char *id = qdict_get_try_str(qdict, "id");
+Error *err = NULL;
+
+qmp_blockdev_snapshot_delete_internal_sync(device, !!id, id,
+   true, name, &err);
+hmp_handle_error(mon, err);
+}
diff --git a/include/block/block-hmp-cmds.h b/include/block/block-hmp-cmds.h
index fcdf1eec48..cc81779c7c 100644
--- a/include/block/block-hmp-cmds.h
+++ b/include/block/block-hmp-cmds.h
@@ -29,4 +29,8 @@ void hmp_block_job_pause(Monitor *mon, const QDict *qdict);
 void hmp_block_job_resume(Monitor *mon, const QDict *qdict);
 void hmp_block_job_complete(Monitor *mon, const QDict *qdict);
 
+void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict);
+void hmp_snapshot_blkdev_internal(Monitor *mon, const QDict *qdict);
+void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict);
+
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 592ce0ccfe..6d34e29bb6 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -61,9 +61,6 @@ void hmp_set_link(Monitor *mon, const QDict *qdict);
 void hmp_block_passwd(Monitor *mon, const QDict *qdict);
 void hmp_balloon(Monitor *mon, const QDict *qdict);
 void hmp_block_resize(Monitor *mon, const QDict *qdict);
-void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict);
-void hmp_snapshot_blkdev_internal(Monitor *mon, const QDict *qdict);
-void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict);
 void hmp_loadvm(Monitor *mon, const QDict *qdict);
 void hmp_savevm(Monitor *mon, const QDict *qdict);
 void hmp_delvm(Monitor *mon, const QDict *qdict);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index ac90a9e0c6..74e6e5b7ef 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1342,53 +1342,6 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict)
 hmp_handle_error(mon, err);
 }
 
-void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
-{
-const char *devi

[PATCH v5 08/11] monitor/hmp: move hmp_nbd_server* to block-hmp-cmds.c

2020-03-08 Thread Maxim Levitsky

Signed-off-by: Maxim Levitsky 
Reviewed-by: Dr. David Alan Gilbert 
---
 block/monitor/block-hmp-cmds.c | 101 +
 include/block/block-hmp-cmds.h |   5 ++
 include/monitor/hmp.h  |   4 --
 monitor/hmp-cmds.c | 100 
 4 files changed, 106 insertions(+), 104 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 0131be8ecf..188374abbc 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -45,9 +45,11 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
+#include "qemu/sockets.h"
 #include "sysemu/sysemu.h"
 #include "monitor/monitor.h"
 #include "monitor/hmp.h"
+#include "block/nbd.h"
 #include "block/block_int.h"
 #include "block/block-hmp-cmds.h"
 
@@ -353,3 +355,102 @@ void hmp_snapshot_delete_blkdev_internal(Monitor *mon, 
const QDict *qdict)
true, name, &err);
 hmp_handle_error(mon, err);
 }
+
+void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
+{
+const char *uri = qdict_get_str(qdict, "uri");
+bool writable = qdict_get_try_bool(qdict, "writable", false);
+bool all = qdict_get_try_bool(qdict, "all", false);
+Error *local_err = NULL;
+BlockInfoList *block_list, *info;
+SocketAddress *addr;
+BlockExportNbd export;
+
+if (writable && !all) {
+error_setg(&local_err, "-w only valid together with -a");
+goto exit;
+}
+
+/* First check if the address is valid and start the server.  */
+addr = socket_parse(uri, &local_err);
+if (local_err != NULL) {
+goto exit;
+}
+
+nbd_server_start(addr, NULL, NULL, &local_err);
+qapi_free_SocketAddress(addr);
+if (local_err != NULL) {
+goto exit;
+}
+
+if (!all) {
+return;
+}
+
+/* Then try adding all block devices.  If one fails, close all and
+ * exit.
+ */
+block_list = qmp_query_block(NULL);
+
+for (info = block_list; info; info = info->next) {
+if (!info->value->has_inserted) {
+continue;
+}
+
+export = (BlockExportNbd) {
+.device = info->value->device,
+.has_writable   = true,
+.writable   = writable,
+};
+
+qmp_nbd_server_add(&export, &local_err);
+
+if (local_err != NULL) {
+qmp_nbd_server_stop(NULL);
+break;
+}
+}
+
+qapi_free_BlockInfoList(block_list);
+
+exit:
+hmp_handle_error(mon, local_err);
+}
+
+void hmp_nbd_server_add(Monitor *mon, const QDict *qdict)
+{
+const char *device = qdict_get_str(qdict, "device");
+const char *name = qdict_get_try_str(qdict, "name");
+bool writable = qdict_get_try_bool(qdict, "writable", false);
+Error *local_err = NULL;
+
+BlockExportNbd export = {
+.device = (char *) device,
+.has_name   = !!name,
+.name   = (char *) name,
+.has_writable   = true,
+.writable   = writable,
+};
+
+qmp_nbd_server_add(&export, &local_err);
+hmp_handle_error(mon, local_err);
+}
+
+void hmp_nbd_server_remove(Monitor *mon, const QDict *qdict)
+{
+const char *name = qdict_get_str(qdict, "name");
+bool force = qdict_get_try_bool(qdict, "force", false);
+Error *err = NULL;
+
+/* Rely on NBD_SERVER_REMOVE_MODE_SAFE being the default */
+qmp_nbd_server_remove(name, force, NBD_SERVER_REMOVE_MODE_HARD, &err);
+hmp_handle_error(mon, err);
+}
+
+void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict)
+{
+Error *err = NULL;
+
+qmp_nbd_server_stop(&err);
+hmp_handle_error(mon, err);
+}
diff --git a/include/block/block-hmp-cmds.h b/include/block/block-hmp-cmds.h
index cc81779c7c..50ff802598 100644
--- a/include/block/block-hmp-cmds.h
+++ b/include/block/block-hmp-cmds.h
@@ -33,4 +33,9 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict);
 void hmp_snapshot_blkdev_internal(Monitor *mon, const QDict *qdict);
 void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict);
 
+void hmp_nbd_server_start(Monitor *mon, const QDict *qdict);
+void hmp_nbd_server_add(Monitor *mon, const QDict *qdict);
+void hmp_nbd_server_remove(Monitor *mon, const QDict *qdict);
+void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict);
+
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 6d34e29bb6..736a969131 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -94,10 +94,6 @@ void hmp_getfd(Monitor *mon, const QDict *qdict);
 void hmp_closefd(Monitor *mon, const QDict *qdict);
 void hmp_sendkey(Monitor *mon, const

[PATCH v5 06/11] monitor/hmp: move hmp_block_job* to block-hmp-cmds.c

2020-03-08 Thread Maxim Levitsky

Signed-off-by: Maxim Levitsky 
Reviewed-by: Dr. David Alan Gilbert 
---
 block/monitor/block-hmp-cmds.c | 52 ++
 include/block/block-hmp-cmds.h |  6 
 include/monitor/hmp.h  |  5 
 monitor/hmp-cmds.c | 52 --
 4 files changed, 58 insertions(+), 57 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index d6dd5d97f7..8e8288c2f1 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -247,3 +247,55 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
 qmp_drive_backup(&backup, &err);
 hmp_handle_error(mon, err);
 }
+
+void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict)
+{
+Error *error = NULL;
+const char *device = qdict_get_str(qdict, "device");
+int64_t value = qdict_get_int(qdict, "speed");
+
+qmp_block_job_set_speed(device, value, &error);
+
+hmp_handle_error(mon, error);
+}
+
+void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
+{
+Error *error = NULL;
+const char *device = qdict_get_str(qdict, "device");
+bool force = qdict_get_try_bool(qdict, "force", false);
+
+qmp_block_job_cancel(device, true, force, &error);
+
+hmp_handle_error(mon, error);
+}
+
+void hmp_block_job_pause(Monitor *mon, const QDict *qdict)
+{
+Error *error = NULL;
+const char *device = qdict_get_str(qdict, "device");
+
+qmp_block_job_pause(device, &error);
+
+hmp_handle_error(mon, error);
+}
+
+void hmp_block_job_resume(Monitor *mon, const QDict *qdict)
+{
+Error *error = NULL;
+const char *device = qdict_get_str(qdict, "device");
+
+qmp_block_job_resume(device, &error);
+
+hmp_handle_error(mon, error);
+}
+
+void hmp_block_job_complete(Monitor *mon, const QDict *qdict)
+{
+Error *error = NULL;
+const char *device = qdict_get_str(qdict, "device");
+
+qmp_block_job_complete(device, &error);
+
+hmp_handle_error(mon, error);
+}
diff --git a/include/block/block-hmp-cmds.h b/include/block/block-hmp-cmds.h
index a64b737b3a..fcdf1eec48 100644
--- a/include/block/block-hmp-cmds.h
+++ b/include/block/block-hmp-cmds.h
@@ -23,4 +23,10 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict);
 void hmp_drive_mirror(Monitor *mon, const QDict *qdict);
 void hmp_drive_backup(Monitor *mon, const QDict *qdict);
 
+void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
+void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
+void hmp_block_job_pause(Monitor *mon, const QDict *qdict);
+void hmp_block_job_resume(Monitor *mon, const QDict *qdict);
+void hmp_block_job_complete(Monitor *mon, const QDict *qdict);
+
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index c1b363ee57..592ce0ccfe 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -87,11 +87,6 @@ void hmp_eject(Monitor *mon, const QDict *qdict);
 void hmp_change(Monitor *mon, const QDict *qdict);
 void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
 void hmp_block_stream(Monitor *mon, const QDict *qdict);
-void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
-void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
-void hmp_block_job_pause(Monitor *mon, const QDict *qdict);
-void hmp_block_job_resume(Monitor *mon, const QDict *qdict);
-void hmp_block_job_complete(Monitor *mon, const QDict *qdict);
 void hmp_migrate(Monitor *mon, const QDict *qdict);
 void hmp_device_add(Monitor *mon, const QDict *qdict);
 void hmp_device_del(Monitor *mon, const QDict *qdict);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 06f0cb4bb9..ac90a9e0c6 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1997,58 +1997,6 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 hmp_handle_error(mon, error);
 }
 
-void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict)
-{
-Error *error = NULL;
-const char *device = qdict_get_str(qdict, "device");
-int64_t value = qdict_get_int(qdict, "speed");
-
-qmp_block_job_set_speed(device, value, &error);
-
-hmp_handle_error(mon, error);
-}
-
-void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
-{
-Error *error = NULL;
-const char *device = qdict_get_str(qdict, "device");
-bool force = qdict_get_try_bool(qdict, "force", false);
-
-qmp_block_job_cancel(device, true, force, &error);
-
-hmp_handle_error(mon, error);
-}
-
-void hmp_block_job_pause(Monitor *mon, const QDict *qdict)
-{
-Error *error = NULL;
-const char *device = qdict_get_str(qdict, "device");
-
-qmp_block_job_pause(device, &error);
-
-hmp_handle_error(mon, error);
-}
-
-void hmp_block_job_resume(Monitor *mon, const QDict *qdict)
-{
-Error *error = NULL;
-const char *device = qdict_get_str(qdict, "device&q

[PATCH v5 03/11] monitor/hmp: rename device-hotplug.c to block/monitor/block-hmp-cmds.c

2020-03-08 Thread Maxim Levitsky

These days device-hotplug.c only contains hmp_drive_add.
In the next patch, the rest of the hmp_drive* functions will be moved
there.

Also add block-hmp-cmds.h to contain the prototypes of these
functions.

The license for block-hmp-cmds.h is GPLv2+, since it contains code
moved from sysemu.h, which lacks a license header and thus, according
to LICENSE, is under GPLv2+.


Signed-off-by: Maxim Levitsky 
Reviewed-by: Markus Armbruster 
---
 MAINTAINERS  |  1 +
 Makefile.objs|  2 +-
 block/Makefile.objs  |  1 +
 block/monitor/Makefile.objs  |  1 +
 .../monitor/block-hmp-cmds.c |  3 ++-
 include/block/block-hmp-cmds.h   | 16 
 include/sysemu/sysemu.h  |  3 ---
 monitor/misc.c   |  1 +
 8 files changed, 23 insertions(+), 5 deletions(-)
 create mode 100644 block/monitor/Makefile.objs
 rename device-hotplug.c => block/monitor/block-hmp-cmds.c (97%)
 create mode 100644 include/block/block-hmp-cmds.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 36d0c6887a..d881ba7d9c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1920,6 +1920,7 @@ Block QAPI, monitor, command line
 M: Markus Armbruster 
 S: Supported
 F: blockdev.c
+F: blockdev-hmp-cmds.c
 F: block/qapi.c
 F: qapi/block*.json
 F: qapi/transaction.json
diff --git a/Makefile.objs b/Makefile.objs
index e288663d89..40d3a1696c 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -48,7 +48,7 @@ common-obj-y += dump/
 common-obj-y += job-qmp.o
 common-obj-y += monitor/
 common-obj-y += net/
-common-obj-y += qdev-monitor.o device-hotplug.o
+common-obj-y += qdev-monitor.o
 common-obj-$(CONFIG_WIN32) += os-win32.o
 common-obj-$(CONFIG_POSIX) += os-posix.o
 
diff --git a/block/Makefile.objs b/block/Makefile.objs
index cb36ae2503..3635b6b4c1 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -45,6 +45,7 @@ block-obj-y += crypto.o
 block-obj-y += aio_task.o
 block-obj-y += backup-top.o
 block-obj-y += filter-compress.o
+common-obj-y += monitor/
 
 block-obj-y += stream.o
 
diff --git a/block/monitor/Makefile.objs b/block/monitor/Makefile.objs
new file mode 100644
index 00..0a74f9a8b5
--- /dev/null
+++ b/block/monitor/Makefile.objs
@@ -0,0 +1 @@
+common-obj-y += block-hmp-cmds.o
diff --git a/device-hotplug.c b/block/monitor/block-hmp-cmds.c
similarity index 97%
rename from device-hotplug.c
rename to block/monitor/block-hmp-cmds.c
index 554e4d98db..bcf35b4b44 100644
--- a/device-hotplug.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1,5 +1,5 @@
 /*
- * QEMU device hotplug helpers
+ * Blockdev HMP commands
  *
  * Copyright (c) 2004 Fabrice Bellard
  *
@@ -33,6 +33,7 @@
 #include "sysemu/sysemu.h"
 #include "monitor/monitor.h"
 #include "block/block_int.h"
+#include "block/block-hmp-cmds.h"
 
 
 void hmp_drive_add(Monitor *mon, const QDict *qdict)
diff --git a/include/block/block-hmp-cmds.h b/include/block/block-hmp-cmds.h
new file mode 100644
index 00..0db8a889a1
--- /dev/null
+++ b/include/block/block-hmp-cmds.h
@@ -0,0 +1,16 @@
+/*
+ * HMP commands related to the block layer
+ *
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * or (at your option) any later version.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BLOCK_HMP_COMMANDS_H
+#define BLOCK_HMP_COMMANDS_H
+
+void hmp_drive_add(Monitor *mon, const QDict *qdict);
+
+#endif
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 479d90bcea..ef81302e1a 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -63,9 +63,6 @@ extern int nb_option_roms;
 extern const char *prom_envs[MAX_PROM_ENVS];
 extern unsigned int nb_prom_envs;
 
-/* generic hotplug */
-void hmp_drive_add(Monitor *mon, const QDict *qdict);
-
 /* pcie aer error injection */
 void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict);
 
diff --git a/monitor/misc.c b/monitor/misc.c
index 1748ab3911..c3bc34c099 100644
--- a/monitor/misc.c
+++ b/monitor/misc.c
@@ -66,6 +66,7 @@
 #include "qemu/option.h"
 #include "qemu/thread.h"
 #include "block/qapi.h"
+#include "block/block-hmp-cmds.h"
 #include "qapi/qapi-commands-char.h"
 #include "qapi/qapi-commands-control.h"
 #include "qapi/qapi-commands-migration.h"
-- 
2.17.2

[PATCH v5 04/11] monitor/hmp: move hmp_drive_del and hmp_commit to block-hmp-cmds.c

2020-03-08 Thread Maxim Levitsky

Signed-off-by: Maxim Levitsky 
Reviewed-by: Dr. David Alan Gilbert 
---
 block/monitor/block-hmp-cmds.c | 108 -
 blockdev.c |  96 +
 include/block/block-hmp-cmds.h |   4 ++
 include/sysemu/blockdev.h  |   4 --
 4 files changed, 111 insertions(+), 101 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index bcf35b4b44..ad727a6b08 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -1,7 +1,15 @@
 /*
  * Blockdev HMP commands
  *
- * Copyright (c) 2004 Fabrice Bellard
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to 
deal
@@ -26,6 +34,7 @@
 #include "hw/boards.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
+#include "qapi/qapi-commands-block.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/error.h"
 #include "qemu/config-file.h"
@@ -35,7 +44,6 @@
 #include "block/block_int.h"
 #include "block/block-hmp-cmds.h"
 
-
 void hmp_drive_add(Monitor *mon, const QDict *qdict)
 {
 Error *err = NULL;
@@ -83,3 +91,99 @@ err:
 blk_unref(blk);
 }
 }
+
+void hmp_drive_del(Monitor *mon, const QDict *qdict)
+{
+const char *id = qdict_get_str(qdict, "id");
+BlockBackend *blk;
+BlockDriverState *bs;
+AioContext *aio_context;
+Error *local_err = NULL;
+
+bs = bdrv_find_node(id);
+if (bs) {
+qmp_blockdev_del(id, &local_err);
+if (local_err) {
+error_report_err(local_err);
+}
+return;
+}
+
+blk = blk_by_name(id);
+if (!blk) {
+error_report("Device '%s' not found", id);
+return;
+}
+
+if (!blk_legacy_dinfo(blk)) {
+error_report("Deleting device added with blockdev-add"
+ " is not supported");
+return;
+}
+
+aio_context = blk_get_aio_context(blk);
+aio_context_acquire(aio_context);
+
+bs = blk_bs(blk);
+if (bs) {
+if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
+error_report_err(local_err);
+aio_context_release(aio_context);
+return;
+}
+
+blk_remove_bs(blk);
+}
+
+/* Make the BlockBackend and the attached BlockDriverState anonymous */
+monitor_remove_blk(blk);
+
+/*
+ * If this BlockBackend has a device attached to it, its refcount will be
+ * decremented when the device is removed; otherwise we have to do so here.
+ */
+if (blk_get_attached_dev(blk)) {
+/* Further I/O must not pause the guest */
+blk_set_on_error(blk, BLOCKDEV_ON_ERROR_REPORT,
+ BLOCKDEV_ON_ERROR_REPORT);
+} else {
+blk_unref(blk);
+}
+
+aio_context_release(aio_context);
+}
+
+void hmp_commit(Monitor *mon, const QDict *qdict)
+{
+const char *device = qdict_get_str(qdict, "device");
+BlockBackend *blk;
+int ret;
+
+if (!strcmp(device, "all")) {
+ret = blk_commit_all();
+} else {
+BlockDriverState *bs;
+AioContext *aio_context;
+
+blk = blk_by_name(device);
+if (!blk) {
+error_report("Device '%s' not found", device);
+return;
+}
+if (!blk_is_available(blk)) {
+error_report("Device '%s' has no medium", device);
+return;
+}
+
+bs = blk_bs(blk);
+aio_context = bdrv_get_aio_context(bs);
+aio_context_acquire(aio_context);
+
+ret = bdrv_commit(bs);
+
+aio_context_release(aio_context);
+}
+if (ret < 0) {
+error_report("'commit' error for '%s': %s", device, strerror(-ret));
+}
+}
diff --git a/blockdev.c b/blockdev.c
index 3e44fa766b..b38c247cdc 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1039,41 +1039,6 @@ static BlockDriverState *qmp_get_root_bs(const char 
*name, Error **errp)
 return bs;
 }
 
-void hmp_commit(Monitor *mon, const QDict *qdict)
-{
-const char *device = qdict_get_str(qdict, "device");
-BlockBackend *blk;
-int ret;
-
-if (!strcmp(device, "all")) {
-ret = blk_commit_all();
-} else {
-BlockDriverState *bs;
-AioContext *aio_context;
-
-blk = blk_by_name(device);
-if (!blk) {
-

[PATCH v5 02/11] monitor/hmp: inline add_init_drive

2020-03-08 Thread Maxim Levitsky

This function is only used by hmp_drive_add.
The code is just a bit shorter this way.

No functional changes

Signed-off-by: Maxim Levitsky 
Reviewed-by: Markus Armbruster 
---
 device-hotplug.c | 31 ---
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/device-hotplug.c b/device-hotplug.c
index f01d53774b..554e4d98db 100644
--- a/device-hotplug.c
+++ b/device-hotplug.c
@@ -34,42 +34,35 @@
 #include "monitor/monitor.h"
 #include "block/block_int.h"
 
-static DriveInfo *add_init_drive(const char *optstr)
+
+void hmp_drive_add(Monitor *mon, const QDict *qdict)
 {
 Error *err = NULL;
 DriveInfo *dinfo;
 QemuOpts *opts;
 MachineClass *mc;
+const char *optstr = qdict_get_str(qdict, "opts");
+bool node = qdict_get_try_bool(qdict, "node", false);
+
+if (node) {
+hmp_drive_add_node(mon, optstr);
+return;
+}
 
 opts = drive_def(optstr);
 if (!opts)
-return NULL;
+return;
 
 mc = MACHINE_GET_CLASS(current_machine);
 dinfo = drive_new(opts, mc->block_default_type, &err);
 if (err) {
 error_report_err(err);
 qemu_opts_del(opts);
-return NULL;
-}
-
-return dinfo;
-}
-
-void hmp_drive_add(Monitor *mon, const QDict *qdict)
-{
-DriveInfo *dinfo = NULL;
-const char *opts = qdict_get_str(qdict, "opts");
-bool node = qdict_get_try_bool(qdict, "node", false);
-
-if (node) {
-hmp_drive_add_node(mon, opts);
-return;
+goto err;
 }
 
-dinfo = add_init_drive(opts);
 if (!dinfo) {
-goto err;
+return;
 }
 
 switch (dinfo->type) {
-- 
2.17.2

[PATCH v5 00/11] HMP monitor handlers refactoring

2020-03-08 Thread Maxim Levitsky

This patch series is a bunch of cleanups to the HMP monitor code.
It mostly moves the blockdev-related HMP handlers to their own file,
and does some minor refactoring.

No functional changes expected.

Changes from V1:
   * move the handlers to block/monitor/block-hmp-cmds.c
   * tiny cleanup for the commit messages

Changes from V2:
   * Moved all the function prototypes to new header (blockdev-hmp-cmds.h)
   * Set the license of blockdev-hmp-cmds.c to GPLv2+
   * Moved hmp_snapshot_* functions to blockdev-hmp-cmds.c
   * Moved hmp_drive_add_node to blockdev-hmp-cmds.c
 (this change needed some new exports, thus in separate new patch)
   * Moved hmp_qemu_io and hmp_eject to blockdev-hmp-cmds.c
   * Added 'error:' prefix to vreport, and updated the iotests
 This is invasive change, but really feels like the right one
   * Added minor refactoring patch that drops an unused #include

Changes from V3:
   * Dropped the error prefix patches for now due to fact that it seems
 that libvirt doesn't need that after all. Oh well...
 I'll send them in a separate series.

   * Hopefully correctly merged the copyright info the new files
 Both files are GPLv2 now (due to code from hmp.h/hmp-cmds.c)

   * Addressed review feedback
   * Renamed the added header to block-hmp-cmds.h

   * Got rid of checkpatch.pl warnings in the moved code
 (cosmetic code changes only)

   * I kept the reviewed-by tags, since the changes I did are minor.
 I hope that this is right thing to do.

Changes from V4:
   * Rebase with recent changes
   * Fixed review feedback

Best regards,
    Maxim Levitsky

Maxim Levitsky (11):
  usb/dev-storage: remove unused include
  monitor/hmp: inline add_init_drive
  monitor/hmp: rename device-hotplug.c to block/monitor/block-hmp-cmds.c
  monitor/hmp: move hmp_drive_del and hmp_commit to block-hmp-cmds.c
  monitor/hmp: move hmp_drive_mirror and hmp_drive_backup to
block-hmp-cmds.c Moved code was added after 2012-01-13, thus under
GPLv2+
  monitor/hmp: move hmp_block_job* to block-hmp-cmds.c
  monitor/hmp: move hmp_snapshot_* to block-hmp-cmds.c
  monitor/hmp: move hmp_nbd_server* to block-hmp-cmds.c
  monitor/hmp: move remaining hmp_block* functions to block-hmp-cmds.c
  monitor/hmp: move hmp_info_block* to block-hmp-cmds.c
  monitor/hmp: Move hmp_drive_add_node to block-hmp-cmds.c

 MAINTAINERS|1 +
 Makefile.objs  |2 +-
 block/Makefile.objs|1 +
 block/monitor/Makefile.objs|1 +
 block/monitor/block-hmp-cmds.c | 1015 
 blockdev.c |  137 +
 device-hotplug.c   |   91 ---
 hw/usb/dev-storage.c   |1 -
 include/block/block-hmp-cmds.h |   54 ++
 include/block/block_int.h  |5 +-
 include/monitor/hmp.h  |   24 -
 include/sysemu/blockdev.h  |4 -
 include/sysemu/sysemu.h|3 -
 monitor/hmp-cmds.c |  782 
 monitor/misc.c |1 +
 15 files changed, 1085 insertions(+), 1037 deletions(-)
 create mode 100644 block/monitor/Makefile.objs
 create mode 100644 block/monitor/block-hmp-cmds.c
 delete mode 100644 device-hotplug.c
 create mode 100644 include/block/block-hmp-cmds.h

-- 
2.17.2

[PATCH v5 01/11] usb/dev-storage: remove unused include

2020-03-08 Thread Maxim Levitsky

Signed-off-by: Maxim Levitsky 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/usb/dev-storage.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index 90da008df1..5629213d55 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -19,7 +19,6 @@
 #include "hw/scsi/scsi.h"
 #include "ui/console.h"
 #include "migration/vmstate.h"
-#include "monitor/monitor.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/block-backend.h"
 #include "qapi/visitor.h"
-- 
2.17.2

Re: QAPI schema for desired state of LUKS keyslots (was: [PATCH 02/13] qcrypto-luks: implement encryption key management)

2020-03-05 Thread Maxim Levitsky

On Tue, 2020-03-03 at 11:18 +0200, Maxim Levitsky wrote:
> On Sat, 2020-02-15 at 15:51 +0100, Markus Armbruster wrote:
> > Review of this patch led to a lengthy QAPI schema design discussion.
> > Let me try to condense it into a concrete proposal.
> > 
> > This is about the QAPI schema, and therefore about QMP.  The
> > human-friendly interface is out of scope.  Not because it's not
> > important (it clearly is!), only because we need to *focus* to have a
> > chance at success.
> > 
> > I'm going to include a few design options.  I'll mark them "Option:".
> > 
> > The proposed "amend" interface takes a specification of desired state,
> > and figures out how to get from here to there by itself.  LUKS keyslots
> > are one part of desired state.
> > 
> > We commonly have eight LUKS keyslots.  Each keyslot is either active or
> > inactive.  An active keyslot holds a secret.
> > 
> > Goal: a QAPI type for specifying desired state of LUKS keyslots.
> > 
> > Proposal:
> > 
> > { 'enum': 'LUKSKeyslotState',
> >   'data': [ 'active', 'inactive' ] }
> > 
> > { 'struct': 'LUKSKeyslotActive',
> >   'data': { 'secret': 'str',
> > > '*iter-time': 'int' } }
> > 
> > { 'struct': 'LUKSKeyslotInactive',
> >   'data': { '*old-secret': 'str' } }
> > 
> > { 'union': 'LUKSKeyslotAmend',
> >   'base': { '*keyslot': 'int',
> > 'state': 'LUKSKeyslotState' }
> >   'discriminator': 'state',
> >   'data': { 'active': 'LUKSKeyslotActive',
> > 'inactive': 'LUKSKeyslotInactive' } }
> > 
> > LUKSKeyslotAmend specifies desired state for a set of keyslots.
> > 
> > Four cases:
> > 
> > * @state is "active"
> > 
> >   Desired state is active holding the secret given by @secret.  Optional
> >   @iter-time tweaks key stretching.
> > 
> >   The keyslot is chosen either by the user or by the system, as follows:
> > 
> >   - @keyslot absent
> > 
> > One inactive keyslot chosen by the system.  If none exists, error.
> > 
> >   - @keyslot present
> > 
> > The keyslot given by @keyslot.
> > 
> > If it's already active holding @secret, no-op.  Rationale: the
> > current state is the desired state.
> > 
> > If it's already active holding another secret, error.  Rationale:
> > update in place is unsafe.
> > 
> > Option: delete the "already active holding @secret" case.  Feels
> > inelegant to me.  Okay if it makes things substantially simpler.
> > 
> > * @state is "inactive"
> > 
> >   Desired state is inactive.
> > 
> >   Error if the current state has active keyslots, but the desired state
> >   has none.
> > 
> >   The user chooses the keyslot by number and/or by the secret it holds,
> >   as follows:
> > 
> >   - @keyslot absent, @old-secret present
> > 
> > All active keyslots holding @old-secret.  If none exists, error.
> > 
> >   - @keyslot present, @old-secret absent
> > 
> > The keyslot given by @keyslot.
> > 
> > If it's already inactive, no-op.  Rationale: the current state is
> > the desired state.
> > 
> >   - both @keyslot and @old-secret present
> > 
> > The keyslot given by keyslot.
> > 
> > If it's inactive or holds a secret other than @old-secret, error.
> > 
> > Option: error regardless of @old-secret, if that makes things
> > simpler.
> > 
> >   - neither @keyslot nor @old-secret present
> > 
> > All keyslots.  Note that this will error out due to "desired state
> > has no active keyslots" unless the current state has none, either.
> > 
> > Option: error out unconditionally.
> > 
> > Note that LUKSKeyslotAmend can specify only one desired state for
> > commonly just one keyslot.  Rationale: this satisfies practical needs.
> > An array of LUKSKeyslotAmend could specify desired state for all
> > keyslots.  However, multiple array elements could then apply to the same
> > slot.  We'd have to specify how to resolve such conflicts, and

Re: [PATCH v4 07/11] monitor/hmp: move hmp_snapshot_* to block-hmp-cmds.c hmp_snapshot_blkdev is from GPLv2 version of the hmp-cmds.c thus have to change the licence to GPLv2

2020-03-04 Thread Maxim Levitsky

On Tue, 2020-03-03 at 18:15 +0100, Kevin Wolf wrote:
> Am 30.01.2020 um 13:34 hat Maxim Levitsky geschrieben:
> > Signed-off-by: Maxim Levitsky 
> > Reviewed-by: Dr. David Alan Gilbert 
> 
> Very long subject line. I suppose the license notice should be in the
> body instead.
> 
> >  block/monitor/block-hmp-cmds.c | 56 --
> >  include/block/block-hmp-cmds.h |  4 +++
> >  include/monitor/hmp.h  |  3 --
> >  monitor/hmp-cmds.c | 47 
> >  4 files changed, 58 insertions(+), 52 deletions(-)
> > 
> > diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
> > index 8e8288c2f1..b83687196f 100644
> > --- a/block/monitor/block-hmp-cmds.c
> > +++ b/block/monitor/block-hmp-cmds.c
> > @@ -1,10 +1,13 @@
> >  /*
> >   * Blockdev HMP commands
> >   *
> > + *  Authors:
> > + *  Anthony Liguori   
> > + *
> >   * Copyright (c) 2003-2008 Fabrice Bellard
> >   *
> > - * This work is licensed under the terms of the GNU GPL, version 2 or
> > - * later.  See the COPYING file in the top-level directory.
> > + * This work is licensed under the terms of the GNU GPL, version 2.
> > + * See the COPYING file in the top-level directory.
> 
> Please also copy the next paragraph of the license header:
> 
>  * Contributions after 2012-01-13 are licensed under the terms of the
>  * GNU GPL, version 2 or (at your option) any later version.
> 
> Kevin
Will do,
Best regards,
Maxim Levitsky

Re: [PATCH v4 02/11] monitor/hmp: uninline add_init_drive

2020-03-04 Thread Maxim Levitsky

On Tue, 2020-03-03 at 18:10 +0100, Kevin Wolf wrote:
> Am 30.01.2020 um 13:34 hat Maxim Levitsky geschrieben:
> > This is only used by hmp_drive_add.
> > The code is just a bit shorter this way.
> > 
> > No functional changes
> > 
> > Signed-off-by: Maxim Levitsky 
> > Reviewed-by: Markus Armbruster 
> 
> Shouldn't the subject say "inline" rather than "uninline"?
> 
> Kevin

Oh, you are absolutely correct. Even now I don't know why I thought
about it that way.

Best regards,
Maxim Levitsky

Re: QAPI schema for desired state of LUKS keyslots (was: [PATCH 02/13] qcrypto-luks: implement encryption key management)

2020-03-03 Thread Maxim Levitsky

slot:
> 
>   { "state": "inactive", "keyslot": 0 }
> 
>   Possibly less dangerous:
> 
>   { "state": "inactive", "keyslot": 0, "old-secret": "CIA/GRU/MI6" }
> 
> Option: Make use of Max's patches to support optional union tag with
> default value to let us default @state to "active".  I doubt this makes
> much of a difference in QMP.  A human-friendly interface should probably
> be higher level anyway (Daniel pointed to cryptsetup).
> 
> Option: LUKSKeyslotInactive member @old-secret could also be named
> @secret.  I don't care.
> 
> Option: delete @keyslot.  It provides low-level slot access.
> Complicates the interface.  Fine if we need low-level slot access.  Do
> we?
> 
> I apologize for the time it has taken me to write this.
> 
> Comments?

I tried today to implement this but I hit a very unpleasant roadblock:

Since QCrypto is generic (even though it currently implements only LUKS for
raw/qcow2 usage, and legacy qcow2 AES encryption), I still can't assume that
this will always be the case.
Thus I implemented the QCrypto amend API in this way:

##
# @QCryptoBlockAmendOptions:
#
# The options that are available for all encryption formats
# when amending encryption settings
#
# Since: 5.0
##
{ 'union': 'QCryptoBlockAmendOptions',
  'base': 'QCryptoBlockOptionsBase',
  'discriminator': 'format',
  'data': {
  'luks': 'QCryptoBlockAmendOptionsLUKS' } }

However, QCryptoBlockAmendOptionsLUKS is a union too, to be in line with the
API proposal, but nested unions are not supported at the QAPI level. After
Markus and I talked about it, we are not sure that it is worth implementing
this support only for this case.

So far I see the following solutions


1. Drop the QCryptoBlockAmendOptionsLUKS union for now.
This will make the schema pretty much the same as my original proposal;
however, the API will stay the same, so once nested unions are implemented
this union can always be introduced again.

2. Drop the QCryptoBlockAmendOptions union. Strictly speaking, this union is
not needed since it has only one member anyway. However, it is used by the
qcow2 QAPI schema so that qcow2 doesn't hardcode an encryption format for
amend, just like it doesn't for creation (the format could be hardcoded for
now, as long as we don't have more amendable encryption formats).
I also use QCryptoBlockAmendOptions in the C code of the QCrypto API, so it
would be ugly to use QCryptoBlockAmendOptionsLUKS there instead.


3. Make QCryptoBlockAmendOptionsLUKS a struct and add to it a nested member
with a new union type (say QCryptoBlockAmendOptionsLUKS1), which will be
exactly what QCryptoBlockAmendOptionsLUKS was.

This IMHO is even uglier, since it changes the API (which we can't fix later)
and adds both a dummy struct field and a dummy struct name.

I personally vote for option 1.

Best regards,
Maxim Levitsky

Re: QAPI schema for desired state of LUKS keyslots

2020-02-26 Thread Maxim Levitsky

antics are just the most regular I could find.  We can
> > > therefore resolve the conflict by picking "active, both absent":
> > > 
> > >   keyslot old-secret  slot(s) selected
> > >   absent  absent  one inactive slot if exist, else error
> > >   present absent  the slot given by @keyslot
> > >   absent  present all active slots holding @old-secret
> > >   present present the slot given by @keyslot, error unless
> > >   it's active holding @old-secret
> > > 
> > > Changes:
> > > 
> > > * inactive, both absent: changed; we select "one inactive slot" instead of
> > >   "all slots".
> > > 
> > >   "All slots" is a no-op when the current state has no active keyslots,
> > >   else error.
> > > 
> > >   "One inactive slot" is a no-op when the current state has one, else
> > >   error.  Thus, we no-op rather than error in some states.
> > > 
> > > * active, keyslot absent or present, old-secret present: new; selects
> > >   active slot(s) holding @old-secret, no-op when old-secret == secret,
> > >   else error (no in place update)
> > > 
> > > Can do.  It's differently irregular, and has a few more combinations
> > > that are basically useless, which I find unappealing.  Matter of taste,
> > > I guess.
> > > 
> > > Anyone got strong feelings here?
> > 
> > The only strong feeling I have is that I absolutely don’t have a strong
> > feeling about this. :)
> > 
> > As such, I think we should just treat my rambling as such and stick to
> > your proposal, since we’ve already gathered support for it.
> 
> Thanks!

So, in summary, do I have the green light to implement Markus's proposal as
is?

Best regards,
Maxim Levitsky

Re: QAPI schema for desired state of LUKS keyslots (was: [PATCH 02/13] qcrypto-luks: implement encryption key management)

2020-02-24 Thread Maxim Levitsky

On Mon, 2020-02-24 at 14:46 +, Daniel P. Berrangé wrote:
> On Mon, Feb 17, 2020 at 01:07:23PM +0200, Maxim Levitsky wrote:
> > On Mon, 2020-02-17 at 11:37 +0100, Kevin Wolf wrote:
> > > Am 15.02.2020 um 15:51 hat Markus Armbruster geschrieben:
> > > > Review of this patch led to a lengthy QAPI schema design discussion.
> > > > Let me try to condense it into a concrete proposal.
> > > > 
> > > > This is about the QAPI schema, and therefore about QMP.  The
> > > > human-friendly interface is out of scope.  Not because it's not
> > > > important (it clearly is!), only because we need to *focus* to have a
> > > > chance at success.
> > > > 
> > > > I'm going to include a few design options.  I'll mark them "Option:".
> > > > 
> > > > The proposed "amend" interface takes a specification of desired state,
> > > > and figures out how to get from here to there by itself.  LUKS keyslots
> > > > are one part of desired state.
> > > > 
> > > > We commonly have eight LUKS keyslots.  Each keyslot is either active or
> > > > inactive.  An active keyslot holds a secret.
> > > > 
> > > > Goal: a QAPI type for specifying desired state of LUKS keyslots.
> > > > 
> > > > Proposal:
> > > > 
> > > > { 'enum': 'LUKSKeyslotState',
> > > >   'data': [ 'active', 'inactive' ] }
> > > > 
> > > > { 'struct': 'LUKSKeyslotActive',
> > > >   'data': { 'secret': 'str',
> > > > > '*iter-time': 'int' } }
> > > > 
> > > > { 'struct': 'LUKSKeyslotInactive',
> > > >   'data': { '*old-secret': 'str' } }
> > > > 
> > > > { 'union': 'LUKSKeyslotAmend',
> > > >   'base': { '*keyslot': 'int',
> > > > 'state': 'LUKSKeyslotState' }
> > > >   'discriminator': 'state',
> > > >   'data': { 'active': 'LUKSKeyslotActive',
> > > > 'inactive': 'LUKSKeyslotInactive' } }
> > > > 
> > > > LUKSKeyslotAmend specifies desired state for a set of keyslots.
> > > 
> > > Though not arbitrary sets of keyslots, it's only a single keyslot or
> > > multiple keyslots containing the same secret. Might be good enough in
> > > practice, though it means that you may have to issue multiple amend
> > > commands to get to the final state that you really want (even if doing
> > > everything at once would be safe).
> > > 
> > > > Four cases:
> > > > 
> > > > * @state is "active"
> > > > 
> > > >   Desired state is active holding the secret given by @secret.  Optional
> > > >   @iter-time tweaks key stretching.
> > > > 
> > > >   The keyslot is chosen either by the user or by the system, as follows:
> > > > 
> > > >   - @keyslot absent
> > > > 
> > > > One inactive keyslot chosen by the system.  If none exists, error.
> > > > 
> > > >   - @keyslot present
> > > > 
> > > > The keyslot given by @keyslot.
> > > > 
> > > > If it's already active holding @secret, no-op.  Rationale: the
> > > > current state is the desired state.
> > > > 
> > > > If it's already active holding another secret, error.  Rationale:
> > > > update in place is unsafe.
> > > > 
> > > > Option: delete the "already active holding @secret" case.  Feels
> > > > inelegant to me.  Okay if it makes things substantially simpler.
> > > > 
> > > > * @state is "inactive"
> > > > 
> > > >   Desired state is inactive.
> > > > 
> > > >   Error if the current state has active keyslots, but the desired state
> > > >   has none.
> > > > 
> > > >   The user chooses the keyslot by number and/or by the secret it holds,
> > > >   as follows:
> > > > 
> > > >   - @keyslot absent, @old-secret present
> > > > 
> > > > All act

Re: [PATCH v4 00/11] RFC: [for 5.0]: HMP monitor handlers refactoring

2020-02-20 Thread Maxim Levitsky

On Fri, 2020-02-07 at 18:28 +, Dr. David Alan Gilbert wrote:
> * Maxim Levitsky (mlevi...@redhat.com) wrote:
> > On Mon, 2020-02-03 at 19:57 +, Dr. David Alan Gilbert wrote:
> > > * Maxim Levitsky (mlevi...@redhat.com) wrote:
> > > > This patch series is bunch of cleanups to the hmp monitor code.
> > > > It mostly moves the blockdev related hmp handlers to its own file,
> > > > and does some minor refactoring.
> > > > 
> > > > No functional changes expected.
> > > 
> > > You've still got the title marked as RFC - are you actually ready for
> > > this log?
> > 
> > I forgot to update this to be honest, I don't consider this an RFC,
> > especially since I dropped for now the patches that might cause
> > issues. This is now just a nice refactoring.
> 
> OK, so if we can get some block people to say they're happy, then
> I'd be happy to take this through HMP or they can take it through block.


Any update?

Best regards,
Maxim Levitsky

> 
> Dave
> 
> > Best regards,
> > Maxim Levitsky
> > 
> > > 
> > > Dave
> > > 
> > > > 
> > > > Changes from V1:
> > > >* move the handlers to block/monitor/block-hmp-cmds.c
> > > >* tiny cleanup for the commit messages
> > > > 
> > > > Changes from V2:
> > > >* Moved all the function prototypes to new header 
> > > > (blockdev-hmp-cmds.h)
> > > >* Set the license of blockdev-hmp-cmds.c to GPLv2+
> > > >* Moved hmp_snapshot_* functions to blockdev-hmp-cmds.c
> > > >* Moved hmp_drive_add_node to blockdev-hmp-cmds.c
> > > >  (this change needed some new exports, thus in separate new patch)
> > > >* Moved hmp_qemu_io and hmp_eject to blockdev-hmp-cmds.c
> > > >* Added 'error:' prefix to vreport, and updated the iotests
> > > >  This is invasive change, but really feels like the right one
> > > >* Added minor refactoring patch that drops an unused #include
> > > > 
> > > > Changes from V3:
> > > >* Dropped the error prefix patches for now due to fact that it seems
> > > >  that libvirt doesn't need that after all. Oh well...
> > > >  I'll send them in a separate series.
> > > > 
> > > >* Hopefully correctly merged the copyright info the new files
> > > >  Both files are GPLv2 now (due to code from hmp.h/hmp-cmds.c)
> > > > 
> > > >* Addressed review feedback
> > > >* Renamed the added header to block-hmp-cmds.h
> > > > 
> > > >* Got rid of checkpatch.pl warnings in the moved code
> > > >  (cosmetic code changes only)
> > > > 
> > > >* I kept the reviewed-by tags, since the changes I did are minor.
> > > >  I hope that this is right thing to do.
> > > > 
> > > > Best regards,
> > > > Maxim Levitsky
> > > > 
> > > > Maxim Levitsky (11):
> > > >   usb/dev-storage: remove unused include
> > > >   monitor/hmp: uninline add_init_drive
> > > >   monitor/hmp: rename device-hotplug.c to block/monitor/block-hmp-cmds.c
> > > >   monitor/hmp: move hmp_drive_del and hmp_commit to block-hmp-cmds.c
> > > >   monitor/hmp: move hmp_drive_mirror and hmp_drive_backup to
> > > > block-hmp-cmds.c Moved code was added after 2012-01-13, thus under
> > > > GPLv2+
> > > >   monitor/hmp: move hmp_block_job* to block-hmp-cmds.c
> > > >   monitor/hmp: move hmp_snapshot_* to block-hmp-cmds.c
> > > > hmp_snapshot_blkdev is from GPLv2 version of the hmp-cmds.c thus
> > > > have to change the licence to GPLv2
> > > >   monitor/hmp: move hmp_nbd_server* to block-hmp-cmds.c
> > > >   monitor/hmp: move remaining hmp_block* functions to block-hmp-cmds.c
> > > >   monitor/hmp: move hmp_info_block* to block-hmp-cmds.c
> > > >   monitor/hmp: Move hmp_drive_add_node to block-hmp-cmds.c
> > > > 
> > > >  MAINTAINERS|1 +
> > > >  Makefile.objs  |2 +-
> > > >  block/Makefile.objs|1 +
> > > >  block/monitor/Makefile.objs|1 +
> > > >  block/monitor/block-hmp-cmds.c | 1002 
> > > >  blockdev.c |  137 +
> > > >  device-hotplug.c   |   91 ---
> > > >  hw/usb/dev-storage.c   |1 -
> > > >  include/block/block-hmp-cmds.h |   54 ++
> > > >  include/block/block_int.h  |5 +-
> > > >  include/monitor/hmp.h  |   24 -
> > > >  include/sysemu/blockdev.h  |4 -
> > > >  include/sysemu/sysemu.h|3 -
> > > >  monitor/hmp-cmds.c |  769 
> > > >  monitor/misc.c |1 +
> > > >  15 files changed, 1072 insertions(+), 1024 deletions(-)
> > > >  create mode 100644 block/monitor/Makefile.objs
> > > >  create mode 100644 block/monitor/block-hmp-cmds.c
> > > >  delete mode 100644 device-hotplug.c
> > > >  create mode 100644 include/block/block-hmp-cmds.h
> > > > 
> > > > -- 
> > > > 2.17.2
> > > > 
> > > 
> > > --
> > > Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
> > 
> > 
> 
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH v2 0/5] block: Generic file creation fallback

2020-02-19 Thread Maxim Levitsky

On Wed, 2020-02-19 at 11:38 +0100, Max Reitz wrote:
> On 22.01.20 17:45, Max Reitz wrote:
> > Hi,
> > 
> > As version 1, this series adds a fallback path for creating files (on
> > the protocol layer) if the protocol driver does not support file
> > creation, but the file already exists.
> > 
> > 
> > Branch: https://github.com/XanClic/qemu.git skip-proto-create-v2
> > Branch: https://git.xanclic.moe/XanClic/qemu.git skip-proto-create-v2
> > 
> > 
> > v2:
> > - Drop blk_truncate_for_formatting(): It doesn’t make sense to introduce
> >   this function any more after 26536c7fc25917d1bd13781f81fe3ab039643bff
> >   (“block: Do not truncate file node when formatting”), because we’d
> >   only use it in bdrv_create_file_fallback().
> >   Thus, it makes more sense to create special helper functions
> >   specifically for bdrv_create_file_fallback().
> > 
> > - Thus, dropped patches 2 and 3.
> > 
> > - And changed patch 4 to include those helper functions.
> > 
> > - Rebased, which was a bit of a pain.
> 
> Thanks for the reviews, added a note to the new test why the second case
> is expected to fail (as requested by Maxim), and applied the series to
> my block branch:
> 
> https://git.xanclic.moe/XanClic/qemu/commits/branch/block
> 
> Max
> 
Thank you too!
Best regards,
Maxim Levitsky

Re: QAPI schema for desired state of LUKS keyslots (was: [PATCH 02/13] qcrypto-luks: implement encryption key management)

2020-02-17 Thread Maxim Levitsky

ired state for
> > commonly just one keyslot.  Rationale: this satisfies practical needs.
> > An array of LUKSKeyslotAmend could specify desired state for all
> > keyslots.  However, multiple array elements could then apply to the same
> > slot.  We'd have to specify how to resolve such conflicts, and we'd have
> > to code up conflict detection.  Not worth it.
> > 
> > Examples:
> > 
> > * Add a secret to some free keyslot:
> > 
> >   { "state": "active", "secret": "CIA/GRU/MI6" }
> > 
> > * Deactivate all keyslots holding a secret:
> > 
> >   { "state": "inactive", "old-secret": "CIA/GRU/MI6" }
> > 
> > * Add a secret to a specific keyslot:
> > 
> >   { "state": "active", "secret": "CIA/GRU/MI6", "keyslot": 0 }
> > 
> > * Deactivate a specific keyslot:
> > 
> >   { "state": "inactive", "keyslot": 0 }
> > 
> >   Possibly less dangerous:
> > 
> >   { "state": "inactive", "keyslot": 0, "old-secret": "CIA/GRU/MI6" }
> > 
> > Option: Make use of Max's patches to support optional union tag with
> > default value to let us default @state to "active".  I doubt this makes
> > much of a difference in QMP.  A human-friendly interface should probably
> > be higher level anyway (Daniel pointed to cryptsetup).
> > 
> > Option: LUKSKeyslotInactive member @old-secret could also be named
> > @secret.  I don't care.
> > 
> > Option: delete @keyslot.  It provides low-level slot access.
> > Complicates the interface.  Fine if we need low-level slot access.  Do
> > we?
> > 
> > I apologize for the time it has taken me to write this.
> > 
> > Comments?
> 
> Works for me (without taking any of the options).
> 
> The unclear part is what the human-friendly interface should look like
> and where it should live. I'm afraid doing only the QMP part and calling
> the feature completed like we do so often won't work in this case.

IMHO, the best way to create the human-friendly part is to implement
LUKS-specific commands for qemu-img and use an interface very similar
to what cryptsetup provides.

Best regards,
Maxim Levitsky
> 
> Kevin

Re: QAPI schema for desired state of LUKS keyslots

2020-02-17 Thread Maxim Levitsky

On Mon, 2020-02-17 at 07:45 +0100, Markus Armbruster wrote:
> Maxim Levitsky  writes:
> 
> > On Sat, 2020-02-15 at 15:51 +0100, Markus Armbruster wrote:
> > > Review of this patch led to a lengthy QAPI schema design discussion.
> > > Let me try to condense it into a concrete proposal.
> > > 
> > > This is about the QAPI schema, and therefore about QMP.  The
> > > human-friendly interface is out of scope.  Not because it's not
> > > important (it clearly is!), only because we need to *focus* to have a
> > > chance at success.
> > 
> > 100% agree.
> > > 
> > > I'm going to include a few design options.  I'll mark them "Option:".
> > > 
> > > The proposed "amend" interface takes a specification of desired state,
> > > and figures out how to get from here to there by itself.  LUKS keyslots
> > > are one part of desired state.
> > > 
> > > We commonly have eight LUKS keyslots.  Each keyslot is either active or
> > > inactive.  An active keyslot holds a secret.
> > > 
> > > Goal: a QAPI type for specifying desired state of LUKS keyslots.
> > > 
> > > Proposal:
> > > 
> > > { 'enum': 'LUKSKeyslotState',
> > >   'data': [ 'active', 'inactive' ] }
> > > 
> > > { 'struct': 'LUKSKeyslotActive',
> > >   'data': { 'secret': 'str',
> > > > '*iter-time': 'int' } }
> > > 
> > > { 'struct': 'LUKSKeyslotInactive',
> > >   'data': { '*old-secret': 'str' } }
> > > 
> > > { 'union': 'LUKSKeyslotAmend',
> > >   'base': { '*keyslot': 'int',
> > > 'state': 'LUKSKeyslotState' }
> > >   'discriminator': 'state',
> > >   'data': { 'active': 'LUKSKeyslotActive',
> > > 'inactive': 'LUKSKeyslotInactive' } }
> > > 
> > > LUKSKeyslotAmend specifies desired state for a set of keyslots.
> > > 
> > > Four cases:
> > > 
> > > * @state is "active"
> > > 
> > >   Desired state is active holding the secret given by @secret.  Optional
> > >   @iter-time tweaks key stretching.
> > > 
> > >   The keyslot is chosen either by the user or by the system, as follows:
> > > 
> > >   - @keyslot absent
> > > 
> > > One inactive keyslot chosen by the system.  If none exists, error.
> > > 
> > >   - @keyslot present
> > > 
> > > The keyslot given by @keyslot.
> > > 
> > > If it's already active holding @secret, no-op.  Rationale: the
> > > current state is the desired state.
> > > 
> > > If it's already active holding another secret, error.  Rationale:
> > > update in place is unsafe.
> > > 
> > > Option: delete the "already active holding @secret" case.  Feels
> > > inelegant to me.  Okay if it makes things substantially simpler.
> > 
> > I didn't really understand this, since in state=active we shouldn't
> > delete anything. Looks OK otherwise.
> 
> Let me try to clarify.
> 
> Option: make the "already active holding @secret" case an error like the
> "already active holding another secret" case.  In longhand:
> 
>  - @keyslot present
> 
>The keyslot given by @keyslot.
> 
>If it's already active, error.
> 
> It feels inelegant to me, because it deviates from "specify desired
> state" paradigm: the specified desired state is fine, the way to get
> there from current state is obvious (do nothing), yet it's still an
> error.
Yep, although in theory the user can also specify the iteration count, which
might not match (and it will never match exactly, since it is benchmark-based).
Thus, if the user specified it, we might error out, and otherwise indeed ignore
it. This is of course a very minor issue.

> 
> > > * @state is "inactive"
> > > 
> > >   Desired state is inactive.
> > > 
> > >   Error if the current state has active keyslots, but the desired state
> > >   has none.
> > > 
> > >   The user chooses the keyslot by number and/or by the secret it holds,
> > >   as follo

Re: QAPI schema for desired state of LUKS keyslots (was: [PATCH 02/13] qcrypto-luks: implement encryption key management)

2020-02-16 Thread Maxim Levitsky

CIA/GRU/MI6" }
> 
> * Add a secret to a specific keyslot:
> 
>   { "state": "active", "secret": "CIA/GRU/MI6", "keyslot": 0 }
> 
> * Deactivate a specific keyslot:
> 
>   { "state": "inactive", "keyslot": 0 }
> 
>   Possibly less dangerous:
> 
>   { "state": "inactive", "keyslot": 0, "old-secret": "CIA/GRU/MI6" }
> 
> Option: Make use of Max's patches to support optional union tag with
> default value to let us default @state to "active".  I doubt this makes
> much of a difference in QMP.  A human-friendly interface should probably
> be higher level anyway (Daniel pointed to cryptsetup).
Also agree.
> 
> Option: LUKSKeyslotInactive member @old-secret could also be named
> @secret.  I don't care.
I prefer old-secret.
> 
> Option: delete @keyslot.  It provides low-level slot access.
> Complicates the interface.  Fine if we need low-level slot access.  Do
> we?
I don't have a strong opinion on that. I would probably like to keep
this for tests/debugging/etc.

> 
> I apologize for the time it has taken me to write this.
Thank you very much for doing this.

> 
> Comments?

Looks good to me.

Best regards,
Maxim Levitsky

Re: [PATCH] nbd-client: Support leading / in NBD URI

2020-02-12 Thread Maxim Levitsky

On Wed, 2020-02-12 at 14:33 +0100, Ján Tomko wrote:
> On Tue, Feb 11, 2020 at 08:31:01PM -0600, Eric Blake wrote:
> > The NBD URI specification [1] states that only one leading slash at
> > the beginning of the URI path component is stripped, not all such
> > slashes.  This becomes important to a patch I just proposed to nbdkit
> > [2], which would allow the exportname to select a file embedded within
> > an ext2 image: ext2fs demands an absolute pathname beginning with '/',
> > and because qemu was inadvertantly stripping it, my nbdkit patch had
> > to work around the behavior.
> > 
> > [1] https://github.com/NetworkBlockDevice/nbd/blob/master/doc/uri.md
> > [2] https://www.redhat.com/archives/libguestfs/2020-February/msg00109.html
> > 
> > Note that the qemu bug only affects handling of URIs such as
> > nbd://host:port//abs/path (where '/abs/path' should be the export
> > name); it is still possible to use --image-opts and pass the desired
> > export name with a leading slash directly through JSON even without
> > this patch.
> > 
> > Signed-off-by: Eric Blake 
> > ---
> > block/nbd.c | 6 --
> > 1 file changed, 4 insertions(+), 2 deletions(-)
> > 
> 
> Reviewed-by: Ján Tomko 
> 
> Jano
Note that I had a bug open for this.
https://bugzilla.redhat.com/show_bug.cgi?id=1728545

I expected this to be a feature to be honest,
and was afraid to break existing users that might rely on this.

Best regards,
Maxim Levitsky

Re: [PATCH v5 01/26] nvme: rename trace events to nvme_dev

2020-02-12 Thread Maxim Levitsky

On Wed, 2020-02-12 at 14:08 +0100, Klaus Birkelund Jensen wrote:
> On Feb 12 11:08, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > Change the prefix of all nvme device related trace events to 'nvme_dev'
> > > to not clash with trace events from the nvme block driver.
> > > 
> 
> Hi Maxim,
> 
> Thank you very much for your thorough reviews! Utterly appreciated!

Thanks to you for the patch series!

> 
> I'll start going through your suggested changes. There is a bit of work
> to do on splitting patches into refactoring and bugfixes, but I can
> definitely see the reason for this, so I'll get to work.
> 
> You mention the alignment with split lines alot. I actually thought I
> was following CODING_STYLE.rst (which allows a single 4 space indent for
> functions, but not statements such as if/else and while/for). But since
> hw/block/nvme.c is originally written in the style of aligning with the
> opening paranthesis I'm in the wrong here, so I will of course amend
> it. Should have done that from the beginning, it's just my personal
> taste shining through ;)

To be honest, this is my personal taste as well, but after *many* review
complaints about this, I consider aligning on the opening parenthesis
to be kind of an official style.

If others are OK with this though, I am personally 100% fine with leaving the
split lines as is.


Best regards,
Maxim Levitsky

Re: [PATCH v5 26/26] nvme: make lba data size configurable

2020-02-12 Thread Maxim Levitsky

On Thu, 2020-02-06 at 08:24 +0100, Klaus Birkelund Jensen wrote:
> On Feb  5 01:43, Keith Busch wrote:
> > On Tue, Feb 04, 2020 at 10:52:08AM +0100, Klaus Jensen wrote:
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >  hw/block/nvme-ns.c | 2 +-
> > >  hw/block/nvme-ns.h | 4 +++-
> > >  hw/block/nvme.c| 1 +
> > >  3 files changed, 5 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
> > > index 0e5be44486f4..981d7101b8f2 100644
> > > --- a/hw/block/nvme-ns.c
> > > +++ b/hw/block/nvme-ns.c
> > > @@ -18,7 +18,7 @@ static int nvme_ns_init(NvmeNamespace *ns)
> > >  {
> > >  NvmeIdNs *id_ns = &ns->id_ns;
> > >  
> > > -id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> > > +id_ns->lbaf[0].ds = ns->params.lbads;
> > >  id_ns->nuse = id_ns->ncap = id_ns->nsze =
> > >  cpu_to_le64(nvme_ns_nlbas(ns));
> > >  
> > > diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
> > > index b564bac25f6d..f1fe4db78b41 100644
> > > --- a/hw/block/nvme-ns.h
> > > +++ b/hw/block/nvme-ns.h
> > > @@ -7,10 +7,12 @@
> > >  
> > >  #define DEFINE_NVME_NS_PROPERTIES(_state, _props) \
> > >  DEFINE_PROP_DRIVE("drive", _state, blk), \
> > > -DEFINE_PROP_UINT32("nsid", _state, _props.nsid, 0)
> > > +DEFINE_PROP_UINT32("nsid", _state, _props.nsid, 0), \
> > > +DEFINE_PROP_UINT8("lbads", _state, _props.lbads, BDRV_SECTOR_BITS)
> > 
> > I think we need to validate the parameter is between 9 and 12 before
> > trusting it can be used safely.
> > 
> > Alternatively, add supported formats to the lbaf array and let the host
> > decide on a live system with the 'format' command.
> 
> The device does not yet support Format NVM, but we have a patch ready
> for that to be submitted with a new series when this is merged.
> 
> For now, while it does not support Format, I will change this patch such
> that it defaults to 9 (BRDV_SECTOR_BITS) and only accept 12 as an
> alternative (while always keeping the number of formats available to 1).
Looks like a good idea.

Best regards,
Maxim Levitsky

Re: [PATCH v5 25/26] nvme: remove redundant NvmeCmd pointer parameter

2020-02-12 Thread Maxim Levitsky

t16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
>  {
> -switch (cmd->opcode) {
> +    switch (req->cmd.opcode) {
>      case NVME_ADM_CMD_DELETE_SQ:
> -return nvme_del_sq(n, cmd);
> +return nvme_del_sq(n, req);
>  case NVME_ADM_CMD_CREATE_SQ:
> -return nvme_create_sq(n, cmd);
> +return nvme_create_sq(n, req);
>  case NVME_ADM_CMD_GET_LOG_PAGE:
> -return nvme_get_log(n, cmd, req);
> +return nvme_get_log(n, req);
>  case NVME_ADM_CMD_DELETE_CQ:
> -return nvme_del_cq(n, cmd);
> +return nvme_del_cq(n, req);
>  case NVME_ADM_CMD_CREATE_CQ:
> -return nvme_create_cq(n, cmd);
> +return nvme_create_cq(n, req);
>  case NVME_ADM_CMD_IDENTIFY:
> -return nvme_identify(n, cmd, req);
> +return nvme_identify(n, req);
>  case NVME_ADM_CMD_ABORT:
> -return nvme_abort(n, cmd, req);
> +return nvme_abort(n, req);
>  case NVME_ADM_CMD_SET_FEATURES:
> -return nvme_set_feature(n, cmd, req);
> +return nvme_set_feature(n, req);
>  case NVME_ADM_CMD_GET_FEATURES:
> -return nvme_get_feature(n, cmd, req);
> +return nvme_get_feature(n, req);
>  case NVME_ADM_CMD_ASYNC_EV_REQ:
> -return nvme_aer(n, cmd, req);
> +return nvme_aer(n, req);
>  default:
> -trace_nvme_dev_err_invalid_admin_opc(cmd->opcode);
> +trace_nvme_dev_err_invalid_admin_opc(req->cmd.opcode);
>  return NVME_INVALID_OPCODE | NVME_DNR;
>  }
>  }
> @@ -1919,8 +1917,8 @@ static void nvme_process_sq(void *opaque)
>  req->cqe.cid = cmd.cid;
>  memcpy(&req->cmd, &cmd, sizeof(NvmeCmd));
>  
> -status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
> -nvme_admin_cmd(n, &cmd, req);
> +status = sq->sqid ? nvme_io_cmd(n, req) :
> +nvme_admin_cmd(n, req);
>  if (status != NVME_NO_COMPLETE) {
>  req->status = status;
>  nvme_enqueue_req_completion(cq, req);

Other than line wrapping issues,

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v5 24/26] nvme: change controller pci id

2020-02-12 Thread Maxim Levitsky

On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> There are two reasons for changing this:
> 
>   1. The nvme device currently uses an internal Intel device id.
> 
>   2. Since commits "nvme: fix write zeroes offset and count" and "nvme:
>  support multiple namespaces" the controller device no longer has
>  the quirks that the Linux kernel think it has.
> 
>  As the quirks are applied based on pci vendor and device id, change
>  them to get rid of the quirks.
> 
> To keep backward compatibility, add a new 'x-use-intel-id' parameter to
> the nvme device to force use of the Intel vendor and device id. This is
> off by default but add a compat property to set this for machines 4.2
> and older.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 13 +
>  hw/block/nvme.h   |  4 +++-
>  hw/core/machine.c |  1 +
>  3 files changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 3a377bc56734..bdef53a590b0 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -2467,8 +2467,15 @@ static void nvme_init_pci(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  
>  pci_conf[PCI_INTERRUPT_PIN] = 1;
>  pci_config_set_prog_interface(pci_conf, 0x2);
> -pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
> -pci_config_set_device_id(pci_conf, 0x5845);
> +
> +if (n->params.use_intel_id) {
> +pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
> +pci_config_set_device_id(pci_conf, 0x5846);
> +} else {
> +pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT);
> +pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME);
> +}
> +
>  pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
>  pcie_endpoint_cap_init(pci_dev, 0x80);
>  
> @@ -2638,8 +2645,6 @@ static void nvme_class_init(ObjectClass *oc, void *data)
>  pc->realize = nvme_realize;
>  pc->exit = nvme_exit;
>  pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
> -pc->vendor_id = PCI_VENDOR_ID_INTEL;
> -pc->device_id = 0x5845;
>  pc->revision = 2;
>  
>  set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index c3cef0f024da..6b584f53ed64 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -12,7 +12,8 @@
>  DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \
>  DEFINE_PROP_UINT8("aerl", _state, _props.aerl, 3), \
>  DEFINE_PROP_UINT32("aer_max_queued", _state, _props.aer_max_queued, 64), 
> \
> -DEFINE_PROP_UINT8("mdts", _state, _props.mdts, 7)
> +DEFINE_PROP_UINT8("mdts", _state, _props.mdts, 7), \
> +DEFINE_PROP_BOOL("x-use-intel-id", _state, _props.use_intel_id, false)
>  
>  typedef struct NvmeParams {
>  char *serial;
> @@ -21,6 +22,7 @@ typedef struct NvmeParams {
>  uint8_t  aerl;
>  uint32_t aer_max_queued;
>  uint8_t  mdts;
> +bool use_intel_id;
>  } NvmeParams;
>  
>  typedef struct NvmeAsyncEvent {
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 3e288bfceb7f..984412d98c9d 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -34,6 +34,7 @@ GlobalProperty hw_compat_4_2[] = {
>  { "vhost-blk-device", "seg_max_adjust", "off"},
>  { "usb-host", "suppress-remote-wake", "off" },
>  { "usb-redir", "suppress-remote-wake", "off" },
> +{ "nvme", "x-use-intel-id", "on"},
>  };
>  const size_t hw_compat_4_2_len = G_N_ELEMENTS(hw_compat_4_2);
>  

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v5 23/26] pci: allocate pci id for nvme

2020-02-12 Thread Maxim Levitsky

On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> The emulated nvme device (hw/block/nvme.c) is currently using an
> internal Intel device id.
> 
> Prepare to change that by allocating a device id under the 1b36 (Red
> Hat, Inc.) vendor id.

> 
> Signed-off-by: Klaus Jensen 
> ---
>  MAINTAINERS|  1 +
>  docs/specs/nvme.txt| 10 ++
>  docs/specs/pci-ids.txt |  1 +
>  include/hw/pci/pci.h   |  1 +
>  4 files changed, 13 insertions(+)
>  create mode 100644 docs/specs/nvme.txt
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 1f0bc72f2189..14a018e9c0ae 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1645,6 +1645,7 @@ L: qemu-bl...@nongnu.org
>  S: Supported
>  F: hw/block/nvme*
>  F: tests/qtest/nvme-test.c
> +F: docs/specs/nvme.txt
>  
>  megasas
>  M: Hannes Reinecke 
> diff --git a/docs/specs/nvme.txt b/docs/specs/nvme.txt
> new file mode 100644
> index ..6ec7ddbc7ee0
> --- /dev/null
> +++ b/docs/specs/nvme.txt
> @@ -0,0 +1,10 @@
> +NVM Express Controller
> +==
> +
> +The nvme device (-device nvme) emulates an NVM Express Controller.
> +
> +
> +Reference Specifications
> +
> +
> +  https://nvmexpress.org/resources/specifications/

Nitpick: maybe mention the nvme version here, plus some TODOs that are left?

> diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
> index 4d53e5c7d9d5..abbdbca6be38 100644
> --- a/docs/specs/pci-ids.txt
> +++ b/docs/specs/pci-ids.txt
> @@ -63,6 +63,7 @@ PCI devices (other than virtio):
>  1b36:000b  PCIe Expander Bridge (-device pxb-pcie)
>  1b36:000d  PCI xhci usb host adapter
>  1b36:000f  mdpy (mdev sample device), linux/samples/vfio-mdev/mdpy.c
> +1b36:0010  PCIe NVMe device (-device nvme)
>  
>  All these devices are documented in docs/specs.
>  
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index b5013b834b20..9a20c309d0f2 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -103,6 +103,7 @@ extern bool pci_available;
>  #define PCI_DEVICE_ID_REDHAT_XHCI0x000d
>  #define PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE 0x000e
>  #define PCI_DEVICE_ID_REDHAT_MDPY0x000f
> +#define PCI_DEVICE_ID_REDHAT_NVME0x0010
>  #define PCI_DEVICE_ID_REDHAT_QXL 0x0100
>  
>  #define FMT_PCIBUS  PRIx64

Other than the actual ID assignment which is not something
I can approve/allocate:

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v5 22/26] nvme: support multiple namespaces

2020-02-12 Thread Maxim Levitsky

fconf;
>  NvmeParams   params;
> +NvmeBus  bus;
> +BlockConfconf;
>  
>  boolqs_created;
>  uint32_tpage_size;
> @@ -203,7 +194,6 @@ typedef struct NvmeCtrl {
>  uint32_treg_size;
>  uint32_tnum_namespaces;
>  uint32_tmax_q_ents;
> -uint64_tns_size;
>  uint8_t outstanding_aers;
>  uint32_tcmbsz;
>  uint32_tcmbloc;
> @@ -219,7 +209,8 @@ typedef struct NvmeCtrl {
>  QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
>  int aer_queued;
>  
> -NvmeNamespace   *namespaces;
> +NvmeNamespace   namespace;
> +NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES];
>  NvmeSQueue  **sq;
>  NvmeCQueue  **cq;
>  NvmeSQueue  admin_sq;
> @@ -228,9 +219,13 @@ typedef struct NvmeCtrl {
>  NvmeFeatureVal  features;
>  } NvmeCtrl;
>  
> -static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
> +static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
>  {
> -return n->ns_size >> nvme_ns_lbads(ns);
> +if (!nsid || nsid > n->num_namespaces) {
> +return NULL;
> +}
> +
> +return n->namespaces[nsid - 1];
>  }
>  
>  static inline uint16_t nvme_cid(NvmeRequest *req)
> @@ -253,4 +248,6 @@ static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
>  return req->sq->ctrl;
>  }
>  
> +int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
> +
>  #endif /* HW_NVME_H */
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 81d69e15fc32..aaf1fcda7923 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -29,6 +29,7 @@ hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, 
> uint32_t secs, int t
>  
>  # nvme.c
>  # nvme traces for successful events
> +nvme_dev_register_namespace(uint32_t nsid) "nsid %"PRIu32""
>  nvme_dev_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
>  nvme_dev_irq_pin(void) "pulsing IRQ pin"
>  nvme_dev_irq_masked(void) "IRQ is masked"
> @@ -38,7 +39,7 @@ nvme_dev_map_sgl(uint16_t cid, uint8_t typ, uint32_t nlb, 
> uint64_t len) "cid %"P
>  nvme_dev_req_register_aio(uint16_t cid, void *aio, const char *blkname, 
> uint64_t offset, uint64_t count, const char *opc, void *req) "cid %"PRIu16" 
> aio %p blk \"%s\" offset %"PRIu64" count
> %"PRIu64" opc \"%s\" req %p"
>  nvme_dev_aio_cb(uint16_t cid, void *aio, const char *blkname, uint64_t 
> offset, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset 
> %"PRIu64" opc \"%s\" req %p"
>  nvme_dev_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) 
> "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8""
> -nvme_dev_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, 
> uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
> +nvme_dev_rw(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, 
> uint64_t count, uint64_t lba) "cid %"PRIu16" %s nsid %"PRIu32" nlb %"PRIu32" 
> count %"PRIu64" lba 0x%"PRIx64""
>  nvme_dev_rw_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
>  nvme_dev_write_zeros(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t 
> nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
>  nvme_dev_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t 
> qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", 
> sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16",
> qflags=%"PRIu16""
> @@ -94,7 +95,8 @@ nvme_dev_err_invalid_prplist_ent(uint64_t prplist) "PRP 
> list entry is null or no
>  nvme_dev_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 
> 0x%"PRIx64""
>  nvme_dev_err_invalid_prp2_missing(void) "PRP2 is null and more data to be 
> transferred"
>  nvme_dev_err_invalid_prp(void) "invalid PRP"
> -nvme_dev_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u 
> not within 1-%u"
> +nvme_dev_err_invalid_ns(uint32_t nsid, uint32_t nn) "nsid %"PRIu32" nn 
> %"PRIu32""
> +nvme_dev_err_inactive_ns(uint32_t nsid, uint32_t nn) "nsid %"PRIu32" nn 
> %"PRIu32""
>  nvme_dev_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
>  nvme_dev_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
>  nvme_dev_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) 
> "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""


Best regards,
Maxim Levitsky

Re: [PATCH v5 21/26] nvme: add support for scatter gather lists

2020-02-12 Thread Maxim Levitsky

dev_map_sgl(uint16_t cid, uint8_t typ, uint32_t nlb, uint64_t len) "cid 
> %"PRIu16" type 0x%"PRIx8" nlb %"PRIu32" len %"PRIu64""
>  nvme_dev_req_register_aio(uint16_t cid, void *aio, const char *blkname, 
> uint64_t offset, uint64_t count, const char *opc, void *req) "cid %"PRIu16" 
> aio %p blk \"%s\" offset %"PRIu64" count
> %"PRIu64" opc \"%s\" req %p"
>  nvme_dev_aio_cb(uint16_t cid, void *aio, const char *blkname, uint64_t 
> offset, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset 
> %"PRIu64" opc \"%s\" req %p"
>  nvme_dev_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) 
> "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8""
> @@ -85,6 +86,9 @@ nvme_dev_err_prinfo(uint16_t cid, uint16_t ctrl) "cid 
> %"PRIu16" ctrl %"PRIu16""
>  nvme_dev_err_aio(uint16_t cid, void *aio, const char *blkname, uint64_t 
> offset, const char *opc, void *req, uint16_t status) "cid %"PRIu16" aio %p 
> blk \"%s\" offset %"PRIu64" opc \"%s\" req %p
> status 0x%"PRIx16""
>  nvme_dev_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
>  nvme_dev_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
> +nvme_dev_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 
> 0x%"PRIx8""
> +nvme_dev_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 
> 0x%"PRIx8""
> +nvme_dev_err_invalid_sgl_excess_length(uint16_t cid) "cid %"PRIu16""
>  nvme_dev_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
>  nvme_dev_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null 
> or not page aligned: 0x%"PRIx64""
>  nvme_dev_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 
> 0x%"PRIx64""
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index a873776d98b8..dbdeecf82358 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -205,15 +205,53 @@ enum NvmeCmbszMask {
>  #define NVME_CMBSZ_GETSIZE(cmbsz) \
>  (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz))))
>  
> +enum NvmeSglDescriptorType {
> +NVME_SGL_DESCR_TYPE_DATA_BLOCK   = 0x0,
> +NVME_SGL_DESCR_TYPE_BIT_BUCKET   = 0x1,
> +NVME_SGL_DESCR_TYPE_SEGMENT  = 0x2,
> +NVME_SGL_DESCR_TYPE_LAST_SEGMENT = 0x3,
> +NVME_SGL_DESCR_TYPE_KEYED_DATA_BLOCK = 0x4,
> +
> +NVME_SGL_DESCR_TYPE_VENDOR_SPECIFIC  = 0xf,
> +};
> +
> +enum NvmeSglDescriptorSubtype {
> +NVME_SGL_DESCR_SUBTYPE_ADDRESS = 0x0,
> +};
> +
> +typedef struct NvmeSglDescriptor {
> +uint64_t addr;
> +uint32_t len;
> +uint8_t  rsvd[3];
> +uint8_t  type;
> +} NvmeSglDescriptor;

I suggest you add a build time struct size check for this,
just in case compiler tries something funny.
(look at _nvme_check_size, at nvme.h)

Also, I think that the spec update change that adds the NvmeSglDescriptor
should be split into a separate patch (or, better, be added in one big patch that
adds all 1.3d features),
which would also make it easier to see changes that touch the other nvme driver
we have.

> +
> +#define NVME_SGL_TYPE(type) ((type >> 4) & 0xf)
> +#define NVME_SGL_SUBTYPE(type)  (type & 0xf)
> +
> +typedef union NvmeCmdDptr {
> +struct {
> +uint64_tprp1;
> +uint64_tprp2;
> +} prp;
> +
> +NvmeSglDescriptor sgl;
> +} NvmeCmdDptr;
> +
> +enum NvmePsdt {
> +PSDT_PRP = 0x0,
> +PSDT_SGL_MPTR_CONTIGUOUS = 0x1,
> +PSDT_SGL_MPTR_SGL= 0x2,
> +};
> +
>  typedef struct NvmeCmd {
>  uint8_t opcode;
> -uint8_t fuse;
> +uint8_t flags;
>  uint16_tcid;
>  uint32_tnsid;
>  uint64_tres1;
>  uint64_tmptr;
> -uint64_tprp1;
> -uint64_tprp2;
> +NvmeCmdDptr dptr;
>  uint32_tcdw10;
>  uint32_tcdw11;
>  uint32_tcdw12;
> @@ -222,6 +260,9 @@ typedef struct NvmeCmd {
>  uint32_tcdw15;
>  } NvmeCmd;
>  
> +#define NVME_CMD_FLAGS_FUSE(flags) (flags & 0x3)
> +#define NVME_CMD_FLAGS_PSDT(flags) ((flags >> 6) & 0x3)
> +
>  enum NvmeAdminCommands {
>  NVME_ADM_CMD_DELETE_SQ  = 0x00,
>  NVME_ADM_CMD_CREATE_SQ  = 0x01,
> @@ -427,6 +468,11 @@ enum NvmeStatusCodes {
>  NVME_CMD_ABORT_MISSING_FUSE = 0x000a,
>  NVME_INVALID_NSID   = 0x000b,
>  NVME_CMD_SEQ_ERROR  = 0x000c,
> +NVME_INVALID_SGL_SEG_DESCRIPTOR  = 0x000d,
> +NVME_INVALID_NUM_SGL_DESCRIPTORS = 0x000e,
> +NVME_DATA_SGL_LENGTH_INVALID = 0x000f,
> +NVME_METADATA_SGL_LENGTH_INVALID = 0x0010,
> +NVME_SGL_DESCRIPTOR_TYPE_INVALID = 0x0011,
>  NVME_INVALID_USE_OF_CMB = 0x0012,
>  NVME_LBA_RANGE  = 0x0080,
>  NVME_CAP_EXCEEDED   = 0x0081,
> @@ -623,6 +669,16 @@ enum NvmeIdCtrlOncs {
>  #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)
>  #define NVME_CTRL_CQES_MAX(cqes) (((cqes) >> 4) & 0xf)
>  
> +#define NVME_CTRL_SGLS_SUPPORTED(sgls) ((sgls) & 0x3)
> +#define NVME_CTRL_SGLS_SUPPORTED_NO_ALIGNMENT(sgls)((sgls) & (0x1 <<  0))
> +#define NVME_CTRL_SGLS_SUPPORTED_DWORD_ALIGNMENT(sgls) ((sgls) & (0x1 <<  1))
> +#define NVME_CTRL_SGLS_KEYED(sgls) ((sgls) & (0x1 <<  2))
> +#define NVME_CTRL_SGLS_BITBUCKET(sgls) ((sgls) & (0x1 << 16))
> +#define NVME_CTRL_SGLS_MPTR_CONTIGUOUS(sgls)   ((sgls) & (0x1 << 17))
> +#define NVME_CTRL_SGLS_EXCESS_LENGTH(sgls) ((sgls) & (0x1 << 18))
> +#define NVME_CTRL_SGLS_MPTR_SGL(sgls)  ((sgls) & (0x1 << 19))
> +#define NVME_CTRL_SGLS_ADDR_OFFSET(sgls)   ((sgls) & (0x1 << 20))
> +
>  typedef struct NvmeFeatureVal {
>  uint32_tarbitration;
>  uint32_tpower_mgmt;

Best regards,
Maxim Levitsky

Re: [PATCH v5 20/26] nvme: handle dma errors

2020-02-12 Thread Maxim Levitsky

  break;
> +}
> +QTAILQ_REMOVE(&cq->req_list, req, entry);
> +nvme_inc_cq_tail(cq);
>  nvme_req_clear(req);
>  QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
>  }
> @@ -1588,7 +1607,12 @@ static void nvme_process_sq(void *opaque)
>  
>  while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
>  addr = sq->dma_addr + sq->head * n->sqe_size;
> -nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
> +if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
> +trace_nvme_dev_err_addr_read(addr);
> +timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> +100 * SCALE_MS);
> +break;
> +}

Note that once the driver is optimized for performance, these timers must go,
since they run on the main thread and also add latency to each request.
But for now this change is all right.

As for a user triggering this every 100ms on purpose, I don't think that this is
such a big issue.
Maybe raise that to 500ms or even one second, since this condition will not
happen in real-life usage of the device anyway.

>  nvme_inc_sq_head(sq);
>  
>  req = QTAILQ_FIRST(&sq->req_list);
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 90a57fb6099a..09bfb3782dd0 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -83,6 +83,8 @@ nvme_dev_mmio_shutdown_cleared(void) "shutdown bit cleared"
>  nvme_dev_err_mdts(uint16_t cid, size_t mdts, size_t len) "cid %"PRIu16" mdts 
> %"PRIu64" len %"PRIu64""
>  nvme_dev_err_prinfo(uint16_t cid, uint16_t ctrl) "cid %"PRIu16" ctrl 
> %"PRIu16""
>  nvme_dev_err_aio(uint16_t cid, void *aio, const char *blkname, uint64_t 
> offset, const char *opc, void *req, uint16_t status) "cid %"PRIu16" aio %p 
> blk \"%s\" offset %"PRIu64" opc \"%s\" req %p
> status 0x%"PRIx16""
> +nvme_dev_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
> +nvme_dev_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
>  nvme_dev_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
>  nvme_dev_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null 
> or not page aligned: 0x%"PRIx64""
>  nvme_dev_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 
> 0x%"PRIx64""
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index c1de92179596..a873776d98b8 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -418,7 +418,7 @@ enum NvmeStatusCodes {
>  NVME_INVALID_OPCODE = 0x0001,
>  NVME_INVALID_FIELD  = 0x0002,
>  NVME_CID_CONFLICT   = 0x0003,
> -NVME_DATA_TRAS_ERROR= 0x0004,
> +NVME_DATA_TRANSFER_ERROR= 0x0004,
>  NVME_POWER_LOSS_ABORT   = 0x0005,
>  NVME_INTERNAL_DEV_ERROR = 0x0006,
>  NVME_CMD_ABORT_REQ  = 0x0007,


Best regards,
Maxim Levitsky

Re: [PATCH v5 18/26] nvme: use preallocated qsg/iov in nvme_dma_prp

2020-02-12 Thread Maxim Levitsky

On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> Since clean up of the request qsg/iov has been moved to the common
> nvme_enqueue_req_completion function, there is no need to use a
> stack allocated qsg/iov in nvme_dma_prp.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 18 ++
>  1 file changed, 6 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index e97da35c4ca1..f8c81b9e2202 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -298,23 +298,21 @@ unmap:
>  static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
>  uint64_t prp1, uint64_t prp2, DMADirection dir, NvmeRequest *req)
>  {
> -QEMUSGList qsg;
> -QEMUIOVector iov;
>  uint16_t status = NVME_SUCCESS;
>  size_t bytes;
>  
> -status = nvme_map_prp(n, &qsg, &iov, prp1, prp2, len, req);
> +status = nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, len, req);
>  if (status) {
>  return status;
>  }
>  
> -if (qsg.nsg > 0) {
> +if (req->qsg.nsg > 0) {
>  uint64_t residual;
>  
>  if (dir == DMA_DIRECTION_TO_DEVICE) {
> -residual = dma_buf_write(ptr, len, &qsg);
> +residual = dma_buf_write(ptr, len, &req->qsg);
>  } else {
> -residual = dma_buf_read(ptr, len, &qsg);
> +residual = dma_buf_read(ptr, len, &req->qsg);
>  }
>  
>  if (unlikely(residual)) {
> @@ -322,15 +320,13 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, 
> uint32_t len,
>  status = NVME_INVALID_FIELD | NVME_DNR;
>  }
>  
> -qemu_sglist_destroy(&qsg);
> -
>  return status;
>  }
>  
>  if (dir == DMA_DIRECTION_TO_DEVICE) {
> -bytes = qemu_iovec_to_buf(&iov, 0, ptr, len);
> +bytes = qemu_iovec_to_buf(&req->iov, 0, ptr, len);
>  } else {
> -bytes = qemu_iovec_from_buf(&iov, 0, ptr, len);
> +bytes = qemu_iovec_from_buf(&req->iov, 0, ptr, len);
>  }
>  
>  if (unlikely(bytes != len)) {
> @@ -338,8 +334,6 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, 
> uint32_t len,
>  status = NVME_INVALID_FIELD | NVME_DNR;
>  }
>  
> -qemu_iovec_destroy(&iov);
> -
>  return status;
>  }
>  


Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v5 17/26] nvme: allow multiple aios per command

2020-02-12 Thread Maxim Levitsky

}
> +}
> +
> +static inline bool nvme_req_is_write(NvmeRequest *req)
> +{
> +switch (req->cmd.opcode) {
> +case NVME_CMD_WRITE:
> +case NVME_CMD_WRITE_UNCOR:
> +case NVME_CMD_WRITE_ZEROS:
> +return true;
> +default:
> +return false;
> +}
> +}
> +
>  #define TYPE_NVME "nvme"
>  #define NVME(obj) \
>  OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
> @@ -139,10 +236,21 @@ static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, 
> NvmeNamespace *ns)
>  static inline uint16_t nvme_cid(NvmeRequest *req)
>  {
>  if (req) {
> -return le16_to_cpu(req->cqe.cid);
> +return le16_to_cpu(req->cmd.cid);
>  }
>  
>  return 0xffff;
>  }
>  
> +static inline bool nvme_status_is_error(uint16_t status, uint16_t err)
> +{
> +/* strip DNR and MORE */
> +return (status & 0xfff) == err;
> +}
> +
> +static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
> +{
> +return req->sq->ctrl;
> +}
> +
>  #endif /* HW_NVME_H */
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 77aa0da99ee0..90a57fb6099a 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -34,7 +34,12 @@ nvme_dev_irq_pin(void) "pulsing IRQ pin"
>  nvme_dev_irq_masked(void) "IRQ is masked"
>  nvme_dev_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" 
> prp2=0x%"PRIx64""
>  nvme_dev_map_prp(uint16_t cid, uint8_t opc, uint64_t trans_len, uint32_t 
> len, uint64_t prp1, uint64_t prp2, int num_prps) "cid %"PRIu16" opc 
> 0x%"PRIx8" trans_len %"PRIu64" len %"PRIu32" prp1
> 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d"
> +nvme_dev_req_register_aio(uint16_t cid, void *aio, const char *blkname, 
> uint64_t offset, uint64_t count, const char *opc, void *req) "cid %"PRIu16" 
> aio %p blk \"%s\" offset %"PRIu64" count
> %"PRIu64" opc \"%s\" req %p"
> +nvme_dev_aio_cb(uint16_t cid, void *aio, const char *blkname, uint64_t 
> offset, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset 
> %"PRIu64" opc \"%s\" req %p"
> +nvme_dev_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) 
> "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8""
>  nvme_dev_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, 
> uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
> +nvme_dev_rw_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
> +nvme_dev_write_zeros(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t 
> nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
>  nvme_dev_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t 
> qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", 
> sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16",
> qflags=%"PRIu16""
>  nvme_dev_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t 
> size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", 
> cqid=%"PRIu16", vector=%"PRIu16",
> qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
>  nvme_dev_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
> @@ -75,6 +80,9 @@ nvme_dev_mmio_shutdown_set(void) "shutdown bit set"
>  nvme_dev_mmio_shutdown_cleared(void) "shutdown bit cleared"
>  
>  # nvme traces for error conditions
> +nvme_dev_err_mdts(uint16_t cid, size_t mdts, size_t len) "cid %"PRIu16" mdts 
> %"PRIu64" len %"PRIu64""
> +nvme_dev_err_prinfo(uint16_t cid, uint16_t ctrl) "cid %"PRIu16" ctrl 
> %"PRIu16""
> +nvme_dev_err_aio(uint16_t cid, void *aio, const char *blkname, uint64_t 
> offset, const char *opc, void *req, uint16_t status) "cid %"PRIu16" aio %p 
> blk \"%s\" offset %"PRIu64" opc \"%s\" req %p
> status 0x%"PRIx16""
>  nvme_dev_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
>  nvme_dev_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null 
> or not page aligned: 0x%"PRIx64""
>  nvme_dev_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 
> 0x%"PRIx64""



The patch is large, I tried my best to spot issues, but I might have missed 
some.
Please split it as I pointed out.
Overall I do like most of the changes.

Best regards,
Maxim Levitsky

Re: [PATCH v5 16/26] nvme: refactor prp mapping

2020-02-12 Thread Maxim Levitsky

t:
>  trace_nvme_dev_err_invalid_identify_cns(le32_to_cpu(c->cns));
>  return NVME_INVALID_FIELD | NVME_DNR;
> @@ -1039,15 +1105,16 @@ static inline uint64_t nvme_get_timestamp(const 
> NvmeCtrl *n)
>  return cpu_to_le64(ts.all);
>  }
>  
> -static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
> +static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
> +NvmeRequest *req)
>  {
>  uint64_t prp1 = le64_to_cpu(cmd->prp1);
>  uint64_t prp2 = le64_to_cpu(cmd->prp2);
>  
>  uint64_t timestamp = nvme_get_timestamp(n);
>  
> -return nvme_dma_read_prp(n, (uint8_t *)&timestamp,
> - sizeof(timestamp), prp1, prp2);
> +return nvme_dma_prp(n, (uint8_t *)&timestamp, sizeof(timestamp),
> +prp1, prp2, DMA_DIRECTION_FROM_DEVICE, req);
>  }
>  
>  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> @@ -1099,7 +1166,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  trace_nvme_dev_getfeat_numq(result);
>  break;
>  case NVME_TIMESTAMP:
> -return nvme_get_feature_timestamp(n, cmd);
> +return nvme_get_feature_timestamp(n, cmd, req);
>  case NVME_INTERRUPT_COALESCING:
>  result = cpu_to_le32(n->features.int_coalescing);
>  break;
> @@ -1125,15 +1192,16 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  return NVME_SUCCESS;
>  }
>  
> -static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
> +static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
> +NvmeRequest *req)
>  {
>  uint16_t ret;
>  uint64_t timestamp;
>  uint64_t prp1 = le64_to_cpu(cmd->prp1);
>  uint64_t prp2 = le64_to_cpu(cmd->prp2);
>  
> -ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp,
> -sizeof(timestamp), prp1, prp2);
> +ret = nvme_dma_prp(n, (uint8_t *) &timestamp, sizeof(timestamp),
> +prp1, prp2, DMA_DIRECTION_TO_DEVICE, req);
>  if (ret != NVME_SUCCESS) {
>  return ret;
>  }
> @@ -1194,7 +1262,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  ((n->params.num_queues - 2) << 16));
>  break;
>  case NVME_TIMESTAMP:
> -return nvme_set_feature_timestamp(n, cmd);
> +return nvme_set_feature_timestamp(n, cmd, req);
>  case NVME_ASYNCHRONOUS_EVENT_CONF:
>  n->features.async_config = dw11;
>  break;
> @@ -1246,7 +1314,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  case NVME_ADM_CMD_CREATE_CQ:
>  return nvme_create_cq(n, cmd);
>  case NVME_ADM_CMD_IDENTIFY:
> -return nvme_identify(n, cmd);
> +return nvme_identify(n, cmd, req);
>  case NVME_ADM_CMD_ABORT:
>  return nvme_abort(n, cmd, req);
>  case NVME_ADM_CMD_SET_FEATURES:
> @@ -1282,6 +1350,7 @@ static void nvme_process_sq(void *opaque)
>  QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
>  memset(&req->cqe, 0, sizeof(req->cqe));
>  req->cqe.cid = cmd.cid;
> +memcpy(&req->cmd, &cmd, sizeof(NvmeCmd));
>  
>  status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
>  nvme_admin_cmd(n, &cmd, req);
> @@ -1804,7 +1873,7 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  
>  NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
>  NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
> -NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
> +NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1);
>  NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
>  NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
>  NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2);
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 7ced5fd485a9..d27baa9d5391 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -27,11 +27,11 @@ typedef struct NvmeRequest {
>  struct NvmeSQueue   *sq;
>  BlockAIOCB  *aiocb;
>  uint16_tstatus;
> -boolhas_sg;
>  NvmeCqe cqe;
>  BlockAcctCookie acct;
>  QEMUSGList  qsg;
>  QEMUIOVectoriov;
> +NvmeCmd cmd;
>  QTAILQ_ENTRY(NvmeRequest)entry;
>  } NvmeRequest;
>  
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 9e5a4548bde0..77aa0da99ee0 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -33,6 +33,7 @@ nvme_dev_irq_msix(uint32_t vector) "raising MSI-X IRQ 
> vector %u"
>  nvme_dev_irq_pin(void) "pulsing IRQ pin"
>  nvme_dev_irq_masked(void) "IRQ is masked"
>  nvme_dev_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" 
> prp2=0x%"PRIx64""
> +nvme_dev_map_prp(uint16_t cid, uint8_t opc, uint64_t trans_len, uint32_t 
> len, uint64_t prp1, uint64_t prp2, int num_prps) "cid %"PRIu16" opc 
> 0x%"PRIx8" trans_len %"PRIu64" len %"PRIu32" prp1
> 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d"
>  nvme_dev_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, 
> uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
>  nvme_dev_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t 
> qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", 
> sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16",
> qflags=%"PRIu16""
>  nvme_dev_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t 
> size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", 
> cqid=%"PRIu16", vector=%"PRIu16",
> qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 31eb9397d8c6..c1de92179596 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -427,6 +427,7 @@ enum NvmeStatusCodes {
>  NVME_CMD_ABORT_MISSING_FUSE = 0x000a,
>  NVME_INVALID_NSID   = 0x000b,
>  NVME_CMD_SEQ_ERROR  = 0x000c,
> +NVME_INVALID_USE_OF_CMB = 0x0012,
>  NVME_LBA_RANGE  = 0x0080,
>  NVME_CAP_EXCEEDED   = 0x0081,
>  NVME_NS_NOT_READY   = 0x0082,


Overall I would split this commit into real refactoring and bugfixes.
Best regards,
Maxim Levitsky

Re: [PATCH v5 15/26] nvme: bump supported specification to 1.3

2020-02-12 Thread Maxim Levitsky

at;
> -uint8_t rsvd33;
>  uint16_tnawun;
>  uint16_tnawupf;
> +uint16_tnacwu;
Aha! Here you 'fix' the bug you had in patch 4.
>  uint16_tnabsn;
>  uint16_tnabo;
>  uint16_tnabspf;
> -uint8_t rsvd46[2];
> +uint16_tnoiob;
>  uint8_t nvmcap[16];
>  uint8_t rsvd64[40];
>  uint8_t nguid[16];
>  uint64_teui64;
>  NvmeLBAFlbaf[16];
> -uint8_t res192[192];
> +uint8_t rsvd192[192];
And even do what I suggested with that field :-)
Please squash the changes.
>  uint8_t vs[3712];
>  } NvmeIdNs;
>  

So I suggest you squash this set of changes with patch 4.
I also suggest you split the other changes in this patch, one per feature 
added.
The tracing change can also be squashed with the other tracing patch you 
submitted.

In summary I would suggest you to have:

1. patch that only adds all the fields from the 1.3d spec, and overall updates 
nvme.h
to be up to 1.3d spec

2. patches that do refactoring, add more tracing (also a form of refactoring, 
since tracing
isn't a functional thing)

3. set of patches that implement all the 1.3d features.

4. patch that only bumps the supported version right to 1.3d

Best regards,
Maxim Levitsky

Re: [PATCH v5 14/26] nvme: make sure ncqr and nsqr is valid

2020-02-12 Thread Maxim Levitsky

On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> 0xffff is not an allowed value for NCQR and NSQR in Set Features on
> Number of Queues.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 30c5b3e7a67d..900732bb2f38 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1133,6 +1133,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
>  break;
>  case NVME_NUMBER_OF_QUEUES:
> +if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
Very minor nitpick: since this spec requirement is not obvious, a 
quote/reference to the spec
would be nice to have here. 

> +
>  trace_nvme_dev_setfeat_numq((dw11 & 0xFFFF) + 1,
>  ((dw11 >> 16) & 0xFFFF) + 1, n->params.num_queues - 1,
>  n->params.num_queues - 1);

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v5 13/26] nvme: additional tracing

2020-02-12 Thread Maxim Levitsky

On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> Add a trace call for nvme_enqueue_req_completion.
> 
> Also, streamline nvme_identify_ns and nvme_identify_ns_list. They do not
> need to repeat the command, it is already in the trace name.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 8 +---
>  hw/block/trace-events | 5 +++--
>  2 files changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 3267ee2de47a..30c5b3e7a67d 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -320,6 +320,8 @@ static void nvme_post_cqes(void *opaque)
>  static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
>  {
>  assert(cq->cqid == req->sq->cqid);
> +trace_nvme_dev_enqueue_req_completion(nvme_cid(req), cq->cqid,
> +req->status);
Split line alignment on that '('

>  QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
>  QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
>  timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
> @@ -895,7 +897,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
> NvmeIdentify *c)
>  prp1, prp2);
>  }
>  
> -static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
> +static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c)
>  {
>  static const int data_len = 4 * KiB;
>  uint32_t min_nsid = le32_to_cpu(c->nsid);
> @@ -905,7 +907,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, 
> NvmeIdentify *c)
>  uint16_t ret;
>  int i, j = 0;
>  
> -trace_nvme_dev_identify_nslist(min_nsid);
> +trace_nvme_dev_identify_ns_list(min_nsid);
>  
>  list = g_malloc0(data_len);
>  for (i = 0; i < n->num_namespaces; i++) {
> @@ -932,7 +934,7 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
>  case 0x01:
>  return nvme_identify_ctrl(n, c);
>  case 0x02:
> -return nvme_identify_nslist(n, c);
> +return nvme_identify_ns_list(n, c);
>  default:
>  trace_nvme_dev_err_invalid_identify_cns(le32_to_cpu(c->cns));
>  return NVME_INVALID_FIELD | NVME_DNR;
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 4cf39961989d..f982ec1a3221 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -39,8 +39,8 @@ nvme_dev_create_cq(uint64_t addr, uint16_t cqid, uint16_t 
> vector, uint16_t size,
>  nvme_dev_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
>  nvme_dev_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
>  nvme_dev_identify_ctrl(void) "identify controller"
> -nvme_dev_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
> -nvme_dev_identify_nslist(uint16_t ns) "identify namespace list, 
> nsid=%"PRIu16""
> +nvme_dev_identify_ns(uint32_t ns) "nsid %"PRIu32""
> +nvme_dev_identify_ns_list(uint32_t ns) "nsid %"PRIu32""
>  nvme_dev_getfeat(uint16_t cid, uint32_t fid) "cid %"PRIu16" fid 0x%"PRIx32""
>  nvme_dev_setfeat(uint16_t cid, uint32_t fid, uint32_t val) "cid %"PRIu16" 
> fid 0x%"PRIx32" val 0x%"PRIx32""
>  nvme_dev_getfeat_vwcache(const char* result) "get feature volatile write 
> cache, result=%s"
> @@ -54,6 +54,7 @@ nvme_dev_aer(uint16_t cid) "cid %"PRIu16""
>  nvme_dev_aer_aerl_exceeded(void) "aerl exceeded"
>  nvme_dev_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 
> 0x%"PRIx8""
>  nvme_dev_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 
> 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
> +nvme_dev_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t 
> status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
>  nvme_dev_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 
> 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
>  nvme_dev_enqueue_event_noqueue(int queued) "queued %d"
>  nvme_dev_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""

With alignment fixed:

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v5 12/26] nvme: add missing mandatory features

2020-02-12 Thread Maxim Levitsky

id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
>  
> +
> +if (blk_enable_write_cache(n->conf.blk)) {
> +id->vwc = 1;
> +}
> +
>  strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:");
>  pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial);
>  
>  id->psd[0].mp = cpu_to_le16(0x9c4);
>  id->psd[0].enlat = cpu_to_le32(0x10);
>  id->psd[0].exlat = cpu_to_le32(0x4);
> -if (blk_enable_write_cache(n->conf.blk)) {
> -id->vwc = 1;
> -}
>  
>  n->bar.cap = 0;
>  NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
> @@ -1861,6 +1911,7 @@ static void nvme_exit(PCIDevice *pci_dev)
>  g_free(n->cq);
>  g_free(n->sq);
>  g_free(n->aer_reqs);
> +g_free(n->features.int_vector_config);
>  
>  if (n->params.cmb_size_mb) {
>  g_free(n->cmbuf);
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 3952c36774cf..4cf39961989d 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -41,6 +41,8 @@ nvme_dev_del_cq(uint16_t cqid) "deleted completion queue, 
> sqid=%"PRIu16""
>  nvme_dev_identify_ctrl(void) "identify controller"
>  nvme_dev_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
>  nvme_dev_identify_nslist(uint16_t ns) "identify namespace list, 
> nsid=%"PRIu16""
> +nvme_dev_getfeat(uint16_t cid, uint32_t fid) "cid %"PRIu16" fid 0x%"PRIx32""
> +nvme_dev_setfeat(uint16_t cid, uint32_t fid, uint32_t val) "cid %"PRIu16" 
> fid 0x%"PRIx32" val 0x%"PRIx32""
>  nvme_dev_getfeat_vwcache(const char* result) "get feature volatile write 
> cache, result=%s"
>  nvme_dev_getfeat_numq(int result) "get feature number of queues, result=%d"
>  nvme_dev_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested 
> cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index a24be047a311..09419ed499d0 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -445,7 +445,8 @@ enum NvmeStatusCodes {
>  NVME_FW_REQ_RESET   = 0x010b,
>  NVME_INVALID_QUEUE_DEL  = 0x010c,
>  NVME_FID_NOT_SAVEABLE   = 0x010d,
> -NVME_FID_NOT_NSID_SPEC  = 0x010f,
> +NVME_FEAT_NOT_CHANGABLE = 0x010e,
> +NVME_FEAT_NOT_NSID_SPEC = 0x010f,
>  NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110,
>  NVME_CONFLICTING_ATTRS  = 0x0180,
>  NVME_INVALID_PROT_INFO  = 0x0181,

Best regards,
Maxim Levitsky

Re: [PATCH v5 11/26] nvme: add support for the asynchronous event request command

2020-02-12 Thread Maxim Levitsky

amp(uint64_t ts) "set feature timestamp = 0x%"PRIx64""
>  nvme_dev_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64""
>  nvme_dev_get_log(uint16_t cid, uint8_t lid, uint8_t rae, uint32_t len, 
> uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off 
> %"PRIu64""
> +nvme_dev_process_aers(int queued) "queued %d"
> +nvme_dev_aer(uint16_t cid) "cid %"PRIu16""
> +nvme_dev_aer_aerl_exceeded(void) "aerl exceeded"
> +nvme_dev_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 
> 0x%"PRIx8""
> +nvme_dev_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 
> 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
> +nvme_dev_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 
> 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
> +nvme_dev_enqueue_event_noqueue(int queued) "queued %d"
> +nvme_dev_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
> +nvme_dev_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
>  nvme_dev_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, 
> interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
>  nvme_dev_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, 
> interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
>  nvme_dev_mmio_cfg(uint64_t data) "wrote MMIO, config controller 
> config=0x%"PRIx64""
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 9a6055adeb61..a24be047a311 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -386,8 +386,8 @@ enum NvmeAsyncEventRequest {
>  NVME_AER_TYPE_SMART = 1,
>  NVME_AER_TYPE_IO_SPECIFIC   = 6,
>  NVME_AER_TYPE_VENDOR_SPECIFIC   = 7,
> -NVME_AER_INFO_ERR_INVALID_SQ= 0,
> -NVME_AER_INFO_ERR_INVALID_DB= 1,
> +NVME_AER_INFO_ERR_INVALID_DB_REGISTER   = 0,
> +NVME_AER_INFO_ERR_INVALID_DB_VALUE  = 1,
>  NVME_AER_INFO_ERR_DIAG_FAIL = 2,
>  NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3,
>  NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR= 4,
> @@ -640,6 +640,10 @@ typedef struct NvmeFeatureVal {
>  #define NVME_TEMP_TMPSEL(temp) ((temp >> 16) & 0xf)
>  #define NVME_TEMP_TMPTH(temp)  (temp & 0xffff)
>  
> +#define NVME_AEC_SMART(aec) (aec & 0xff)
> +#define NVME_AEC_NS_ATTR(aec)   ((aec >> 8) & 0x1)
> +#define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)
> +
>  enum NvmeFeatureIds {
>  NVME_ARBITRATION= 0x1,
>  NVME_POWER_MANAGEMENT   = 0x2,


Overall looks very good. This feature is very tricky to
get right due to the somewhat unclear spec, but after reading the
spec again, it looks OK.

I might have missed something though. I cross-checked against my
implementation of this and it looks like I misunderstood the spec
in a few places back then in my nvme-mdev implementation.

A reminder to fix all the split code line alignment issues (when a C statement is 
split over to the next line, the continuation should be aligned with the first '('). 
There are plenty of these here.

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v5 10/26] nvme: add support for the get log page command

2020-02-12 Thread Maxim Levitsky

interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
>  nvme_dev_mmio_cfg(uint64_t data) "wrote MMIO, config controller 
> config=0x%"PRIx64""
> @@ -85,6 +86,7 @@ nvme_dev_err_invalid_create_cq_qflags(uint16_t qflags) 
> "failed creating completi
>  nvme_dev_err_invalid_identify_cns(uint16_t cns) "identify, invalid 
> cns=0x%"PRIx16""
>  nvme_dev_err_invalid_getfeat(int dw10) "invalid get features, 
> dw10=0x%"PRIx32""
>  nvme_dev_err_invalid_setfeat(uint32_t dw10) "invalid set features, 
> dw10=0x%"PRIx32""
> +nvme_dev_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 
> 0x%"PRIx16""
>  nvme_dev_err_startfail_cq(void) "nvme_start_ctrl failed because there are 
> non-admin completion queues"
>  nvme_dev_err_startfail_sq(void) "nvme_start_ctrl failed because there are 
> non-admin submission queues"
>  nvme_dev_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the 
> admin submission queue address is null"
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index ff31cb32117c..9a6055adeb61 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -515,7 +515,7 @@ enum NvmeSmartWarn {
>  NVME_SMART_FAILED_VOLATILE_MEDIA  = 1 << 4,
>  };
>  
> -enum LogIdentifier {
> +enum NvmeLogIdentifier {
>  NVME_LOG_ERROR_INFO = 0x01,
>  NVME_LOG_SMART_INFO = 0x02,
>  NVME_LOG_FW_SLOT_INFO   = 0x03,

Best regards,
Maxim Levitsky

Re: [PATCH v5 09/26] nvme: add temperature threshold feature

2020-02-12 Thread Maxim Levitsky

by the 
> host */
>  uint64_ttimestamp_set_qemu_clock_ms;/* QEMU clock time */
> +uint16_ttemperature;
>  
>  NvmeNamespace   *namespaces;
>  NvmeSQueue  **sq;
> @@ -115,6 +116,7 @@ typedef struct NvmeCtrl {
>  NvmeSQueue  admin_sq;
>  NvmeCQueue  admin_cq;
>  NvmeIdCtrl  id_ctrl;
> +NvmeFeatureVal  features;
>  } NvmeCtrl;
>  
>  static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index d2f65e8fe496..ff31cb32117c 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -616,7 +616,8 @@ enum NvmeIdCtrlOncs {
>  typedef struct NvmeFeatureVal {
>  uint32_tarbitration;
>  uint32_tpower_mgmt;
> -uint32_ttemp_thresh;
> +uint16_ttemp_thresh_hi;
> +uint16_ttemp_thresh_low;
>  uint32_terr_rec;
>  uint32_tvolatile_wc;
>  uint32_tnum_queues;
> @@ -635,6 +636,10 @@ typedef struct NvmeFeatureVal {
>  #define NVME_INTC_THR(intc) (intc & 0xff)
>  #define NVME_INTC_TIME(intc)((intc >> 8) & 0xff)
>  
> +#define NVME_TEMP_THSEL(temp)  ((temp >> 20) & 0x3)
> +#define NVME_TEMP_TMPSEL(temp) ((temp >> 16) & 0xf)
> +#define NVME_TEMP_TMPTH(temp)  (temp & 0xffff)
> +
>  enum NvmeFeatureIds {
>  NVME_ARBITRATION= 0x1,
>  NVME_POWER_MANAGEMENT   = 0x2,


Best regards,
Maxim Levitsky

Re: [PATCH v5 08/26] nvme: refactor device realization

2020-02-12 Thread Maxim Levitsky

static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> +{
> +NvmeCtrl *n = NVME(pci_dev);
> +Error *local_err = NULL;
> +int i;
> +
> +if (nvme_check_constraints(n, &local_err)) {
> +error_propagate_prepend(errp, local_err, "nvme_check_constraints: ");
Do we need that hint for the end user?
> +return;
> +}
> +
> +nvme_init_state(n);
> +
> +if (nvme_init_blk(n, &local_err)) {
> +error_propagate_prepend(errp, local_err, "nvme_init_blk: ");
Same here
> +return;
>  }
>  
>  for (i = 0; i < n->num_namespaces; i++) {
> -NvmeNamespace *ns = &n->namespaces[i];
> -NvmeIdNs *id_ns = &ns->id_ns;
> -id_ns->nsfeat = 0;
> -id_ns->nlbaf = 0;
> -id_ns->flbas = 0;
> -id_ns->mc = 0;
> -id_ns->dpc = 0;
> -id_ns->dps = 0;
> -id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> -id_ns->ncap  = id_ns->nuse = id_ns->nsze =
> -cpu_to_le64(n->ns_size >>
> -id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
> +if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) {
> +error_propagate_prepend(errp, local_err, "nvme_init_namespace: 
> ");
And here
> +return;
> +}
>  }
> +
> +nvme_init_pci(n, pci_dev);
> +nvme_init_ctrl(n);
>  }
>  
>  static void nvme_exit(PCIDevice *pci_dev)
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 9957c4a200e2..a867bdfabafd 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -65,6 +65,22 @@ typedef struct NvmeNamespace {
>  NvmeIdNsid_ns;
>  } NvmeNamespace;
>  
> +static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns)
> +{
It's not common to return a structure in C; usually a pointer is returned to
avoid copying. In this case this doesn't matter that much though.
> +NvmeIdNs *id_ns = &ns->id_ns;
> +return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
> +}
> +
> +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
> +{
> +return nvme_ns_lbaf(ns).ds;
> +}
> +
> +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
> +{
> +return 1 << nvme_ns_lbads(ns);
> +}
> +
>  #define TYPE_NVME "nvme"
>  #define NVME(obj) \
>  OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
> @@ -101,4 +117,9 @@ typedef struct NvmeCtrl {
>  NvmeIdCtrl  id_ctrl;
>  } NvmeCtrl;
>  
> +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +return n->ns_size >> nvme_ns_lbads(ns);
> +}
Unless you need all these functions in the future, this feels like
it is a bit verbose.

> +
>  #endif /* HW_NVME_H */


Best regards,
Maxim Levitsky

Re: [PATCH v5 07/26] nvme: add support for the abort command

2020-02-12 Thread Maxim Levitsky

On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1,
> Section 5.1 ("Abort command").
> 
> The Abort command is a best effort command; for now, the device always
> fails to abort the given command.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 28 
>  1 file changed, 28 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index ba5089df9ece..e1810260d40b 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -731,6 +731,18 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
>  }
>  }
>  
> +static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +{
> +uint16_t sqid = le32_to_cpu(cmd->cdw10) & 0xffff;
> +
> +req->cqe.result = 1;
> +if (nvme_check_sqid(n, sqid)) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +return NVME_SUCCESS;
> +}

Looks 100% up to spec.

In my nvme-mdev it looks like I implemented this wrongly by failing it with
NVME_SC_ABORT_MISSING (which is defined in the kernel sources, but looks like a 
reserved error code in the spec). Not that it matters that much.

Also, unrelated to this patch, something I would like to point out 
(this applies not only to this command but to all admin and IO commands): the 
device should check the various reserved fields in the command descriptor, which it 
currently doesn't.

This is what I do:
https://gitlab.com/maximlevitsky/linux/blob/mdev-work-5.2/drivers/nvme/mdev/adm.c#L783

> +
>  static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
>  {
>  trace_nvme_dev_setfeat_timestamp(ts);
> @@ -848,6 +860,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  trace_nvme_dev_err_invalid_setfeat(dw10);
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
> +
Nitpick: Unrelated whitespace change.
>  return NVME_SUCCESS;
>  }
>  
> @@ -864,6 +877,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, 
> NvmeRequest *req)
>  return nvme_create_cq(n, cmd);
>  case NVME_ADM_CMD_IDENTIFY:
>  return nvme_identify(n, cmd);
> +case NVME_ADM_CMD_ABORT:
> +return nvme_abort(n, cmd, req);
>  case NVME_ADM_CMD_SET_FEATURES:
>  return nvme_set_feature(n, cmd, req);
>  case NVME_ADM_CMD_GET_FEATURES:
> @@ -1377,6 +1392,19 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  id->ieee[2] = 0xb3;
>  id->ver = cpu_to_le32(NVME_SPEC_VER);
>  id->oacs = cpu_to_le16(0);
> +
> +/*
> + * Because the controller always completes the Abort command immediately,
> + * there can never be more than one concurrently executing Abort command,
> + * so this value is never used for anything. Note that there can easily 
> be
> + * many Abort commands in the queues, but they are not considered
> + * "executing" until processed by nvme_abort.
> + *
> + * The specification recommends a value of 3 for Abort Command Limit 
> (four
> + * concurrently outstanding Abort commands), so lets use that though it 
> is
> + * inconsequential.
> + */
> +id->acl = 3;
Yep.
>  id->frmw = 7 << 1;
>  id->lpa = 1 << 0;
>  id->sqes = (0x6 << 4) | 0x6;


Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v5 06/26] nvme: refactor nvme_addr_read

2020-02-12 Thread Maxim Levitsky

On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> Pull the controller memory buffer check to its own function. The check
> will be used on its own in later patches.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 18 +-
>  1 file changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 9abf74da20f2..ba5089df9ece 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -54,14 +54,22 @@
>  
>  static void nvme_process_sq(void *opaque);
>  
> +static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
> +{
> +hwaddr low = n->ctrl_mem.addr;
> +hwaddr hi  = n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size);
> +
> +return addr >= low && addr < hi;
> +}
> +
>  static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
>  {
> -if (n->cmbsz && addr >= n->ctrl_mem.addr &&
> -addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) {
> -memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size);
> -} else {
> -pci_dma_read(&n->parent_obj, addr, buf, size);
> +if (n->cmbsz && nvme_addr_is_cmb(n, addr)) {
> +memcpy(buf, (void *) &n->cmbuf[addr - n->ctrl_mem.addr], size);
Nitpick:
I am not an expert on qemu coding style but I suspect that there is an extra space 
after that (void *).

Also note that in the following patches you fix a serious bug in this function: it 
only checks that the start of the area is in the CMB, not that the whole range is.
I would move that fix here, or even to a separate patch.

> +return;
>  }
> +
> +pci_dma_read(&n->parent_obj, addr, buf, size);
>  }
>  
>  static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)

Best regards,
Maxim Levitsky

Re: [PATCH v5 05/26] nvme: populate the mandatory subnqn and ver fields

2020-02-12 Thread Maxim Levitsky

On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> Required for compliance with NVMe revision 1.2.1 or later. See NVM
> Express 1.2.1, Section 5.11 ("Identify command"), Figure 90 and Section
> 7.9 ("NVMe Qualified Names").
> 
> This also bumps the supported version to 1.2.1.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 13 ++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f05ebcce3f53..9abf74da20f2 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -9,9 +9,9 @@
>   */
>  
>  /**
> - * Reference Specs: http://www.nvmexpress.org, 1.2, 1.1, 1.0e
> + * Reference Specification: NVM Express 1.2.1
>   *
> - *  http://www.nvmexpress.org/resources/
> + *   https://nvmexpress.org/resources/specifications/
To be honest that redirects to https://nvmexpress.org/specifications/
Not a problem though.
>   */
>  
>  /**
> @@ -43,6 +43,8 @@
>  #include "trace.h"
>  #include "nvme.h"
>  
> +#define NVME_SPEC_VER 0x00010201
> +
>  #define NVME_GUEST_ERR(trace, fmt, ...) \
>  do { \
>  (trace_##trace)(__VA_ARGS__); \
> @@ -1365,6 +1367,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  id->ieee[0] = 0x00;
>  id->ieee[1] = 0x02;
>  id->ieee[2] = 0xb3;
> +id->ver = cpu_to_le32(NVME_SPEC_VER);
This is indeed 1.2 addition
>  id->oacs = cpu_to_le16(0);
>  id->frmw = 7 << 1;
>  id->lpa = 1 << 0;
> @@ -1372,6 +1375,10 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  id->cqes = (0x4 << 4) | 0x4;
>  id->nn = cpu_to_le32(n->num_namespaces);
>  id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
> +
> +strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:");
> +pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial);
Looks OK, this is first format according to the spec.
> +
>  id->psd[0].mp = cpu_to_le16(0x9c4);
>  id->psd[0].enlat = cpu_to_le32(0x10);
>  id->psd[0].exlat = cpu_to_le32(0x4);
> @@ -1386,7 +1393,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  NVME_CAP_SET_CSS(n->bar.cap, 1);
>  NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
>  
> -n->bar.vs = 0x00010200;
> +n->bar.vs = NVME_SPEC_VER;
>  n->bar.intmc = n->bar.intms = 0;
>  
>  if (n->params.cmb_size_mb) {

To be really pedantic, the 1.2 spec also requires at least:
  * wctemp and cctemp to be nonzero in Identify Controller (yea, this is stupid 
to report temperature for virtual controller)
  * NVME_ADM_CMD_GET_LOG_PAGE, with some mandatory log pages
  * NVME_ADM_CMD_SET_FEATURES/NVME_ADM_CMD_GET_FEATURES - The device currently 
doesn't implement some mandatory features.

And there are probably more. This is what I can recall from my nvme-mdev.

However I see that you implemented these in the following patches, so I suggest you 
first put the patches that implement all those features,
and then bump the NVME version.
Most of the features I mentioned were mandatory even in version 1.0 of the 
spec, so the current version is not even
compliant with 1.0 IMHO.

Best regards,
Maxim Levitsky

Re: [PATCH v5 04/26] nvme: add missing fields in the identify data structures

2020-02-12 Thread Maxim Levitsky

On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> Not used by the device model but added for completeness. See NVM Express
> 1.2.1, Section 5.11 ("Identify command"), Figure 90 and Figure 93.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  include/block/nvme.h | 48 
>  1 file changed, 40 insertions(+), 8 deletions(-)
> 
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 8fb941c6537c..d2f65e8fe496 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -543,7 +543,13 @@ typedef struct NvmeIdCtrl {
>  uint8_t ieee[3];
>  uint8_t cmic;
>  uint8_t mdts;
> -uint8_t rsvd255[178];
> +uint16_tcntlid;
> +uint32_tver;
> +uint32_trtd3r;
> +uint32_trtd3e;
> +uint32_toaes;
> +uint32_tctratt;
> +uint8_t rsvd100[156];
>  uint16_toacs;
>  uint8_t acl;
>  uint8_t aerl;
> @@ -551,10 +557,22 @@ typedef struct NvmeIdCtrl {
>  uint8_t lpa;
>  uint8_t elpe;
>  uint8_t npss;
> -uint8_t rsvd511[248];
> +uint8_t avscc;
> +uint8_t apsta;
> +uint16_twctemp;
> +uint16_tcctemp;
> +uint16_tmtfa;
> +uint32_thmpre;
> +uint32_thmmin;
> +uint8_t tnvmcap[16];
> +uint8_t unvmcap[16];
> +uint32_trpmbs;
> +uint8_t rsvd316[4];
> +uint16_tkas;
> +uint8_t rsvd322[190];
>  uint8_t sqes;
>  uint8_t cqes;
> -uint16_trsvd515;
> +uint16_tmaxcmd;
>  uint32_tnn;
>  uint16_toncs;
>  uint16_tfuses;
> @@ -562,8 +580,14 @@ typedef struct NvmeIdCtrl {
>  uint8_t vwc;
>  uint16_tawun;
>  uint16_tawupf;
> -uint8_t rsvd703[174];
> -uint8_t rsvd2047[1344];
> +uint8_t nvscc;
> +uint8_t rsvd531;
> +uint16_tacwu;
> +uint8_t rsvd534[2];
> +uint32_tsgls;
> +uint8_t rsvd540[228];
> +uint8_t subnqn[256];
> +uint8_t rsvd1024[1024];
>  NvmePSD psd[32];
>  uint8_t vs[1024];
>  } NvmeIdCtrl;
> @@ -653,13 +677,21 @@ typedef struct NvmeIdNs {
>  uint8_t mc;
>  uint8_t dpc;
>  uint8_t dps;
> -
>  uint8_t nmic;
>  uint8_t rescap;
>  uint8_t fpi;
>  uint8_t dlfeat;
> -
> -uint8_t res34[94];
> +uint8_t rsvd33;
This is wrong. nawun comes right after dlfeat
> +uint16_tnawun;
> +uint16_tnawupf;
And here the error cancels out, since there should be a 'nacwu' field here.
> +uint16_tnabsn;
> +uint16_tnabo;
> +uint16_tnabspf;
> +uint8_t rsvd46[2];
> +uint8_t nvmcap[16];
> +uint8_t rsvd64[40];
> +uint8_t nguid[16];
> +uint64_teui64;
>  NvmeLBAFlbaf[16];
>  uint8_t res192[192];
Not related to the patch, but maybe rename this to rsvd192 for the sake of 
consistency?
>  uint8_t vs[3712];


I reviewed this patch by cross referencing the nvme structures as defined in 
the kernel,
and the spec.

I would prefer this patch to be merged with all the other spec updates you do in the following 
patches,
to bring nvme.h up to date to 1.3d,
so that it will be easier to review this and to remove some noise from the other 
patches.

Best regards,
Maxim Levitsky

Re: [PATCH v5 03/26] nvme: move device parameters to separate struct

2020-02-12 Thread Maxim Levitsky

_new0(NvmeSQueue *, n->num_queues);
> -n->cq = g_new0(NvmeCQueue *, n->num_queues);
> +n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
> +n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
>  
>  memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
>"nvme", n->reg_size);
>  pci_register_bar(pci_dev, 0,
>  PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
>  &n->iomem);
> -msix_init_exclusive_bar(pci_dev, n->num_queues, 4, NULL);
> +msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);
>  
>  id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
>  id->ssvid = cpu_to_le16(pci_get_word(pci_conf + 
> PCI_SUBSYSTEM_VENDOR_ID));
>  strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
>  strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
> -strpadcpy((char *)id->sn, sizeof(id->sn), n->serial, ' ');
> +strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
>  id->rab = 6;
>  id->ieee[0] = 0x00;
>  id->ieee[1] = 0x02;
> @@ -1387,7 +1389,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  n->bar.vs = 0x00010200;
>  n->bar.intmc = n->bar.intms = 0;
>  
> -if (n->cmb_size_mb) {
> +if (n->params.cmb_size_mb) {
>  
>  NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
>  NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
> @@ -1398,7 +1400,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
>  NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
>  NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
> -NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb);
> +NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
>  
>  n->cmbloc = n->bar.cmbloc;
>  n->cmbsz = n->bar.cmbsz;
> @@ -1437,7 +1439,7 @@ static void nvme_exit(PCIDevice *pci_dev)
>  g_free(n->cq);
>  g_free(n->sq);
>  
> -if (n->cmb_size_mb) {
> +if (n->params.cmb_size_mb) {
>  g_free(n->cmbuf);
>  }
>  msix_uninit_exclusive_bar(pci_dev);
> @@ -1445,9 +1447,7 @@ static void nvme_exit(PCIDevice *pci_dev)
>  
>  static Property nvme_props[] = {
>  DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
> -DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
> -DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
> -DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
> +DEFINE_NVME_PROPERTIES(NvmeCtrl, params),
>  DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 557194ee1954..9957c4a200e2 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -1,7 +1,19 @@
>  #ifndef HW_NVME_H
>  #define HW_NVME_H
> +
>  #include "block/nvme.h"
>  
> +#define DEFINE_NVME_PROPERTIES(_state, _props) \
> +DEFINE_PROP_STRING("serial", _state, _props.serial), \
> +DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \
> +DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64)
> +
> +typedef struct NvmeParams {
> +char *serial;
> +uint32_t num_queues;
> +    uint32_t cmb_size_mb;
> +} NvmeParams;
> +
>  typedef struct NvmeAsyncEvent {
>  QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry;
>  NvmeAerResult result;
> @@ -63,6 +75,7 @@ typedef struct NvmeCtrl {
>  MemoryRegion ctrl_mem;
>  NvmeBar  bar;
>  BlockConfconf;
> +NvmeParams   params;
>  
>  uint32_tpage_size;
>  uint16_tpage_bits;
> @@ -71,10 +84,8 @@ typedef struct NvmeCtrl {
>  uint16_tsqe_size;
>  uint32_treg_size;
>  uint32_tnum_namespaces;
> -uint32_tnum_queues;
>  uint32_tmax_q_ents;
>  uint64_tns_size;
> -uint32_tcmb_size_mb;
>  uint32_tcmbsz;
>  uint32_tcmbloc;
>  uint8_t *cmbuf;
> @@ -82,7 +93,6 @@ typedef struct NvmeCtrl {
>  uint64_thost_timestamp; /* Timestamp sent by the 
> host */
>  uint64_ttimestamp_set_qemu_clock_ms;/* QEMU clock time */
>  
> -char*serial;
>  NvmeNamespace   *namespaces;
>  NvmeSQueue  **sq;
>  NvmeCQueue  **cq;

With line wrapping issues fixed (this is an issue in all the patches),

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v5 01/26] nvme: rename trace events to nvme_dev

2020-02-12 Thread Maxim Levitsky

log2size=%u, min=%u"
> -nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) 
> "nvme_start_ctrl failed because the completion queue entry size is too large: 
> log2size=%u, max=%u"
> -nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) 
> "nvme_start_ctrl failed because the submission queue entry size is too small: 
> log2size=%u, min=%u"
> -nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) 
> "nvme_start_ctrl failed because the submission queue entry size is too large: 
> log2size=%u, max=%u"
> -nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the 
> admin submission queue size is zero"
> -nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the 
> admin completion queue size is zero"
> -nvme_err_startfail(void) "setting controller enable bit failed"
> +nvme_dev_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
> +nvme_dev_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null 
> or not page aligned: 0x%"PRIx64""
> +nvme_dev_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 
> 0x%"PRIx64""
> +nvme_dev_err_invalid_prp2_missing(void) "PRP2 is null and more data to be 
> transferred"
> +nvme_dev_err_invalid_prp(void) "invalid PRP"
> +nvme_dev_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u 
> not within 1-%u"
> +nvme_dev_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
> +nvme_dev_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
> +nvme_dev_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) 
> "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
> +nvme_dev_err_invalid_del_sq(uint16_t qid) "invalid submission queue 
> deletion, sid=%"PRIu16""
> +nvme_dev_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating 
> submission queue, invalid cqid=%"PRIu16""
> +nvme_dev_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating 
> submission queue, invalid sqid=%"PRIu16""
> +nvme_dev_err_invalid_create_sq_size(uint16_t qsize) "failed creating 
> submission queue, invalid qsize=%"PRIu16""
> +nvme_dev_err_invalid_create_sq_addr(uint64_t addr) "failed creating 
> submission queue, addr=0x%"PRIx64""
> +nvme_dev_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating 
> submission queue, qflags=%"PRIu16""
> +nvme_dev_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion 
> queue, cqid=%"PRIu16""
> +nvme_dev_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting 
> completion queue, it is not empty, cqid=%"PRIu16""
> +nvme_dev_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating 
> completion queue, cqid=%"PRIu16""
> +nvme_dev_err_invalid_create_cq_size(uint16_t size) "failed creating 
> completion queue, size=%"PRIu16""
> +nvme_dev_err_invalid_create_cq_addr(uint64_t addr) "failed creating 
> completion queue, addr=0x%"PRIx64""
> +nvme_dev_err_invalid_create_cq_vector(uint16_t vector) "failed creating 
> completion queue, vector=%"PRIu16""
> +nvme_dev_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating 
> completion queue, qflags=%"PRIu16""
> +nvme_dev_err_invalid_identify_cns(uint16_t cns) "identify, invalid 
> cns=0x%"PRIx16""
> +nvme_dev_err_invalid_getfeat(int dw10) "invalid get features, 
> dw10=0x%"PRIx32""
> +nvme_dev_err_invalid_setfeat(uint3

Re: [PATCH v5 02/26] nvme: remove superfluous breaks

2020-02-12 Thread Maxim Levitsky

On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> These break statements were left over when commit 3036a626e9ef ("nvme:
> add Get/Set Feature Timestamp support") was merged.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index dd548d9b6605..c9ad6aaa5f95 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -788,7 +788,6 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  break;
>  case NVME_TIMESTAMP:
>  return nvme_get_feature_timestamp(n, cmd);
> -break;
>  default:
>  trace_nvme_dev_err_invalid_getfeat(dw10);
>  return NVME_INVALID_FIELD | NVME_DNR;
> @@ -831,11 +830,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  req->cqe.result =
>  cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
>  break;
> -
>  case NVME_TIMESTAMP:
>  return nvme_set_feature_timestamp(n, cmd);
> -break;
> -
>  default:
>  trace_nvme_dev_err_invalid_setfeat(dw10);
>  return NVME_INVALID_FIELD | NVME_DNR;

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH 02/13] qcrypto-luks: implement encryption key management

2020-02-06 Thread Maxim Levitsky

On Thu, 2020-02-06 at 16:19 +0100, Markus Armbruster wrote:
> Kevin Wolf  writes:
> 
> > Am 06.02.2020 um 14:36 hat Daniel P. Berrangé geschrieben:
> > > On Thu, Feb 06, 2020 at 02:20:11PM +0100, Markus Armbruster wrote:
> > > > One more question regarding the array in
> > > > 
> > > > { 'struct': 'QCryptoBlockAmendOptionsLUKS',
> > > >   'data' : {
> > > > 'keys': ['LUKSKeyslotUpdate'],
> > > >  '*unlock-secret' : 'str' } }
> > > > 
> > > > Why an array?  Do we really need multiple keyslot updates in one amend
> > > > operation?
> > > 
> > > I think it is unlikely we'd use this in libvirt. In the case of wanting
> > > to *change* a key, it is safer to do a sequence of "add key" and then
> > > "remove key". If you combine them into the same operation, and you get
> > > an error back, it is hard to know /where/ it failed ? was the new key
> > > added or not ?
> > 
> > I think the array came in because of the "describe the new state"
> > approach. The state has eight keyslots, so in order to fully describe
> > the new state, you would have to be able to pass multiple slots at once.
> 
> I see.
> 
> Of course, it can also describe multiple new states for the same slot.
> 
> Example:
> 
> [{'state': 'active', 'keyslot': 0, 'secret': 'sec0'},
>  {'state': 'active', 'keyslot': 0, 'secret': 'sec1'}]
> 
> where slot 0's old state is 'inactive'.
> 
> Which one is the new state?
> 
> If we execute the array elements one by one, this first makes slot 0
> active with secret 'sec0', then tries to make it active with secret
> 'sec1', which fails.  Simple enough, but it's not really "describe the
> new state", it's still "specify a series of state transitions".
> 
> If we merge the array elements into a description of the new state of
> all eight slots, where a slot's description can be "same as old state",
> then this makes slot 0 active with either secret 'sec0' or 'sec1',
> depending on how we resolve the conflict.  We could even make conflicts
> an error, and then this would fail without changing anything.
> 
> What do we want?
> 
> Is this worth the trouble?

Yes, those are my thoughts on this as well.

Best regards,
Maxim Levitsky

Re: [PATCH v2] qxl: introduce hardware revision 5

2020-02-06 Thread Maxim Levitsky

On Thu, 2020-02-06 at 08:43 +0100, Gerd Hoffmann wrote:
> The only difference to hardware revision 4 is that the device doesn't
> switch to VGA mode in case someone happens to touch a VGA register,
> which should make things more robust in configurations with multiple
> vga devices.
> 
> Switching back to VGA mode happens on reset, either full machine
> reset or qxl device reset (QXL_IO_RESET ioport command).
> 
> Signed-off-by: Gerd Hoffmann 
> ---
>  hw/display/qxl.h  | 2 +-
>  hw/core/machine.c | 2 ++
>  hw/display/qxl.c  | 7 ++-
>  3 files changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/display/qxl.h b/hw/display/qxl.h
> index 80eb0d267269..707631a1f573 100644
> --- a/hw/display/qxl.h
> +++ b/hw/display/qxl.h
> @@ -144,7 +144,7 @@ typedef struct PCIQXLDevice {
>  }   \
>  } while (0)
>  
> -#define QXL_DEFAULT_REVISION QXL_REVISION_STABLE_V12
> +#define QXL_DEFAULT_REVISION (QXL_REVISION_STABLE_V12 + 1)
Minor nitpick: Maybe add a #define instead of +1?

>  
>  /* qxl.c */
>  void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id);
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 3e288bfceb7f..13a3b2c9c425 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -34,6 +34,8 @@ GlobalProperty hw_compat_4_2[] = {
>  { "vhost-blk-device", "seg_max_adjust", "off"},
>  { "usb-host", "suppress-remote-wake", "off" },
>  { "usb-redir", "suppress-remote-wake", "off" },
> +{ "qxl", "revision", "4" },
> +{ "qxl-vga", "revision", "4" },
>  };
>  const size_t hw_compat_4_2_len = G_N_ELEMENTS(hw_compat_4_2);
>  
> diff --git a/hw/display/qxl.c b/hw/display/qxl.c
> index c33b1915a52c..64884da70857 100644
> --- a/hw/display/qxl.c
> +++ b/hw/display/qxl.c
> @@ -1309,7 +1309,8 @@ static void qxl_vga_ioport_write(void *opaque, uint32_t 
> addr, uint32_t val)
>  PCIQXLDevice *qxl = container_of(vga, PCIQXLDevice, vga);
>  
>  trace_qxl_io_write_vga(qxl->id, qxl_mode_to_string(qxl->mode), addr, 
> val);
> -if (qxl->mode != QXL_MODE_VGA) {
> +if (qxl->mode != QXL_MODE_VGA &&
> +qxl->revision <= QXL_REVISION_STABLE_V12) {
>  qxl_destroy_primary(qxl, QXL_SYNC);
>  qxl_soft_reset(qxl);
>  }
> @@ -2121,6 +2122,10 @@ static void qxl_realize_common(PCIQXLDevice *qxl, 
> Error **errp)
>  pci_device_rev = QXL_REVISION_STABLE_V12;
>  io_size = pow2ceil(QXL_IO_RANGE_SIZE);
>  break;
> +case 5: /* qxl-5 */
> +pci_device_rev = QXL_REVISION_STABLE_V12 + 1;
> +io_size = pow2ceil(QXL_IO_RANGE_SIZE);
> +break;
>  default:
>  error_setg(errp, "Invalid revision %d for qxl device (max %d)",
> qxl->revision, QXL_DEFAULT_REVISION);

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky

Re: [PATCH v4 00/11] RFC: [for 5.0]: HMP monitor handlers refactoring

2020-02-04 Thread Maxim Levitsky

On Mon, 2020-02-03 at 19:57 +, Dr. David Alan Gilbert wrote:
> * Maxim Levitsky (mlevi...@redhat.com) wrote:
> > This patch series is a bunch of cleanups to the hmp monitor code.
> > It mostly moves the blockdev related hmp handlers to its own file,
> > and does some minor refactoring.
> > 
> > No functional changes expected.
> 
> You've still got the title marked as RFC - are you actually ready for
> this log?

I forgot to update this, to be honest. I don't consider this an RFC,
especially since I have dropped, for now, the patches that might cause
issues. This is now just a nice refactoring.

Best regards,
Maxim Levitsky

> 
> Dave
> 
> > 
> > Changes from V1:
> >* move the handlers to block/monitor/block-hmp-cmds.c
> >* tiny cleanup for the commit messages
> > 
> > Changes from V2:
> >* Moved all the function prototypes to new header (blockdev-hmp-cmds.h)
> >* Set the license of blockdev-hmp-cmds.c to GPLv2+
> >* Moved hmp_snapshot_* functions to blockdev-hmp-cmds.c
> >* Moved hmp_drive_add_node to blockdev-hmp-cmds.c
> >  (this change needed some new exports, thus in separate new patch)
> >* Moved hmp_qemu_io and hmp_eject to blockdev-hmp-cmds.c
> >* Added 'error:' prefix to vreport, and updated the iotests
> >  This is invasive change, but really feels like the right one
> >* Added minor refactoring patch that drops an unused #include
> > 
> > Changes from V3:
> >* Dropped the error prefix patches for now due to fact that it seems
> >  that libvirt doesn't need that after all. Oh well...
> >  I'll send them in a separate series.
> > 
> >* Hopefully correctly merged the copyright info the new files
> >  Both files are GPLv2 now (due to code from hmp.h/hmp-cmds.c)
> > 
> >* Addressed review feedback
> >* Renamed the added header to block-hmp-cmds.h
> > 
> >* Got rid of checkpatch.pl warnings in the moved code
> >  (cosmetic code changes only)
> > 
> >* I kept the reviewed-by tags, since the changes I did are minor.
> >  I hope that this is right thing to do.
> > 
> > Best regards,
> > Maxim Levitsky
> > 
> > Maxim Levitsky (11):
> >   usb/dev-storage: remove unused include
> >   monitor/hmp: uninline add_init_drive
> >   monitor/hmp: rename device-hotplug.c to block/monitor/block-hmp-cmds.c
> >   monitor/hmp: move hmp_drive_del and hmp_commit to block-hmp-cmds.c
> >   monitor/hmp: move hmp_drive_mirror and hmp_drive_backup to
> > block-hmp-cmds.c Moved code was added after 2012-01-13, thus under
> > GPLv2+
> >   monitor/hmp: move hmp_block_job* to block-hmp-cmds.c
> >   monitor/hmp: move hmp_snapshot_* to block-hmp-cmds.c
> > hmp_snapshot_blkdev is from GPLv2 version of the hmp-cmds.c thus
> > have to change the licence to GPLv2
> >   monitor/hmp: move hmp_nbd_server* to block-hmp-cmds.c
> >   monitor/hmp: move remaining hmp_block* functions to block-hmp-cmds.c
> >   monitor/hmp: move hmp_info_block* to block-hmp-cmds.c
> >   monitor/hmp: Move hmp_drive_add_node to block-hmp-cmds.c
> > 
> >  MAINTAINERS|1 +
> >  Makefile.objs  |2 +-
> >  block/Makefile.objs|1 +
> >  block/monitor/Makefile.objs|1 +
> >  block/monitor/block-hmp-cmds.c | 1002 
> >  blockdev.c |  137 +
> >  device-hotplug.c   |   91 ---
> >  hw/usb/dev-storage.c   |1 -
> >  include/block/block-hmp-cmds.h |   54 ++
> >  include/block/block_int.h  |5 +-
> >  include/monitor/hmp.h  |   24 -
> >  include/sysemu/blockdev.h  |4 -
> >  include/sysemu/sysemu.h|3 -
> >  monitor/hmp-cmds.c |  769 
> >  monitor/misc.c |1 +
> >  15 files changed, 1072 insertions(+), 1024 deletions(-)
> >  create mode 100644 block/monitor/Makefile.objs
> >  create mode 100644 block/monitor/block-hmp-cmds.c
> >  delete mode 100644 device-hotplug.c
> >  create mode 100644 include/block/block-hmp-cmds.h
> > 
> > -- 
> > 2.17.2
> > 
> 
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[PATCH v2 05/14] block/amend: refactor qcow2 amend options

2020-01-30 Thread Maxim Levitsky

Some qcow2 create options can't be used for amend.
Remove them from the qcow2 create options and add generic logic to detect
such options in qemu-img

Signed-off-by: Maxim Levitsky 
---
 block/qcow2.c  | 108 ++---
 qemu-img.c |  18 +++-
 tests/qemu-iotests/049.out | 102 ++--
 tests/qemu-iotests/061.out |  12 ++-
 tests/qemu-iotests/079.out |  18 ++--
 tests/qemu-iotests/082.out | 149 
 tests/qemu-iotests/085.out |  38 
 tests/qemu-iotests/087.out |   6 +-
 tests/qemu-iotests/115.out |   2 +-
 tests/qemu-iotests/121.out |   4 +-
 tests/qemu-iotests/125.out | 192 ++---
 tests/qemu-iotests/134.out |   2 +-
 tests/qemu-iotests/144.out |   4 +-
 tests/qemu-iotests/158.out |   4 +-
 tests/qemu-iotests/182.out |   2 +-
 tests/qemu-iotests/185.out |   8 +-
 tests/qemu-iotests/188.out |   2 +-
 tests/qemu-iotests/189.out |   4 +-
 tests/qemu-iotests/198.out |   4 +-
 tests/qemu-iotests/243.out |  16 ++--
 tests/qemu-iotests/250.out |   2 +-
 tests/qemu-iotests/255.out |   8 +-
 tests/qemu-iotests/263.out |   4 +-
 tests/qemu-iotests/280.out |   2 +-
 24 files changed, 283 insertions(+), 428 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index c6c2deee75..b1a03a56a5 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2949,17 +2949,6 @@ static int qcow2_change_backing_file(BlockDriverState 
*bs,
 return qcow2_update_header(bs);
 }
 
-static int qcow2_crypt_method_from_format(const char *encryptfmt)
-{
-if (g_str_equal(encryptfmt, "luks")) {
-return QCOW_CRYPT_LUKS;
-} else if (g_str_equal(encryptfmt, "aes")) {
-return QCOW_CRYPT_AES;
-} else {
-return -EINVAL;
-}
-}
-
 static int qcow2_set_up_encryption(BlockDriverState *bs,
QCryptoBlockCreateOptions *cryptoopts,
Error **errp)
@@ -5160,9 +5149,6 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts,
 bool lazy_refcounts = s->use_lazy_refcounts;
 bool data_file_raw = data_file_is_raw(bs);
 const char *compat = NULL;
-uint64_t cluster_size = s->cluster_size;
-bool encrypt;
-int encformat;
 int refcount_bits = s->refcount_bits;
 int ret;
 QemuOptDesc *desc = opts->list->desc;
@@ -5187,44 +5173,12 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts,
 error_setg(errp, "Unknown compatibility level %s", compat);
 return -EINVAL;
 }
-} else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
-error_setg(errp, "Cannot change preallocation mode");
-return -ENOTSUP;
 } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
 new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
-} else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
-encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
-!!s->crypto);
-
-if (encrypt != !!s->crypto) {
-error_setg(errp,
-   "Changing the encryption flag is not supported");
-return -ENOTSUP;
-}
-} else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
-encformat = qcow2_crypt_method_from_format(
-qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));
-
-if (encformat != s->crypt_method_header) {
-error_setg(errp,
-   "Changing the encryption format is not supported");
-return -ENOTSUP;
-}
-} else if (g_str_has_prefix(desc->name, "encrypt.")) {
-error_setg(errp,
-   "Changing the encryption parameters is not supported");
-return -ENOTSUP;
-} else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
-cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
- cluster_size);
-if (cluster_size != s->cluster_size) {
-error_setg(errp, "Changing the cluster size is not supported");
-return -ENOTSUP;
-}
 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
lazy_refcounts);
@@ -5477,37 +5431,6 @@ void qcow2_signal_corruption(Block

[PATCH v2 13/14] block/qcow2: implement blockdev-amend

2020-01-30 Thread Maxim Levitsky

Currently the implementation only supports amending the encryption
options, unlike the qemu-img version

Signed-off-by: Maxim Levitsky 
Reviewed-by: Daniel P. Berrangé 
---
 block/qcow2.c| 39 +++
 qapi/block-core.json | 16 +++-
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 59c8a772cc..a15d3f156b 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -5402,6 +5402,44 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts,
 return 0;
 }
 
+static int coroutine_fn qcow2_co_amend(BlockDriverState *bs,
+   BlockdevAmendOptions *opts,
+   bool force,
+   Error **errp)
+{
+BlockdevAmendOptionsQcow2 *qopts = &opts->u.qcow2;
+BDRVQcow2State *s = bs->opaque;
+int ret = 0;
+
+if (qopts->has_encrypt) {
+if (!s->crypto) {
+error_setg(errp, "image is not encrypted, can't amend");
+return -EOPNOTSUPP;
+}
+
+if (qopts->encrypt->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) {
+error_setg(errp,
+   "Amend can't be used to change the qcow2 encryption 
format");
+return -EOPNOTSUPP;
+}
+
+if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
+error_setg(errp,
+   "Only LUKS encryption options can be amended for qcow2 
with blockdev-amend");
+return -EOPNOTSUPP;
+}
+
+ret = qcrypto_block_amend_options(s->crypto,
+  qcow2_crypto_hdr_read_func,
+  qcow2_crypto_hdr_write_func,
+  bs,
+  qopts->encrypt,
+  force,
+  errp);
+}
+return ret;
+}
+
 /*
  * If offset or size are negative, respectively, they will not be included in
  * the BLOCK_IMAGE_CORRUPTED event emitted.
@@ -5614,6 +5652,7 @@ BlockDriver bdrv_qcow2 = {
 .mutable_opts= mutable_opts,
 .bdrv_co_check   = qcow2_co_check,
 .bdrv_amend_options  = qcow2_amend_options,
+.bdrv_co_amend   = qcow2_co_amend,
 
 .bdrv_detach_aio_context  = qcow2_detach_aio_context,
 .bdrv_attach_aio_context  = qcow2_attach_aio_context,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 790aa40991..2cf1f443e5 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4755,6 +4755,19 @@
   'data': { }
 }
 
+##
+# @BlockdevAmendOptionsQcow2:
+#
+# Driver specific image amend options for qcow2.
+# For now, only encryption options can be amended
+#
+# @encrypt: Encryption options to be amended
+#
+# Since: 5.0
+##
+{ 'struct': 'BlockdevAmendOptionsQcow2',
+  'data': { '*encrypt': 'QCryptoBlockAmendOptions' } }
+
 ##
 # @BlockdevAmendOptions:
 #
@@ -4769,7 +4782,8 @@
   'driver': 'BlockdevDriver' },
   'discriminator': 'driver',
   'data': {
-  'luks':   'BlockdevAmendOptionsLUKS' } }
+  'luks':   'BlockdevAmendOptionsLUKS',
+  'qcow2':  'BlockdevAmendOptionsQcow2' } }
 
 ##
 # @x-blockdev-amend:
-- 
2.17.2

< 1 2 3 4 5 6 7 8 9 10 >

501 - 600 of 1129 matches

Mail list logo