Re: [PATCH v2 1/1] block/nvme: introduce PMR support from NVMe 1.4 spec

2020-03-03 Thread Andrzej Jakowski
On 2/21/20 2:23 PM, Andrzej Jakowski wrote:
> This patch introduces support for PMR that has been defined as part of NVMe 
> 1.4
> spec. User can now specify a pmr_file which will be mmap'ed into qemu address
> space and subsequently in PCI BAR 2. Guest OS can perform mmio read and writes
> to the PMR region that will stay persistent accross system reboot.
> 
> Signed-off-by: Andrzej Jakowski 
> ---
>  hw/block/nvme.c   | 165 +++-
>  hw/block/nvme.h   |   5 ++
>  hw/block/trace-events |   5 ++
>  include/block/nvme.h  | 172 ++
>  4 files changed, 346 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index d28335cbf3..ff7e74d765 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -19,10 +19,14 @@
>   *  -drive file=,if=none,id=
>   *  -device nvme,drive=,serial=,id=, \
>   *  cmb_size_mb=, \
> + *  [pmr_file=,] \
>   *  num_queues=
>   *
>   * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
>   * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
> + *
> + * Either cmb or pmr - due to limitation in avaialbe BAR indexes.
> + * pmr_file file needs to be preallocated and power of two in size.
>   */
>  
>  #include "qemu/osdep.h"
> @@ -1141,6 +1145,28 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
> uint64_t data,
>  NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
> "invalid write to read only CMBSZ, ignored");
>  return;
> +#ifndef _WIN32
> +case 0xE00: /* PMRCAP */
> +NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly,
> +   "invalid write to PMRCAP register, ignored");
> +return;
> +case 0xE04: /* TODO PMRCTL */
> +break;
> +case 0xE08: /* PMRSTS */
> +NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly,
> +   "invalid write to PMRSTS register, ignored");
> +return;
> +case 0xE0C: /* PMREBS */
> +NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly,
> +   "invalid write to PMREBS register, ignored");
> +return;
> +case 0xE10: /* PMRSWTP */
> +NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly,
> +   "invalid write to PMRSWTP register, ignored");
> +return;
> +case 0xE14: /* TODO PMRMSC */
> + break;
> +#endif /* !_WIN32 */
>  default:
>  NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
> "invalid MMIO write,"
> @@ -1169,6 +1195,22 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr 
> addr, unsigned size)
>  }
>  
>  if (addr < sizeof(n->bar)) {
> +#ifndef _WIN32
> +/*
> + * When PMRWBM bit 1 is set then read from
> + * from PMRSTS should ensure prior writes
> + * made it to persistent media
> + */
> +if (addr == 0xE08 &&
> +(NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02) >> 1) {
> +int ret;
> +ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC);
> +if (!ret) {
> +NVME_GUEST_ERR(nvme_ub_mmiord_pmrread_barrier,
> +   "error while persisting data");
> +}
> +}
> +#endif /* !_WIN32 */
>  memcpy(, ptr + addr, size);
>  } else {
>  NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
> @@ -1303,6 +1345,31 @@ static const MemoryRegionOps nvme_cmb_ops = {
>  },
>  };
>  
> +#ifndef _WIN32
> +static void nvme_pmr_write(void *opaque, hwaddr addr, uint64_t data,
> +unsigned size)
> +{
> +NvmeCtrl *n = (NvmeCtrl *)opaque;
> +stn_le_p(>pmrbuf[addr], size, data);
> +}
> +
> +static uint64_t nvme_pmr_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +NvmeCtrl *n = (NvmeCtrl *)opaque;
> +return ldn_le_p(>pmrbuf[addr], size);
> +}
> +
> +static const MemoryRegionOps nvme_pmr_ops = {
> +.read = nvme_pmr_read,
> +.write = nvme_pmr_write,
> +.endianness = DEVICE_LITTLE_ENDIAN,
> +.impl = {
> +.min_access_size = 1,
> +.max_access_size = 8,
> +},
> +};
> +#endif /* !_WIN32 */
> +
>  static void nvme_realize(PCIDevice *pci_dev, Error **errp)
>  {
>  NvmeCtrl *n = NVME(pci_dev);
> @@ -1332,6 +1399,39 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>  error_setg(errp, "serial property not set");
>  return;
>  }
> +
> +#ifndef _WIN32
> +if (!n->cmb_size_mb && n->pmr_file) {
> +int fd;
> +
> +n->f_pmr = fopen(n->pmr_file, "r+b");
> +if (!n->f_pmr) {
> +error_setg(errp, "pmr backend file open error");
> +return;
> +}
> +
> +fseek(n->f_pmr, 0L, SEEK_END);
> +n->f_pmr_size = ftell(n->f_pmr);
> +fseek(n->f_pmr, 0L, SEEK_SET);
> +
> +/* pmr file size needs to be power of 2 in size */
> +if (!n->f_pmr_size || !is_power_of_2(n->f_pmr_size)) {
> + 

Re: [PATCH v2 1/1] block/nvme: introduce PMR support from NVMe 1.4 spec

2020-02-26 Thread Andrzej Jakowski
On 2/21/20 2:23 PM, Andrzej Jakowski wrote:
> This patch introduces support for PMR that has been defined as part of NVMe 
> 1.4
> spec. User can now specify a pmr_file which will be mmap'ed into qemu address
> space and subsequently in PCI BAR 2. Guest OS can perform mmio read and writes
> to the PMR region that will stay persistent accross system reboot.
> 
> Signed-off-by: Andrzej Jakowski 

Hi,

v2 addresses feedback received in v1 and so far I haven't seen any other 
comments.
Is there anything else needed for inclusion of this patch in tree?

Thank you,
Andrzej



[PATCH v2 1/1] block/nvme: introduce PMR support from NVMe 1.4 spec

2020-02-21 Thread Andrzej Jakowski
This patch introduces support for PMR that has been defined as part of NVMe 1.4
spec. User can now specify a pmr_file which will be mmap'ed into qemu address
space and subsequently in PCI BAR 2. Guest OS can perform mmio read and writes
to the PMR region that will stay persistent accross system reboot.

Signed-off-by: Andrzej Jakowski 
---
 hw/block/nvme.c   | 165 +++-
 hw/block/nvme.h   |   5 ++
 hw/block/trace-events |   5 ++
 include/block/nvme.h  | 172 ++
 4 files changed, 346 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index d28335cbf3..ff7e74d765 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -19,10 +19,14 @@
  *  -drive file=,if=none,id=
  *  -device nvme,drive=,serial=,id=, \
  *  cmb_size_mb=, \
+ *  [pmr_file=,] \
  *  num_queues=
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
  * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
+ *
+ * Either cmb or pmr - due to limitation in avaialbe BAR indexes.
+ * pmr_file file needs to be preallocated and power of two in size.
  */
 
 #include "qemu/osdep.h"
@@ -1141,6 +1145,28 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
"invalid write to read only CMBSZ, ignored");
 return;
+#ifndef _WIN32
+case 0xE00: /* PMRCAP */
+NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly,
+   "invalid write to PMRCAP register, ignored");
+return;
+case 0xE04: /* TODO PMRCTL */
+break;
+case 0xE08: /* PMRSTS */
+NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly,
+   "invalid write to PMRSTS register, ignored");
+return;
+case 0xE0C: /* PMREBS */
+NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly,
+   "invalid write to PMREBS register, ignored");
+return;
+case 0xE10: /* PMRSWTP */
+NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly,
+   "invalid write to PMRSWTP register, ignored");
+return;
+case 0xE14: /* TODO PMRMSC */
+ break;
+#endif /* !_WIN32 */
 default:
 NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
"invalid MMIO write,"
@@ -1169,6 +1195,22 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr 
addr, unsigned size)
 }
 
 if (addr < sizeof(n->bar)) {
+#ifndef _WIN32
+/*
+ * When PMRWBM bit 1 is set then read from
+ * from PMRSTS should ensure prior writes
+ * made it to persistent media
+ */
+if (addr == 0xE08 &&
+(NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02) >> 1) {
+int ret;
+ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC);
+if (!ret) {
+NVME_GUEST_ERR(nvme_ub_mmiord_pmrread_barrier,
+   "error while persisting data");
+}
+}
+#endif /* !_WIN32 */
 memcpy(, ptr + addr, size);
 } else {
 NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
@@ -1303,6 +1345,31 @@ static const MemoryRegionOps nvme_cmb_ops = {
 },
 };
 
+#ifndef _WIN32
+static void nvme_pmr_write(void *opaque, hwaddr addr, uint64_t data,
+unsigned size)
+{
+NvmeCtrl *n = (NvmeCtrl *)opaque;
+stn_le_p(>pmrbuf[addr], size, data);
+}
+
+static uint64_t nvme_pmr_read(void *opaque, hwaddr addr, unsigned size)
+{
+NvmeCtrl *n = (NvmeCtrl *)opaque;
+return ldn_le_p(>pmrbuf[addr], size);
+}
+
+static const MemoryRegionOps nvme_pmr_ops = {
+.read = nvme_pmr_read,
+.write = nvme_pmr_write,
+.endianness = DEVICE_LITTLE_ENDIAN,
+.impl = {
+.min_access_size = 1,
+.max_access_size = 8,
+},
+};
+#endif /* !_WIN32 */
+
 static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 {
 NvmeCtrl *n = NVME(pci_dev);
@@ -1332,6 +1399,39 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
**errp)
 error_setg(errp, "serial property not set");
 return;
 }
+
+#ifndef _WIN32
+if (!n->cmb_size_mb && n->pmr_file) {
+int fd;
+
+n->f_pmr = fopen(n->pmr_file, "r+b");
+if (!n->f_pmr) {
+error_setg(errp, "pmr backend file open error");
+return;
+}
+
+fseek(n->f_pmr, 0L, SEEK_END);
+n->f_pmr_size = ftell(n->f_pmr);
+fseek(n->f_pmr, 0L, SEEK_SET);
+
+/* pmr file size needs to be power of 2 in size */
+if (!n->f_pmr_size || !is_power_of_2(n->f_pmr_size)) {
+error_setg(errp, "pmr backend file size needs to be greater than 0"
+ "and power of 2 in size");
+return;
+}
+
+fd = fileno(n->f_pmr);
+n->pmrbuf = mmap(NULL, n->f_pmr_size,
+ (PROT_READ | PROT_WRITE), MAP_SHARED, fd, 0);
+
+