Re: [PATCH 0/6] 9pfs: convert Doxygen -> kerneldoc format

2022-03-03 Thread Greg Kurz
On Thu, 3 Mar 2022 14:40:56 +0100
Christian Schoenebeck  wrote:

> This patch set converts occurrences of API doc comments from Doxygen format
> into kerneldoc format. No behaviour change whatsoever.
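
[Editor's sketch: for readers unfamiliar with the two formats, a
representative conversion looks roughly like this; the comment text is
illustrative, not verbatim from the patches.

    /* Doxygen style (before): */
    /**
     * @brief Read an extended attribute value.
     * @param pdu - protocol data unit
     * @returns 0 on success, negative errno otherwise
     */

    /* kerneldoc style (after): */
    /**
     * v9fs_xattr_read() - Read an extended attribute value.
     * @pdu: protocol data unit
     *
     * Return: 0 on success, negative errno otherwise
     */
]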
> 
> Christian Schoenebeck (6):
>   9pfs/9p.h: convert Doxygen -> kerneldoc format
>   9pfs/codir.c: convert Doxygen -> kerneldoc format
>   9pfs/9p.c: convert Doxygen -> kerneldoc format
>   9pfs/9p-util.h: convert Doxygen -> kerneldoc format
>   9pfs/coth.h: drop Doxygen format on v9fs_co_run_in_worker()
>   fsdev/p9array.h: convert Doxygen -> kerneldoc format
> 
>  fsdev/p9array.h   | 38 -
>  hw/9pfs/9p-util.h | 10 
>  hw/9pfs/9p.c  | 62 ++-
>  hw/9pfs/9p.h  | 12 -
>  hw/9pfs/codir.c   | 30 +++
>  hw/9pfs/coth.h|  4 +--
>  6 files changed, 84 insertions(+), 72 deletions(-)
> 

LGTM.

Reviewed-by: Greg Kurz 




Re: [PATCH v4 14/14] vdpa: Add x-svq to NetdevVhostVDPAOptions

2022-03-03 Thread Markus Armbruster
Eugenio Pérez  writes:

> Finally offering the possibility to enable SVQ from the command line.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  qapi/net.json|  8 +++-
>  net/vhost-vdpa.c | 48 
>  2 files changed, 47 insertions(+), 9 deletions(-)
>
> diff --git a/qapi/net.json b/qapi/net.json
> index 7fab2e7cd8..06a74d4224 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -445,12 +445,18 @@
>  # @queues: number of queues to be created for multiqueue vhost-vdpa
>  #  (default: 1)
>  #
> +# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.0)
> +#
> +# Features:
> +# @unstable: Member @x-svq could change in future revisions.

Elsewhere we document "Member @foo is experimental."  Does your
different phrasing indicate a difference in intent?

> +#
>  # Since: 5.1
>  ##
>  { 'struct': 'NetdevVhostVDPAOptions',
>'data': {
>  '*vhostdev': 'str',
> -'*queues':   'int' } }
> +'*queues':   'int',
> +'*x-svq':{'type': 'bool', 'features' : [ 'unstable'] } } }
>  
>  ##
>  # @NetClientDriver:

Do you hope to make @x-svq stable eventually?  If yes: you'll want to
rename it to @svq then, which could be a bother.  Can be avoided by
naming it @svq now.  Up to you.
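
[Editor's sketch: once this lands, enabling the shadow virtqueue from
the command line would look roughly like the following; the device path
is hypothetical.

    -netdev vhost-vdpa,id=vdpa0,vhostdev=/dev/vhost-vdpa-0,x-svq=on
]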

[...]




Re: [PATCH v3 14/14] vdpa: Add x-svq to NetdevVhostVDPAOptions

2022-03-03 Thread Markus Armbruster
Eugenio Perez Martin  writes:

> Yes, that's right. I expressed my point poorly actually, I'll go the reverse.
>
> qapi-gen.py forces me to write a comment in the doc:
> qapi/block-core.json:2971: feature 'unstable' lacks documentation
>
> When I add the documentation line, it's enough to add @unstable. But
> there is no way to tell if this tag is because the whole struct is
> unstable or if it's because individual members are unstable unless the
> reader either checks the tag or the struct code.
>
> I was mostly worried about doc generators, I would not like to make
> NetdevVhostVDPAOptions unstable at this point. But I see that there is
> no problem with that.
>
> Thanks!

Yes, the doc generator insists on features being documented, and it
doesn't provide for documenting them next to members, only top-level.
The common solution is to phrase the comment like we do in
BlockdevOptionsFile:

# @unstable: Member x-check-cache-dropped is meant for debugging.

If there were multiple members so flagged, we'd enumerate them all.
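
[Editor's sketch: i.e., for hypothetical members @x-foo and @x-bar, the
single top-level comment would read:

    # @unstable: Members @x-foo and @x-bar are experimental.
]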

The generator doesn't check you do this right.  The existing check
guards against *forgetting* to do it, not against doing it wrong.

More questions?




[PULL 4/6] hw/nvme: add support for the lbafee hbs feature

2022-03-03 Thread Klaus Jensen
From: Naveen Nagar 

Add support for up to 64 LBA formats through the LBAFEE field of the
Host Behavior Support feature.
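
[Editor's sketch: how the extended format index splits across FLBAS once
LBAFEE is enabled, matching the bit layout used by nvme_format_set()
below; the helper name is made up for illustration.

    /*
     * With LBAFEE enabled, the LBA format index no longer fits in
     * FLBAS bits 3:0 alone; two more bits live in bits 6:5, allowing
     * up to 64 formats.
     */
    static inline uint8_t nvme_flbas_to_lbaf(uint8_t flbas)
    {
        uint8_t lbafl = flbas & 0xf;        /* bits 3:0 */
        uint8_t lbafu = (flbas >> 5) & 0x3; /* bits 6:5 */

        return (lbafu << 4) | lbafl;
    }
]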

Reviewed-by: Keith Busch 
Signed-off-by: Naveen Nagar 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 34 +++---
 hw/nvme/ns.c | 15 +--
 hw/nvme/nvme.h   |  1 +
 include/block/nvme.h |  7 +--
 4 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index d8701ebf2fa8..52ab3450b975 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5165,6 +5165,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest 
*req)
 uint32_t nsid = le32_to_cpu(cmd->nsid);
 uint8_t fid = NVME_GETSETFEAT_FID(dw10);
 uint8_t save = NVME_SETFEAT_SAVE(dw10);
+uint16_t status;
 int i;
 
 trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11);
@@ -5287,8 +5288,26 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, 
NvmeRequest *req)
 case NVME_TIMESTAMP:
 return nvme_set_feature_timestamp(n, req);
 case NVME_HOST_BEHAVIOR_SUPPORT:
-return nvme_h2c(n, (uint8_t *)>features.hbs,
-sizeof(n->features.hbs), req);
+status = nvme_h2c(n, (uint8_t *)>features.hbs,
+  sizeof(n->features.hbs), req);
+if (status) {
+return status;
+}
+
+for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ns = nvme_ns(n, i);
+
+if (!ns) {
+continue;
+}
+
+ns->id_ns.nlbaf = ns->nlbaf - 1;
+if (!n->features.hbs.lbafee) {
+ns->id_ns.nlbaf = MIN(ns->id_ns.nlbaf, 15);
+}
+}
+
+return status;
 case NVME_COMMAND_SET_PROFILE:
 if (dw11 & 0x1ff) {
 trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
@@ -5479,10 +5498,13 @@ static const AIOCBInfo nvme_format_aiocb_info = {
 static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset,
 uint8_t pi, uint8_t pil)
 {
+uint8_t lbafl = lbaf & 0xf;
+uint8_t lbafu = lbaf >> 4;
+
 trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil);
 
 ns->id_ns.dps = (pil << 3) | pi;
-ns->id_ns.flbas = lbaf | (mset << 4);
+ns->id_ns.flbas = (lbafu << 5) | (mset << 4) | lbafl;
 
 nvme_ns_init_format(ns);
 }
@@ -5596,6 +5618,7 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
 uint8_t mset = (dw10 >> 4) & 0x1;
 uint8_t pi = (dw10 >> 5) & 0x7;
 uint8_t pil = (dw10 >> 8) & 0x1;
+uint8_t lbafu = (dw10 >> 12) & 0x3;
 uint16_t status;
 
 iocb = qemu_aio_get(_format_aiocb_info, NULL, nvme_misc_cb, req);
@@ -5612,6 +5635,10 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest 
*req)
 iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
 iocb->offset = 0;
 
+if (n->features.hbs.lbafee) {
+iocb->lbaf |= lbafu << 4;
+}
+
 if (!iocb->broadcast) {
 if (!nvme_nsid_valid(n, nsid)) {
 status = NVME_INVALID_NSID | NVME_DNR;
@@ -6587,6 +6614,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->cntlid = cpu_to_le16(n->cntlid);
 
 id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);
+id->ctratt |= cpu_to_le32(NVME_CTRATT_ELBAS);
 
 id->rab = 6;
 
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index ee673f1a5bef..8dfb55130beb 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -112,10 +112,11 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 [7] = { .ds = 12, .ms = 64 },
 };
 
+ns->nlbaf = 8;
+
 memcpy(_ns->lbaf, , sizeof(lbaf));
-id_ns->nlbaf = 7;
 
-for (i = 0; i <= id_ns->nlbaf; i++) {
+for (i = 0; i < ns->nlbaf; i++) {
 NvmeLBAF *lbaf = _ns->lbaf[i];
 if (lbaf->ds == ds) {
 if (lbaf->ms == ms) {
@@ -126,12 +127,14 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 }
 
 /* add non-standard lba format */
-id_ns->nlbaf++;
-id_ns->lbaf[id_ns->nlbaf].ds = ds;
-id_ns->lbaf[id_ns->nlbaf].ms = ms;
-id_ns->flbas |= id_ns->nlbaf;
+id_ns->lbaf[ns->nlbaf].ds = ds;
+id_ns->lbaf[ns->nlbaf].ms = ms;
+ns->nlbaf++;
+
+id_ns->flbas |= i;
 
 lbaf_found:
+id_ns->nlbaf = ns->nlbaf - 1;
 nvme_ns_init_format(ns);
 
 return 0;
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 103407038e74..e715c3255a29 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -128,6 +128,7 @@ typedef struct NvmeNamespace {
 int64_t  moff;
 NvmeIdNs id_ns;
 NvmeLBAF lbaf;
+unsigned int nlbaf;
 size_t   lbasz;
 const uint32_t *iocs;
 uint8_t  csi;
diff --git a/include/block/nvme.h b/include/block/nvme.h
index e527c728f975..37afc9be9b18 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -,6 +,10 @@ enum NvmeIdCtrlOaes {
 NVME_OAES_NS_ATTR   = 1 << 8,
 };
 
+enum NvmeIdCtrlCtratt {
+NVME_CTRATT_ELBAS   = 1 << 15,
+};
+
 enum 

[PULL 6/6] hw/nvme: 64-bit pi support

2022-03-03 Thread Klaus Jensen
From: Naveen Nagar 

This adds support for one possible new protection information format
introduced in TP4068 (and integrated in NVMe 2.0): the 64-bit CRC guard
and 48-bit reference tag. This version does not support storage tags.

Like the CRC16 support already present, this uses a software
implementation of CRC64 (so it is naturally pretty slow). But it's good
enough for verification purposes.
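
[Editor's sketch: the software implementation referred to is the classic
byte-at-a-time, table-driven CRC. For the CRC16 (T10 DIF) case already
in the tree, the update loop is essentially the following, indexing the
t10_dif_crc_table found in hw/nvme/dif.h; the CRC64 variant has the same
shape with a 64-bit table and accumulator.

    static uint16_t crc16_t10dif(uint16_t crc, const uint8_t *buf,
                                 size_t len)
    {
        for (size_t i = 0; i < len; i++) {
            crc = (crc << 8) ^
                  t10_dif_crc_table[((crc >> 8) ^ buf[i]) & 0xff];
        }

        return crc;
    }
]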

This may go nicely hand-in-hand with the support that Keith submitted
for the Linux kernel[1].

  [1]: 
https://lore.kernel.org/linux-nvme/20220126165214.ga1782...@dhcp-10-100-145-180.wdc.com/T/

Reviewed-by: Keith Busch 
Signed-off-by: Naveen Nagar 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 163 +++
 hw/nvme/dif.c| 363 +--
 hw/nvme/dif.h| 143 -
 hw/nvme/ns.c |  35 -
 hw/nvme/nvme.h   |   3 +
 hw/nvme/trace-events |  12 +-
 include/block/nvme.h |  67 ++--
 7 files changed, 648 insertions(+), 138 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f1683960b87e..03760ddeae8c 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -2050,9 +2050,12 @@ static void nvme_verify_cb(void *opaque, int ret)
 uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
 uint16_t apptag = le16_to_cpu(rw->apptag);
 uint16_t appmask = le16_to_cpu(rw->appmask);
-uint32_t reftag = le32_to_cpu(rw->reftag);
+uint64_t reftag = le32_to_cpu(rw->reftag);
+uint64_t cdw3 = le32_to_cpu(rw->cdw3);
 uint16_t status;
 
+reftag |= cdw3 << 32;
+
 trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag);
 
 if (ret) {
@@ -2141,7 +2144,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
 uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
 uint16_t apptag = le16_to_cpu(rw->apptag);
 uint16_t appmask = le16_to_cpu(rw->appmask);
-uint32_t reftag = le32_to_cpu(rw->reftag);
+uint64_t reftag = le32_to_cpu(rw->reftag);
+uint64_t cdw3 = le32_to_cpu(rw->cdw3);
 struct nvme_compare_ctx *ctx = req->opaque;
 g_autofree uint8_t *buf = NULL;
 BlockBackend *blk = ns->blkconf.blk;
@@ -2149,6 +2153,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
 BlockAcctStats *stats = blk_get_stats(blk);
 uint16_t status = NVME_SUCCESS;
 
+reftag |= cdw3 << 32;
+
 trace_pci_nvme_compare_mdata_cb(nvme_cid(req));
 
 if (ret) {
@@ -2527,7 +2533,8 @@ typedef struct NvmeCopyAIOCB {
 QEMUBH *bh;
 int ret;
 
-NvmeCopySourceRange *ranges;
+void *ranges;
+unsigned int format;
 int nr;
 int idx;
 
@@ -2538,7 +2545,7 @@ typedef struct NvmeCopyAIOCB {
 BlockAcctCookie write;
 } acct;
 
-uint32_t reftag;
+uint64_t reftag;
 uint64_t slba;
 
 NvmeZone *zone;
@@ -2592,13 +2599,101 @@ static void nvme_copy_bh(void *opaque)
 
 static void nvme_copy_cb(void *opaque, int ret);
 
+static void nvme_copy_source_range_parse_format0(void *ranges, int idx,
+ uint64_t *slba, uint32_t *nlb,
+ uint16_t *apptag,
+ uint16_t *appmask,
+ uint64_t *reftag)
+{
+NvmeCopySourceRangeFormat0 *_ranges = ranges;
+
+if (slba) {
+*slba = le64_to_cpu(_ranges[idx].slba);
+}
+
+if (nlb) {
+*nlb = le16_to_cpu(_ranges[idx].nlb) + 1;
+}
+
+if (apptag) {
+*apptag = le16_to_cpu(_ranges[idx].apptag);
+}
+
+if (appmask) {
+*appmask = le16_to_cpu(_ranges[idx].appmask);
+}
+
+if (reftag) {
+*reftag = le32_to_cpu(_ranges[idx].reftag);
+}
+}
+
+static void nvme_copy_source_range_parse_format1(void *ranges, int idx,
+ uint64_t *slba, uint32_t *nlb,
+ uint16_t *apptag,
+ uint16_t *appmask,
+ uint64_t *reftag)
+{
+NvmeCopySourceRangeFormat1 *_ranges = ranges;
+
+if (slba) {
+*slba = le64_to_cpu(_ranges[idx].slba);
+}
+
+if (nlb) {
+*nlb = le16_to_cpu(_ranges[idx].nlb) + 1;
+}
+
+if (apptag) {
+*apptag = le16_to_cpu(_ranges[idx].apptag);
+}
+
+if (appmask) {
+*appmask = le16_to_cpu(_ranges[idx].appmask);
+}
+
+if (reftag) {
+*reftag = 0;
+
+*reftag |= (uint64_t)_ranges[idx].sr[4] << 40;
+*reftag |= (uint64_t)_ranges[idx].sr[5] << 32;
+*reftag |= (uint64_t)_ranges[idx].sr[6] << 24;
+*reftag |= (uint64_t)_ranges[idx].sr[7] << 16;
+*reftag |= (uint64_t)_ranges[idx].sr[8] << 8;
+*reftag |= (uint64_t)_ranges[idx].sr[9];
+}
+}
+
+static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format,
+   

[PULL 2/6] hw/nvme: add host behavior support feature

2022-03-03 Thread Klaus Jensen
From: Naveen Nagar 

Add support for getting and setting the Host Behavior Support feature.

Reviewed-by: Keith Busch 
Signed-off-by: Naveen Nagar 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 8 
 hw/nvme/nvme.h   | 4 +++-
 include/block/nvme.h | 9 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index d08af3bdc1a2..71c60482c75f 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -196,6 +196,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
 [NVME_WRITE_ATOMICITY]  = true,
 [NVME_ASYNCHRONOUS_EVENT_CONF]  = true,
 [NVME_TIMESTAMP]= true,
+[NVME_HOST_BEHAVIOR_SUPPORT]= true,
 [NVME_COMMAND_SET_PROFILE]  = true,
 };
 
@@ -206,6 +207,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
 [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
 [NVME_ASYNCHRONOUS_EVENT_CONF]  = NVME_FEAT_CAP_CHANGE,
 [NVME_TIMESTAMP]= NVME_FEAT_CAP_CHANGE,
+[NVME_HOST_BEHAVIOR_SUPPORT]= NVME_FEAT_CAP_CHANGE,
 [NVME_COMMAND_SET_PROFILE]  = NVME_FEAT_CAP_CHANGE,
 };
 
@@ -5091,6 +5093,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest 
*req)
 goto out;
 case NVME_TIMESTAMP:
 return nvme_get_feature_timestamp(n, req);
+case NVME_HOST_BEHAVIOR_SUPPORT:
+return nvme_c2h(n, (uint8_t *)>features.hbs,
+sizeof(n->features.hbs), req);
 default:
 break;
 }
@@ -5281,6 +5286,9 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest 
*req)
 break;
 case NVME_TIMESTAMP:
 return nvme_set_feature_timestamp(n, req);
+case NVME_HOST_BEHAVIOR_SUPPORT:
+return nvme_h2c(n, (uint8_t *)>features.hbs,
+sizeof(n->features.hbs), req);
 case NVME_COMMAND_SET_PROFILE:
 if (dw11 & 0x1ff) {
 trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 801176a2bd5e..103407038e74 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -468,7 +468,9 @@ typedef struct NvmeCtrl {
 uint16_t temp_thresh_hi;
 uint16_t temp_thresh_low;
 };
-uint32_tasync_config;
+
+uint32_tasync_config;
+NvmeHostBehaviorSupport hbs;
 } features;
 } NvmeCtrl;
 
diff --git a/include/block/nvme.h b/include/block/nvme.h
index cd068ac89142..e527c728f975 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1216,6 +1216,7 @@ enum NvmeFeatureIds {
 NVME_WRITE_ATOMICITY= 0xa,
 NVME_ASYNCHRONOUS_EVENT_CONF= 0xb,
 NVME_TIMESTAMP  = 0xe,
+NVME_HOST_BEHAVIOR_SUPPORT  = 0x16,
 NVME_COMMAND_SET_PROFILE= 0x19,
 NVME_SOFTWARE_PROGRESS_MARKER   = 0x80,
 NVME_FID_MAX= 0x100,
@@ -1257,6 +1258,13 @@ typedef struct QEMU_PACKED NvmeRangeType {
 uint8_t rsvd48[16];
 } NvmeRangeType;
 
+typedef struct NvmeHostBehaviorSupport {
+uint8_t acre;
+uint8_t etdas;
+uint8_t lbafee;
+uint8_t rsvd3[509];
+} NvmeHostBehaviorSupport;
+
 typedef struct QEMU_PACKED NvmeLBAF {
 uint16_tms;
 uint8_t ds;
@@ -1520,6 +1528,7 @@ static inline void _nvme_check_size(void)
 QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64);
 QEMU_BUILD_BUG_ON(sizeof(NvmeCopyCmd) != 64);
 QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64);
+QEMU_BUILD_BUG_ON(sizeof(NvmeHostBehaviorSupport) != 512);
 QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
 QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
 QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512);
-- 
2.35.1




[PULL 5/6] hw/nvme: add pi tuple size helper

2022-03-03 Thread Klaus Jensen
From: Klaus Jensen 

A subsequent patch will introduce a new tuple size, so add a helper and
use that instead of sizeof() and magic numbers.
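
[Editor's sketch: the dif.h hunk is truncated in this excerpt. At this
point in the series the helper can be as simple as wrapping the existing
8-byte tuple; this is a guess at its shape, not the verbatim patch.

    static inline uint16_t nvme_pi_tuple_size(NvmeNamespace *ns)
    {
        /* 8 bytes for the 16-bit guard format; the 64-bit guard patch
         * later in this pull request makes this format-dependent. */
        return sizeof(NvmeDifTuple);
    }
]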

Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 14 --
 hw/nvme/dif.c  | 16 
 hw/nvme/dif.h  |  5 +
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 52ab3450b975..f1683960b87e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1068,7 +1068,8 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, 
NvmeRequest *req)
 size_t len = nvme_l2b(ns, nlb);
 uint16_t status;
 
-if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) {
+if (nvme_ns_ext(ns) &&
+!(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
 NvmeSg sg;
 
 len += nvme_m2b(ns, nlb);
@@ -1247,7 +1248,8 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, 
uint32_t len,
 bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
 bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);
 
-if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) {
+if (nvme_ns_ext(ns) &&
+!(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
 return nvme_tx_interleaved(n, >sg, ptr, len, ns->lbasz,
ns->lbaf.ms, 0, dir);
 }
@@ -2184,7 +2186,7 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
  * tuple.
  */
 if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
 }
 
 for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += 
ns->lbaf.ms) {
@@ -3167,7 +3169,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
 bool pract = prinfo & NVME_PRINFO_PRACT;
 
-if (pract && ns->lbaf.ms == 8) {
+if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
 mapped_size = data_size;
 }
 }
@@ -3244,7 +3246,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest 
*req, bool append,
 if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
 bool pract = prinfo & NVME_PRINFO_PRACT;
 
-if (pract && ns->lbaf.ms == 8) {
+if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
 mapped_size -= nvme_m2b(ns, nlb);
 }
 }
@@ -5553,7 +,7 @@ static uint16_t nvme_format_check(NvmeNamespace *ns, 
uint8_t lbaf, uint8_t pi)
 return NVME_INVALID_FORMAT | NVME_DNR;
 }
 
-if (pi && (ns->id_ns.lbaf[lbaf].ms < sizeof(NvmeDifTuple))) {
+if (pi && (ns->id_ns.lbaf[lbaf].ms < nvme_pi_tuple_size(ns))) {
 return NVME_INVALID_FORMAT | NVME_DNR;
 }
 
diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c
index cd0cea2b5ebd..891385f33f20 100644
--- a/hw/nvme/dif.c
+++ b/hw/nvme/dif.c
@@ -48,7 +48,7 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t 
*buf, size_t len,
 int16_t pil = 0;
 
 if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
 }
 
 trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
@@ -145,7 +145,7 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, 
size_t len,
 }
 
 if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
 }
 
 trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil);
@@ -184,7 +184,7 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t 
*mbuf, size_t mlen,
 
 
 if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
 }
 
 do {
@@ -210,7 +210,7 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t 
*mbuf, size_t mlen,
 end = mbufp + mlen;
 
 for (; mbufp < end; mbufp += ns->lbaf.ms) {
-memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
+memset(mbufp + pil, 0xff, nvme_pi_tuple_size(ns));
 }
 }
 
@@ -284,7 +284,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret)
 goto out;
 }
 
-if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) {
+if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
 goto out;
 }
 
@@ -388,7 +388,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
 
 if (pract) {
 uint8_t *mbuf, *end;
-int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+int16_t pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
 
 status = nvme_check_prinfo(ns, prinfo, slba, reftag);
 if (status) {
@@ -428,7 +428,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, 

[PULL 1/6] hw/nvme: move dif/pi prototypes into dif.h

2022-03-03 Thread Klaus Jensen
From: Klaus Jensen 

Move dif/pi data structures and inlines to dif.h.

Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c |  1 +
 hw/nvme/dif.c  |  1 +
 hw/nvme/dif.h  | 53 ++
 hw/nvme/nvme.h | 50 ---
 4 files changed, 55 insertions(+), 50 deletions(-)
 create mode 100644 hw/nvme/dif.h

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 98aac98bef5f..d08af3bdc1a2 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -163,6 +163,7 @@
 #include "migration/vmstate.h"
 
 #include "nvme.h"
+#include "dif.h"
 #include "trace.h"
 
 #define NVME_MAX_IOQPAIRS 0x
diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c
index 5dbd18b2a4a5..cd0cea2b5ebd 100644
--- a/hw/nvme/dif.c
+++ b/hw/nvme/dif.c
@@ -13,6 +13,7 @@
 #include "sysemu/block-backend.h"
 
 #include "nvme.h"
+#include "dif.h"
 #include "trace.h"
 
 uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
diff --git a/hw/nvme/dif.h b/hw/nvme/dif.h
new file mode 100644
index ..e36fea30e71e
--- /dev/null
+++ b/hw/nvme/dif.h
@@ -0,0 +1,53 @@
+#ifndef HW_NVME_DIF_H
+#define HW_NVME_DIF_H
+
+/* from Linux kernel (crypto/crct10dif_common.c) */
+static const uint16_t t10_dif_crc_table[256] = {
+0x, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
+
+uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
+   uint32_t reftag);
+uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
+   uint64_t slba);
+void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
+ uint8_t *mbuf, size_t mlen, uint16_t apptag,
+ uint32_t *reftag);
+uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
+uint8_t *mbuf, size_t mlen, uint8_t prinfo,
+uint64_t slba, uint16_t apptag,
+uint16_t appmask, uint32_t *reftag);
+uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);
+
+#endif /* HW_NVME_DIF_H */
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 90c0bb7ce236..801176a2bd5e 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -513,54 +513,4 @@ void nvme_rw_complete_cb(void *opaque, int ret);
 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
NvmeCmd *cmd);
 
-/* from Linux kernel (crypto/crct10dif_common.c) */
-static const uint16_t t10_dif_crc_table[256] = {
-0x, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
-0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
-0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
-0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
-

[PULL 3/6] hw/nvme: move format parameter parsing

2022-03-03 Thread Klaus Jensen
From: Klaus Jensen 

There is no need to extract the format command parameters for each
namespace. Move it to the entry point.

Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 31 ++-
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 71c60482c75f..d8701ebf2fa8 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5452,6 +5452,11 @@ typedef struct NvmeFormatAIOCB {
 uint32_t nsid;
 bool broadcast;
 int64_t offset;
+
+uint8_t lbaf;
+uint8_t mset;
+uint8_t pi;
+uint8_t pil;
 } NvmeFormatAIOCB;
 
 static void nvme_format_bh(void *opaque);
@@ -5471,14 +5476,9 @@ static const AIOCBInfo nvme_format_aiocb_info = {
 .get_aio_context = nvme_get_aio_context,
 };
 
-static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd)
+static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset,
+uint8_t pi, uint8_t pil)
 {
-uint32_t dw10 = le32_to_cpu(cmd->cdw10);
-uint8_t lbaf = dw10 & 0xf;
-uint8_t pi = (dw10 >> 5) & 0x7;
-uint8_t mset = (dw10 >> 4) & 0x1;
-uint8_t pil = (dw10 >> 8) & 0x1;
-
 trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil);
 
 ns->id_ns.dps = (pil << 3) | pi;
@@ -5490,7 +5490,6 @@ static void nvme_format_set(NvmeNamespace *ns, NvmeCmd 
*cmd)
 static void nvme_format_ns_cb(void *opaque, int ret)
 {
 NvmeFormatAIOCB *iocb = opaque;
-NvmeRequest *req = iocb->req;
 NvmeNamespace *ns = iocb->ns;
 int bytes;
 
@@ -5512,7 +5511,7 @@ static void nvme_format_ns_cb(void *opaque, int ret)
 return;
 }
 
-nvme_format_set(ns, >cmd);
+nvme_format_set(ns, iocb->lbaf, iocb->mset, iocb->pi, iocb->pil);
 ns->status = 0x0;
 iocb->ns = NULL;
 iocb->offset = 0;
@@ -5548,9 +5547,6 @@ static void nvme_format_bh(void *opaque)
 NvmeFormatAIOCB *iocb = opaque;
 NvmeRequest *req = iocb->req;
 NvmeCtrl *n = nvme_ctrl(req);
-uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
-uint8_t lbaf = dw10 & 0xf;
-uint8_t pi = (dw10 >> 5) & 0x7;
 uint16_t status;
 int i;
 
@@ -5572,7 +5568,7 @@ static void nvme_format_bh(void *opaque)
 goto done;
 }
 
-status = nvme_format_check(iocb->ns, lbaf, pi);
+status = nvme_format_check(iocb->ns, iocb->lbaf, iocb->pi);
 if (status) {
 req->status = status;
 goto done;
@@ -5595,6 +5591,11 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest 
*req)
 {
 NvmeFormatAIOCB *iocb;
 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
+uint8_t lbaf = dw10 & 0xf;
+uint8_t mset = (dw10 >> 4) & 0x1;
+uint8_t pi = (dw10 >> 5) & 0x7;
+uint8_t pil = (dw10 >> 8) & 0x1;
 uint16_t status;
 
 iocb = qemu_aio_get(_format_aiocb_info, NULL, nvme_misc_cb, req);
@@ -5604,6 +5605,10 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest 
*req)
 iocb->ret = 0;
 iocb->ns = NULL;
 iocb->nsid = 0;
+iocb->lbaf = lbaf;
+iocb->mset = mset;
+iocb->pi = pi;
+iocb->pil = pil;
 iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
 iocb->offset = 0;
 
-- 
2.35.1




[PULL 0/6] hw/nvme updates

2022-03-03 Thread Klaus Jensen
From: Klaus Jensen 

Hi Peter,

Last round of hw/nvme updates for v7.0.

The following changes since commit 64ada298b98a51eb2512607f6e6180cb330c47b1:

  Merge remote-tracking branch 'remotes/legoater/tags/pull-ppc-20220302' into 
staging (2022-03-02 12:38:46 +)

are available in the Git repository at:

  git://git.infradead.org/qemu-nvme.git tags/nvme-next-pull-request

for you to fetch changes up to 44219b6029fc52d5e967a963be91a9cf33f9f185:

  hw/nvme: 64-bit pi support (2022-03-03 09:30:21 +0100)


hw/nvme updates

- add enhanced protection information (64-bit guard)



Klaus Jensen (3):
  hw/nvme: move dif/pi prototypes into dif.h
  hw/nvme: move format parameter parsing
  hw/nvme: add pi tuple size helper

Naveen Nagar (3):
  hw/nvme: add host behavior support feature
  hw/nvme: add support for the lbafee hbs feature
  hw/nvme: 64-bit pi support

 hw/nvme/ctrl.c   | 235 +--
 hw/nvme/dif.c| 378 +--
 hw/nvme/dif.h| 191 ++
 hw/nvme/ns.c |  50 --
 hw/nvme/nvme.h   |  58 +--
 hw/nvme/trace-events |  12 +-
 include/block/nvme.h |  81 --
 7 files changed, 793 insertions(+), 212 deletions(-)
 create mode 100644 hw/nvme/dif.h

-- 
2.35.1




Re: [PATCH v15 3/8] net/vmnet: implement shared mode (vmnet-shared)

2022-03-03 Thread Akihiko Odaki

On 2022/03/04 3:34, Akihiko Odaki wrote:

On Fri, Mar 4, 2022 at 12:43 AM Vladislav Yaroshchuk
 wrote:




On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki  wrote:


On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:

  > Not sure that only one field is enough, cause
  > we may have two states on bh execution start:
  > 1. There are packets in vmnet buffer s->packets_buf
  >  that were rejected by qemu_send_async and waiting
  >  to be sent. If this happens, we should complete sending
  >  these waiting packets with qemu_send_async firstly,
  >  and after that we should call vmnet_read to get
  >  new ones and send them to QEMU;
  > 2. There are no packets in s->packets_buf to be sent to
  >  qemu, we only need to get new packets from vmnet
  >  with vmnet_read and send them to QEMU

 In case 1, you should just keep calling qemu_send_packet_async.
 Actually
 qemu_send_packet_async adds the packet to its internal queue and calls
 the callback when it is consumed.


I'm not sure we can keep calling qemu_send_packet_async,
because as docs from net/queue.c says:

/* [...]
   * If a sent callback is provided to send(), the caller must handle a
   * zero return from the delivery handler by not sending any more packets
   * until we have invoked the callback. Only in that case will we queue
   * the packet.
   *
   * If a sent callback isn't provided, we just drop the packet to avoid
   * unbounded queueing.
   */

So after we did vmnet_read and read N packets
into temporary s->packets_buf, we begin calling
qemu_send_packet_async. If it returns 0 - it says
"no more packets until sent_cb called please".
At this moment we have N packets in s->packets_buf
and already queued K < N of them. But, packets K..N
are not queued and keep waiting for sent_cb to be sent
with qemu_send_packet_async.
Thus when sent_cb called, we should finish
our transfer of packets K..N from s->packets_buf
to qemu calling qemu_send_packet_async.
I meant this.


I missed the comment. The description is contradicting with the actual
code; qemu_net_queue_send_iov appends the packet to the queue whenever
it cannot send one immediately.



Yes, it appends, but (net/queue.c):
*  qemu_net_queue_send tries to deliver the packet
 immediately. If the packet cannot be delivered, the
 qemu_net_queue_append is called and 0 is returned
 to say the caller "the receiver is not ready, hold on";
*  qemu_net_queue_append does a probe before adding
 the packet to the queue:
 if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
 return; /* drop if queue full and no callback */
 }

The queue is not infinite, so we have three cases:
1. The queue is not full -> append the packet, no
 problems here
2. The queue is full, no callback -> we cannot notify
 a caller when we're ready, so just drop the packet
 if we can't append it.
3. The queue is full, callback present -> we can notify
 a caller when we are ready, so "let's queue this packet,
  but expect no more (!) packets to be sent until I call
 sent_cb when the queue is ready"

Therefore if we provide a callback and keep sending
packets if 0 is returned, this may cause unlimited(!)
queue growth. To prevent this, we should stop sending
packets and wait for notification callback to continue.

I don't see any contradiction with that comment.
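
[Editor's sketch: the pattern under discussion - stop reading from vmnet
once qemu_send_packet_async() returns 0, and resume from the completion
callback. VmnetState and all of its fields are hypothetical names, not
taken from the actual patch.

    static void vmnet_send_completed(NetClientState *nc, ssize_t len)
    {
        VmnetState *s = DO_UPCAST(VmnetState, nc, nc);

        s->send_enabled = true;       /* queue drained, may send again */
        qemu_bh_schedule(s->send_bh); /* flush buffer, then vmnet_read */
    }

    static void vmnet_send_bh(void *opaque)
    {
        VmnetState *s = opaque;

        while (s->send_enabled && s->idx < s->n_pkts) {
            ssize_t r = qemu_send_packet_async(&s->nc,
                                               s->buf[s->idx].data,
                                               s->buf[s->idx].len,
                                               vmnet_send_completed);
            s->idx++;
            if (r == 0) {
                /* hold off until vmnet_send_completed() runs */
                s->send_enabled = false;
                return;
            }
        }

        /* buffer fully drained: safe to vmnet_read the next batch */
    }
]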


Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
calling qemu_send_packet_async is allowed after it returns 0?



It may be wrong, but I think it's not allowed to send
packets after qemu_send_packet_async returns 0.

Jason Wang, can you confirm please?

Best Regards,

Vladislav Yaroshchuk



Regards,
Akihiko Odaki






The unlimited queue growth would not happen if you stop calling
vmnet_read after qemu_send_packet_async returns 0. So I think the
comment should be amended to say something like:
"Once qemu_send_packet_async returns 0, the client should stop reading
more packets from the underlying NIC to prevent infinite growth of the
queue until the last callback gets called."

The unique feature of vmnet is that it can read multiple packets at
once, and I guess it is the reason why the comment in net/queue.c
missed the case. But this is all my guess so I need confirmation from
the maintainer.

Regards,
Akihiko Odaki


I forgot to include Jason Wang in the To field of this email.



Re: [PATCH v15 3/8] net/vmnet: implement shared mode (vmnet-shared)

2022-03-03 Thread Akihiko Odaki

On 2022/03/04 10:37, Jason Wang wrote:

On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk
 wrote:




On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki  wrote:


On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:

  > Not sure that only one field is enough, cause
  > we may have two states on bh execution start:
  > 1. There are packets in vmnet buffer s->packets_buf
  >  that were rejected by qemu_send_async and waiting
  >  to be sent. If this happens, we should complete sending
  >  these waiting packets with qemu_send_async firstly,
  >  and after that we should call vmnet_read to get
  >  new ones and send them to QEMU;
  > 2. There are no packets in s->packets_buf to be sent to
  >  qemu, we only need to get new packets from vmnet
  >  with vmnet_read and send them to QEMU

 In case 1, you should just keep calling qemu_send_packet_async.
 Actually
 qemu_send_packet_async adds the packet to its internal queue and calls
 the callback when it is consumed.


I'm not sure we can keep calling qemu_send_packet_async,
because as docs from net/queue.c says:

/* [...]
   * If a sent callback is provided to send(), the caller must handle a
   * zero return from the delivery handler by not sending any more packets
   * until we have invoked the callback. Only in that case will we queue
   * the packet.
   *
   * If a sent callback isn't provided, we just drop the packet to avoid
   * unbounded queueing.
   */

So after we did vmnet_read and read N packets
into temporary s->packets_buf, we begin calling
qemu_send_packet_async. If it returns 0 - it says
"no more packets until sent_cb called please".
At this moment we have N packets in s->packets_buf
and already queued K < N of them. But, packets K..N
are not queued and keep waiting for sent_cb to be sent
with qemu_send_packet_async.
Thus when sent_cb called, we should finish
our transfer of packets K..N from s->packets_buf
to qemu calling qemu_send_packet_async.
I meant this.


I missed the comment. The description is contradicting with the actual
code; qemu_net_queue_send_iov appends the packet to the queue whenever
it cannot send one immediately.



Yes, it appends, but (net/queue.c):
*  qemu_net_queue_send tries to deliver the packet
 immediately. If the packet cannot be delivered, the
 qemu_net_queue_append is called and 0 is returned
 to say the caller "the receiver is not ready, hold on";
*  qemu_net_queue_append does a probe before adding
 the packet to the queue:
 if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
 return; /* drop if queue full and no callback */
 }

The queue is not infinite, so we have three cases:
1. The queue is not full -> append the packet, no
 problems here
2. The queue is full, no callback -> we cannot notify
 a caller when we're ready, so just drop the packet
 if we can't append it.
3. The queue is full, callback present -> we can notify
 a caller when we are ready, so "let's queue this packet,
  but expect no more (!) packets to be sent until I call
 sent_cb when the queue is ready"

Therefore if we provide a callback and keep sending
packets if 0 is returned, this may cause unlimited(!)
queue growth. To prevent this, we should stop sending
packets and wait for notification callback to continue.


Right.



I don't see any contradiction with that comment.


Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
calling qemu_send_packet_async is allowed after it returns 0?



It may be wrong, but I think it's not allowed to send
packets after qemu_send_packet_async returns 0.

Jason Wang, can you confirm please?


With a cb, we can't do this. All users with cb will disable the source
polling and depend on the cb to re-read the polling.
(tap/l2tpv3/socket).

Without a cb, we can. As analyzed above, qemu_net_queue_append() can
limit the number of packets queued in this case.


vmnet can read multiple packets at once. What about such a case? Isn't 
it fine to call qemu_send_packet_async for the packets already read and 
then stop reading more?


Regards,
Akihiko Odaki



Thanks



Best Regards,

Vladislav Yaroshchuk



Regards,
Akihiko Odaki











Re: [RFC PATCH 1/3] target/riscv: Rename timer & timecmp to mtimer and mtimecmp

2022-03-03 Thread Anup Patel
On Fri, Mar 4, 2022 at 8:50 AM Atish Patra  wrote:
>
> Currently, the aclint and ibex timer devices uses the "timer" &
> "timecmp" to generate the m-mode timer interrupt. In future,
> we will have timer interrupt injected to S/VS mode directly.
> No functionality change introduced in this patch.

s/have timer/have a timer/

>
> Add a prefix "m" these enviornment variables to indicate its
> true purpose.

s/enviornment/environment/

>
> Signed-off-by: Atish Patra 

I suggest we should remove mtimecmp and mtimer from
target/riscv because mtimer is a memory mapped device.

Regards,
Anup

> ---
>  hw/intc/riscv_aclint.c | 20 ++--
>  hw/timer/ibex_timer.c  | 14 +++---
>  target/riscv/cpu.h |  4 ++--
>  target/riscv/machine.c |  2 +-
>  4 files changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
> index f1a5d3d284fd..642794a06865 100644
> --- a/hw/intc/riscv_aclint.c
> +++ b/hw/intc/riscv_aclint.c
> @@ -59,8 +59,8 @@ static void 
> riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
>
>  uint64_t rtc_r = cpu_riscv_read_rtc(timebase_freq);
>
> -cpu->env.timecmp = value;
> -if (cpu->env.timecmp <= rtc_r) {
> +cpu->env.mtimecmp = value;
> +if (cpu->env.mtimecmp <= rtc_r) {
>  /*
>   * If we're setting an MTIMECMP value in the "past",
>   * immediately raise the timer interrupt
> @@ -71,7 +71,7 @@ static void 
> riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
>
>  /* otherwise, set up the future timer interrupt */
>  qemu_irq_lower(mtimer->timer_irqs[hartid - mtimer->hartid_base]);
> -diff = cpu->env.timecmp - rtc_r;
> +diff = cpu->env.mtimecmp - rtc_r;
>  /* back to ns (note args switched in muldiv64) */
>  uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq);
>
> @@ -96,7 +96,7 @@ static void 
> riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
>  next = MIN(next, INT64_MAX);
>  }
>
> -timer_mod(cpu->env.timer, next);
> +timer_mod(cpu->env.mtimer, next);
>  }
>
>  /*
> @@ -127,11 +127,11 @@ static uint64_t riscv_aclint_mtimer_read(void *opaque, 
> hwaddr addr,
>"aclint-mtimer: invalid hartid: %zu", hartid);
>  } else if ((addr & 0x7) == 0) {
>  /* timecmp_lo */
> -uint64_t timecmp = env->timecmp;
> +uint64_t timecmp = env->mtimecmp;
>  return timecmp & 0x;
>  } else if ((addr & 0x7) == 4) {
>  /* timecmp_hi */
> -uint64_t timecmp = env->timecmp;
> +uint64_t timecmp = env->mtimecmp;
>  return (timecmp >> 32) & 0x;
>  } else {
>  qemu_log_mask(LOG_UNIMP,
> @@ -168,14 +168,14 @@ static void riscv_aclint_mtimer_write(void *opaque, 
> hwaddr addr,
>"aclint-mtimer: invalid hartid: %zu", hartid);
>  } else if ((addr & 0x7) == 0) {
>  /* timecmp_lo */
> -uint64_t timecmp_hi = env->timecmp >> 32;
> +uint64_t timecmp_hi = env->mtimecmp >> 32;
>  riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), hartid,
>  timecmp_hi << 32 | (value & 0x),
>  mtimer->timebase_freq);
>  return;
>  } else if ((addr & 0x7) == 4) {
>  /* timecmp_hi */
> -uint64_t timecmp_lo = env->timecmp;
> +uint64_t timecmp_lo = env->mtimecmp;
>  riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), hartid,
>  value << 32 | (timecmp_lo & 0x),
>  mtimer->timebase_freq);
> @@ -304,9 +304,9 @@ DeviceState *riscv_aclint_mtimer_create(hwaddr addr, 
> hwaddr size,
>
>  cb->s = RISCV_ACLINT_MTIMER(dev);
>  cb->num = i;
> -env->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> +env->mtimer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
>_aclint_mtimer_cb, cb);
> -env->timecmp = 0;
> +env->mtimecmp = 0;
>
>  qdev_connect_gpio_out(dev, i,
>qdev_get_gpio_in(DEVICE(rvcpu), IRQ_M_TIMER));
> diff --git a/hw/timer/ibex_timer.c b/hw/timer/ibex_timer.c
> index 8c2ca364daab..4c34f9e08282 100644
> --- a/hw/timer/ibex_timer.c
> +++ b/hw/timer/ibex_timer.c
> @@ -73,9 +73,9 @@ static void ibex_timer_update_irqs(IbexTimerState *s)
>  }
>
>  /* Update the CPUs mtimecmp */
> -cpu->env.timecmp = value;
> +cpu->env.mtimecmp = value;
>
> -if (cpu->env.timecmp <= now) {
> +if (cpu->env.mtimecmp <= now) {
>  /*
>   * If the mtimecmp was in the past raise the interrupt now.
>   */
> @@ -91,7 +91,7 @@ static void ibex_timer_update_irqs(IbexTimerState *s)
>  qemu_irq_lower(s->m_timer_irq);
>  qemu_set_irq(s->irq, false);
>
> -diff = cpu->env.timecmp - now;
> +diff = 

[RFC PATCH 3/3] target/riscv: Add vstimecmp support

2022-03-03 Thread Atish Patra
The vstimecmp CSR allows the guest OS to program the next guest timer
interrupt directly. Thus, the hypervisor no longer needs to inject the
timer interrupt into the guest if vstimecmp is used. This was ratified
as a part of the Sstc extension.

Signed-off-by: Atish Patra 
---
 target/riscv/cpu.h |   3 ++
 target/riscv/cpu_bits.h|   4 ++
 target/riscv/cpu_helper.c  |  11 ++--
 target/riscv/csr.c | 103 -
 target/riscv/machine.c |   1 +
 target/riscv/time_helper.c |  16 ++
 6 files changed, 133 insertions(+), 5 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index fa90ab9f473b..62cbb7be666f 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -269,6 +269,7 @@ struct CPURISCVState {
 
 /* Sstc CSRs */
 uint64_t stimecmp;
+uint64_t vstimecmp;
 
 /* physical memory protection */
 pmp_table_t pmp_state;
@@ -321,6 +322,8 @@ struct CPURISCVState {
 /* Fields from here on are preserved across CPU reset. */
 QEMUTimer *mtimer; /* Internal timer for M-mode interrupt */
 QEMUTimer *stimer; /* Internal timer for S-mode interrupt */
+QEMUTimer *vstimer; /* Internal timer for VS-mode interrupt */
+bool vstime_irq;
 
 hwaddr kernel_addr;
 hwaddr fdt_addr;
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 34496ac5aa80..16d8aa24efe1 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -269,6 +269,10 @@
 #define CSR_VSIP0x244
 #define CSR_VSATP   0x280
 
+/* Sstc virtual CSRs */
+#define CSR_VSTIMECMP   0x24D
+#define CSR_VSTIMECMPH  0x25D
+
 #define CSR_MTINST  0x34a
 #define CSR_MTVAL2  0x34b
 
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 746335bfd6b9..fe3b8cb72bc7 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -344,8 +344,9 @@ static uint64_t riscv_cpu_all_pending(CPURISCVState *env)
 {
 uint32_t gein = get_field(env->hstatus, HSTATUS_VGEIN);
 uint64_t vsgein = (env->hgeip & (1ULL << gein)) ? MIP_VSEIP : 0;
+uint64_t vstip = (env->vstime_irq) ? MIP_VSTIP : 0;
 
-return (env->mip | vsgein) & env->mie;
+return (env->mip | vsgein | vstip) & env->mie;
 }
 
 int riscv_cpu_mirq_pending(CPURISCVState *env)
@@ -600,7 +601,7 @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, 
uint64_t value)
 {
 CPURISCVState *env = >env;
 CPUState *cs = CPU(cpu);
-uint64_t gein, vsgein = 0, old = env->mip;
+uint64_t gein, vsgein = 0, vstip = 0, old = env->mip;
 bool locked = false;
 
 if (riscv_cpu_virt_enabled(env)) {
@@ -608,6 +609,10 @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t 
mask, uint64_t value)
 vsgein = (env->hgeip & (1ULL << gein)) ? MIP_VSEIP : 0;
 }
 
+/* No need to update mip for VSTIP */
+mask = ((mask == MIP_VSTIP) && env->vstime_irq) ? 0 : mask;
+vstip = env->vstime_irq ? MIP_VSTIP : 0;
+
 if (!qemu_mutex_iothread_locked()) {
 locked = true;
 qemu_mutex_lock_iothread();
@@ -615,7 +620,7 @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, 
uint64_t value)
 
 env->mip = (env->mip & ~mask) | (value & mask);
 
-if (env->mip | vsgein) {
+if (env->mip | vsgein | vstip) {
 cpu_interrupt(cs, CPU_INTERRUPT_HARD);
 } else {
 cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index ad7a8db2dd0d..8ef9306ce494 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -550,17 +550,100 @@ static RISCVException sstc(CPURISCVState *env, int csrno)
 return RISCV_EXCP_NONE;
 }
 
+static RISCVException sstc_hmode(CPURISCVState *env, int csrno)
+{
+CPUState *cs = env_cpu(env);
+RISCVCPU *cpu = RISCV_CPU(cs);
+
+if (!cpu->cfg.ext_sstc || !env->rdtime_fn) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+if (env->priv == PRV_M) {
+return RISCV_EXCP_NONE;
+}
+
+if (!(get_field(env->mcounteren, COUNTEREN_TM) &
+  get_field(env->menvcfg, MENVCFG_STCE))) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+if (!(get_field(env->hcounteren, COUNTEREN_TM) &
+  get_field(env->henvcfg, HENVCFG_STCE))) {
+return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
+}
+
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException read_vstimecmp(CPURISCVState *env, int csrno,
+target_ulong *val)
+{
+*val = env->vstimecmp;
+
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException read_vstimecmph(CPURISCVState *env, int csrno,
+target_ulong *val)
+{
+*val = env->vstimecmp >> 32;
+
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException write_vstimecmp(CPURISCVState *env, int csrno,
+target_ulong val)
+{
+RISCVCPU *cpu = env_archcpu(env);
+
+if (riscv_cpu_mxl(env) == MXL_RV32) {
+uint64_t vstimecmp_hi = env->vstimecmp >> 32;
+ 

[RFC PATCH 1/3] target/riscv: Rename timer & timecmp to mtimer and mtimecmp

2022-03-03 Thread Atish Patra
Currently, the aclint and ibex timer devices use the "timer" &
"timecmp" to generate the m-mode timer interrupt. In the future,
we will have a timer interrupt injected into S/VS mode directly.
No functional change is introduced in this patch.

Add a prefix "m" to these environment variables to indicate their
true purpose.

Signed-off-by: Atish Patra 
---
 hw/intc/riscv_aclint.c | 20 ++--
 hw/timer/ibex_timer.c  | 14 +++---
 target/riscv/cpu.h |  4 ++--
 target/riscv/machine.c |  2 +-
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
index f1a5d3d284fd..642794a06865 100644
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -59,8 +59,8 @@ static void 
riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
 
 uint64_t rtc_r = cpu_riscv_read_rtc(timebase_freq);
 
-cpu->env.timecmp = value;
-if (cpu->env.timecmp <= rtc_r) {
+cpu->env.mtimecmp = value;
+if (cpu->env.mtimecmp <= rtc_r) {
 /*
  * If we're setting an MTIMECMP value in the "past",
  * immediately raise the timer interrupt
@@ -71,7 +71,7 @@ static void 
riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
 
 /* otherwise, set up the future timer interrupt */
 qemu_irq_lower(mtimer->timer_irqs[hartid - mtimer->hartid_base]);
-diff = cpu->env.timecmp - rtc_r;
+diff = cpu->env.mtimecmp - rtc_r;
 /* back to ns (note args switched in muldiv64) */
 uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq);
 
@@ -96,7 +96,7 @@ static void 
riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
 next = MIN(next, INT64_MAX);
 }
 
-timer_mod(cpu->env.timer, next);
+timer_mod(cpu->env.mtimer, next);
 }
 
 /*
@@ -127,11 +127,11 @@ static uint64_t riscv_aclint_mtimer_read(void *opaque, 
hwaddr addr,
   "aclint-mtimer: invalid hartid: %zu", hartid);
 } else if ((addr & 0x7) == 0) {
 /* timecmp_lo */
-uint64_t timecmp = env->timecmp;
+uint64_t timecmp = env->mtimecmp;
 return timecmp & 0x;
 } else if ((addr & 0x7) == 4) {
 /* timecmp_hi */
-uint64_t timecmp = env->timecmp;
+uint64_t timecmp = env->mtimecmp;
 return (timecmp >> 32) & 0x;
 } else {
 qemu_log_mask(LOG_UNIMP,
@@ -168,14 +168,14 @@ static void riscv_aclint_mtimer_write(void *opaque, 
hwaddr addr,
   "aclint-mtimer: invalid hartid: %zu", hartid);
 } else if ((addr & 0x7) == 0) {
 /* timecmp_lo */
-uint64_t timecmp_hi = env->timecmp >> 32;
+uint64_t timecmp_hi = env->mtimecmp >> 32;
 riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), hartid,
 timecmp_hi << 32 | (value & 0x),
 mtimer->timebase_freq);
 return;
 } else if ((addr & 0x7) == 4) {
 /* timecmp_hi */
-uint64_t timecmp_lo = env->timecmp;
+uint64_t timecmp_lo = env->mtimecmp;
 riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), hartid,
 value << 32 | (timecmp_lo & 0x),
 mtimer->timebase_freq);
@@ -304,9 +304,9 @@ DeviceState *riscv_aclint_mtimer_create(hwaddr addr, hwaddr 
size,
 
 cb->s = RISCV_ACLINT_MTIMER(dev);
 cb->num = i;
-env->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+env->mtimer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
   _aclint_mtimer_cb, cb);
-env->timecmp = 0;
+env->mtimecmp = 0;
 
 qdev_connect_gpio_out(dev, i,
   qdev_get_gpio_in(DEVICE(rvcpu), IRQ_M_TIMER));
diff --git a/hw/timer/ibex_timer.c b/hw/timer/ibex_timer.c
index 8c2ca364daab..4c34f9e08282 100644
--- a/hw/timer/ibex_timer.c
+++ b/hw/timer/ibex_timer.c
@@ -73,9 +73,9 @@ static void ibex_timer_update_irqs(IbexTimerState *s)
 }
 
 /* Update the CPUs mtimecmp */
-cpu->env.timecmp = value;
+cpu->env.mtimecmp = value;
 
-if (cpu->env.timecmp <= now) {
+if (cpu->env.mtimecmp <= now) {
 /*
  * If the mtimecmp was in the past raise the interrupt now.
  */
@@ -91,7 +91,7 @@ static void ibex_timer_update_irqs(IbexTimerState *s)
 qemu_irq_lower(s->m_timer_irq);
 qemu_set_irq(s->irq, false);
 
-diff = cpu->env.timecmp - now;
+diff = cpu->env.mtimecmp - now;
 next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
  muldiv64(diff,
   NANOSECONDS_PER_SECOND,
@@ -99,9 +99,9 @@ static void ibex_timer_update_irqs(IbexTimerState *s)
 
 if (next < qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)) {
 /* We overflowed the timer, just set it as large as we can */
-timer_mod(cpu->env.timer, 0x7FFF);
+

[RFC PATCH 2/3] target/riscv: Add stimecmp support

2022-03-03 Thread Atish Patra
The stimecmp CSR allows supervisor mode to program the next timer
interrupt directly. This CSR is part of the Sstc extension, which was
ratified recently.
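
[Editor's sketch: the new time_helper.c is truncated from this excerpt.
Its arming logic presumably mirrors riscv_aclint_mtimer_write_timecmp();
the body below is illustrative, not verbatim.

    void riscv_timer_write_timecmp(RISCVCPU *cpu, QEMUTimer *timer,
                                   uint64_t timecmp, uint32_t timer_irq)
    {
        CPURISCVState *env = &cpu->env;
        uint64_t rtc = env->rdtime_fn(env->rdtime_fn_arg);

        if (timecmp <= rtc) {
            /* A timecmp in the "past" raises the interrupt right away. */
            riscv_cpu_update_mip(cpu, timer_irq, BOOL_TO_MASK(1));
            return;
        }

        /* Otherwise clear the pending bit and arm the QEMU timer. */
        riscv_cpu_update_mip(cpu, timer_irq, BOOL_TO_MASK(0));
        timer_mod(timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                         muldiv64(timecmp - rtc, NANOSECONDS_PER_SECOND,
                                  RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ));
    }
]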

Signed-off-by: Atish Patra 
---
 target/riscv/cpu.c |  7 +++
 target/riscv/cpu.h |  5 ++
 target/riscv/cpu_bits.h|  4 ++
 target/riscv/csr.c | 84 +
 target/riscv/machine.c |  1 +
 target/riscv/meson.build   |  3 +-
 target/riscv/time_helper.c | 97 ++
 target/riscv/time_helper.h | 30 
 8 files changed, 230 insertions(+), 1 deletion(-)
 create mode 100644 target/riscv/time_helper.c
 create mode 100644 target/riscv/time_helper.h

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 5d24a191c059..c609ed3c5834 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -23,6 +23,7 @@
 #include "qemu/log.h"
 #include "cpu.h"
 #include "internals.h"
+#include "time_helper.h"
 #include "exec/exec-all.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
@@ -751,6 +752,10 @@ static void riscv_cpu_init(Object *obj)
 qdev_init_gpio_in(DEVICE(cpu), riscv_cpu_set_irq,
   IRQ_LOCAL_MAX + IRQ_LOCAL_GUEST_MAX);
 #endif /* CONFIG_USER_ONLY */
+
+if (cpu->cfg.ext_sstc) {
+riscv_timer_init(cpu);
+}
 }
 
 static Property riscv_cpu_properties[] = {
@@ -776,6 +781,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("Zve64f", RISCVCPU, cfg.ext_zve64f, false),
 DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true),
 DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true),
+DEFINE_PROP_BOOL("sstc", RISCVCPU, cfg.ext_sstc, true),
 
 DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
 DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
@@ -898,6 +904,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
**isa_str, int max_str_len)
 { "svpbmt", cpu->cfg.ext_svpbmt   },
 { "svinval", cpu->cfg.ext_svinval },
 { "svnapot", cpu->cfg.ext_svnapot },
+{ "sstc", cpu->cfg.ext_sstc },
 };
 
 for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 94234c59ffa8..fa90ab9f473b 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -267,6 +267,9 @@ struct CPURISCVState {
 uint64_t mtohost;
 uint64_t mtimecmp;
 
+/* Sstc CSRs */
+uint64_t stimecmp;
+
 /* physical memory protection */
 pmp_table_t pmp_state;
 target_ulong mseccfg;
@@ -317,6 +320,7 @@ struct CPURISCVState {
 
 /* Fields from here on are preserved across CPU reset. */
 QEMUTimer *mtimer; /* Internal timer for M-mode interrupt */
+QEMUTimer *stimer; /* Internal timer for S-mode interrupt */
 
 hwaddr kernel_addr;
 hwaddr fdt_addr;
@@ -371,6 +375,7 @@ struct RISCVCPUConfig {
 bool ext_svinval;
 bool ext_svnapot;
 bool ext_svpbmt;
+bool ext_sstc;
 bool ext_zfh;
 bool ext_zfhmin;
 bool ext_zve32f;
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index bb47cf7e77a2..34496ac5aa80 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -212,6 +212,10 @@
 #define CSR_STVAL   0x143
 #define CSR_SIP 0x144
 
+/* Sstc supervisor CSRs */
+#define CSR_STIMECMP0x14D
+#define CSR_STIMECMPH   0x15D
+
 /* Supervisor Protection and Translation */
 #define CSR_SPTBR   0x180
 #define CSR_SATP0x180
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index b16881615997..ad7a8db2dd0d 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -20,8 +20,10 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "cpu.h"
+#include "time_helper.h"
 #include "qemu/main-loop.h"
 #include "exec/exec-all.h"
+#include "hw/intc/riscv_aclint.h"
 
 /* CSR function table public API */
 void riscv_get_csr_ops(int csrno, riscv_csr_operations *ops)
@@ -519,6 +521,78 @@ static RISCVException read_timeh(CPURISCVState *env, int 
csrno,
 return RISCV_EXCP_NONE;
 }
 
+static RISCVException sstc(CPURISCVState *env, int csrno)
+{
+CPUState *cs = env_cpu(env);
+RISCVCPU *cpu = RISCV_CPU(cs);
+
+if (!cpu->cfg.ext_sstc || !env->rdtime_fn) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+if (env->priv == PRV_M) {
+return RISCV_EXCP_NONE;
+}
+
+if (env->priv != PRV_S) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+/*
+ * No need of separate function for rv32 as menvcfg stores both menvcfg
+ * menvcfgh for RV32.
+ */
+if (!(get_field(env->mcounteren, COUNTEREN_TM) &&
+  get_field(env->menvcfg, MENVCFG_STCE))) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException read_stimecmp(CPURISCVState *env, int csrno,
+target_ulong *val)
+{
+*val = env->stimecmp;
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException read_stimecmph(CPURISCVState *env, 

[RFC PATCH 0/3] Implement Sstc extension

2022-03-03 Thread Atish Patra
This series implements the Sstc extension[1], which was ratified recently.

The first patch is a preparatory patch; PATCH 2 adds stimecmp support
and PATCH 3 adds vstimecmp support. This series is based on the ISA
extension[2] & privilege version update series[3].

The series can also be found at
https://github.com/atishp04/qemu/tree/sstc_v1

It is tested on RV32 & RV64 with additional OpenSBI[4] & Linux kernel[5]
patches.

[1] https://drive.google.com/file/d/1m84Re2yK8m_vbW7TspvevCDR82MOBaSX/view
[2] https://lore.kernel.org/all/2022020704.2294924-1-ati...@rivosinc.com/
[3] https://www.mail-archive.com/qemu-devel@nongnu.org/msg870659.html
[4] https://github.com/atishp04/opensbi/tree/sstc_v1
[5] https://github.com/atishp04/linux/tree/sstc_v2

Atish Patra (3):
target/riscv: Rename timer & timecmp to mtimer and mtimecmp
target/riscv: Add stimecmp support
target/riscv: Add vstimecmp support

hw/intc/riscv_aclint.c |  20 ++--
hw/timer/ibex_timer.c  |  14 +--
target/riscv/cpu.c |   7 ++
target/riscv/cpu.h |  12 ++-
target/riscv/cpu_bits.h|   8 ++
target/riscv/cpu_helper.c  |  11 ++-
target/riscv/csr.c | 183 +
target/riscv/machine.c |   4 +-
target/riscv/meson.build   |   3 +-
target/riscv/time_helper.c | 113 +++
target/riscv/time_helper.h |  30 ++
11 files changed, 381 insertions(+), 24 deletions(-)
create mode 100644 target/riscv/time_helper.c
create mode 100644 target/riscv/time_helper.h

--
2.30.2




Re: [PATCH 5/5] iotests: fortify compare_images() against crashes

2022-03-03 Thread Eric Blake
On Thu, Mar 03, 2022 at 03:59:02PM -0500, John Snow wrote:
> Fority compare_images() to be more discerning about the status codes it

Fortify

> receives. If qemu_img() returns an exit code that implies it didn't
> actually perform the comparison, treat that as an exceptional
> circumstance and force the caller to be aware of the peril.
> 
> If a negative test is desired (Perhaps to test how qemu_img compare

perhaps

> behaves on malformed images, for instance), it is still possible to
> catch the exception in the test and deal with that circumstance
> manually.
> 
> Signed-off-by: John Snow 
> ---
>  tests/qemu-iotests/iotests.py | 21 -
>  1 file changed, 16 insertions(+), 5 deletions(-)

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 4/5] iotests: make qemu_img raise on non-zero rc by default

2022-03-03 Thread Eric Blake
On Thu, Mar 03, 2022 at 03:59:01PM -0500, John Snow wrote:
> re-write qemu_img() as a function that will by default raise a
> VerboseProcessException (extended from CalledProcessException) on
> non-zero return codes. This will produce a stack trace that will show
> the command line arguments and return code from the failed process run.
> 
> Users that want something more flexible (there appears to be only one)
> can use check=False and manage the return themselves. However, when the
> return code is negative, the Exception will be raised no matter what.
> This is done under the belief that there's no legitimate reason, even in
> negative tests, to see a crash from qemu-img.
> 
> Signed-off-by: John Snow 
> ---
>  tests/qemu-iotests/257|  8 --
>  tests/qemu-iotests/iotests.py | 54 +++
>  2 files changed, 53 insertions(+), 9 deletions(-)
>

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 2/5] python/utils: add VerboseProcessError

2022-03-03 Thread Eric Blake
On Thu, Mar 03, 2022 at 03:58:59PM -0500, John Snow wrote:
> This adds an Exception that extends the Python stdlib
> subprocess.CalledProcessError.
> 
> The difference is that the str() method of this exception also adds the
> stdout/stderr logs. In effect, if this exception goes unhandled, Python
> will print the output in a visually distinct wrapper to the terminal so
> that it's easy to spot in a sea of traceback information.
> 
> Signed-off-by: John Snow 
> ---
>  python/qemu/utils/__init__.py | 36 +++
>  1 file changed, 36 insertions(+)
>

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v4 08/14] util: Add iova_tree_alloc_map

2022-03-03 Thread Liuxiangdong via




On 2022/3/4 2:51, Eugenio Pérez wrote:

This iova tree function allows it to look for a hole in allocated
regions and return a totally new translation for a given translated
address.

Its usage is mainly to allow devices to access qemu address space,
remapping guest's one into a new iova space where qemu can add chunks of
addresses.

Signed-off-by: Eugenio Pérez 
Reviewed-by: Peter Xu 
---
  include/qemu/iova-tree.h |  18 ++
  util/iova-tree.c | 135 +++
  2 files changed, 153 insertions(+)

diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
index 8249edd764..d066400f09 100644
--- a/include/qemu/iova-tree.h
+++ b/include/qemu/iova-tree.h
@@ -29,6 +29,7 @@
  #define  IOVA_OK   (0)
  #define  IOVA_ERR_INVALID  (-1) /* Invalid parameters */
  #define  IOVA_ERR_OVERLAP  (-2) /* IOVA range overlapped */
+#define  IOVA_ERR_NOMEM(-3) /* Cannot allocate */
  
  typedef struct IOVATree IOVATree;

  typedef struct DMAMap {
@@ -119,6 +120,23 @@ const DMAMap *iova_tree_find_address(const IOVATree *tree, 
hwaddr iova);
   */
  void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
  
+/**

+ * iova_tree_alloc_map:
+ *
+ * @tree: the iova tree to allocate from
+ * @map: the new map (as translated addr & size) to allocate in the iova region
+ * @iova_begin: the minimum address of the allocation
+ * @iova_end: the maximum addressable direction of the allocation
+ *
+ * Allocates a new region of a given size, between iova_min and iova_max.
+ *
+ * Return: Same as iova_tree_insert, but cannot overlap and can return error if
+ * iova tree is out of free contiguous range. The caller gets the assigned iova
+ * in map->iova.
+ */
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
+hwaddr iova_end);
+
  /**
   * iova_tree_destroy:
   *
diff --git a/util/iova-tree.c b/util/iova-tree.c
index 23ea35b7a4..3160c50d3b 100644
--- a/util/iova-tree.c
+++ b/util/iova-tree.c
@@ -16,6 +16,39 @@ struct IOVATree {
  GTree *tree;
  };
  
+/* Args to pass to iova_tree_alloc foreach function. */

+struct IOVATreeAllocArgs {
+/* Size of the desired allocation */
+size_t new_size;
+
+/* The minimum address allowed in the allocation */
+hwaddr iova_begin;
+
+/* Map at the left of the hole, can be NULL if "this" is first one */
+const DMAMap *prev;
+
+/* Map at the right of the hole, can be NULL if "prev" is the last one */
+const DMAMap *this;
+
+/* If found, we fill in the IOVA here */
+hwaddr iova_result;
+
+/* Whether have we found a valid IOVA */
+bool iova_found;
+};
+
+/**
+ * Iterate args to the next hole
+ *
+ * @args: The alloc arguments
+ * @next: The next mapping in the tree. Can be NULL to signal the last one
+ */
+static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args,
+ const DMAMap *next) {
+args->prev = args->this;
+args->this = next;
+}
+
  static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
  {
  const DMAMap *m1 = a, *m2 = b;
@@ -107,6 +140,108 @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map)
  return IOVA_OK;
  }
  
+/**

+ * Try to find an unallocated IOVA range between prev and this elements.
+ *
+ * @args: Arguments to allocation
+ *
+ * Cases:
+ *
+ * (1) !prev, !this: No entries allocated, always succeed
+ *
+ * (2) !prev, this: We're iterating at the 1st element.
+ *
+ * (3) prev, !this: We're iterating at the last element.
+ *
+ * (4) prev, this: this is the most common case, we'll try to find a hole
+ * between "prev" and "this" mapping.
+ *
+ * Note that this function assumes the last valid iova is HWADDR_MAX, but it
+ * searches linearly so it's easy to discard the result if it's not the case.
+ */
+static void iova_tree_alloc_map_in_hole(struct IOVATreeAllocArgs *args)
+{
+const DMAMap *prev = args->prev, *this = args->this;
+uint64_t hole_start, hole_last;
+
+if (this && this->iova + this->size < args->iova_begin) {
+return;
+}
+


Hi, Eugenio.
Is there such a condition that

args->iova_begin > this->iova  and
args->iova_begin < this->iova + this->size
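
For example (hypothetical numbers, just to make the condition concrete):
with args->iova_begin = 0x2000, this->iova = 0x1000 and this->size =
0x17ff, "this" spans [0x1000, 0x2800) and iova_begin lands inside it,
so the early-return guard above is false:

    /* standalone check of the guard with the values above */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t iova_begin = 0x2000;
        uint64_t this_iova = 0x1000, this_size = 0x17ff;
        /* 0x27ff < 0x2000 is false, so the function would not return early */
        assert(!(this_iova + this_size < iova_begin));
        return 0;
    }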


Thanks!
Xiangdong Liu



+hole_start = MAX(prev ? prev->iova + prev->size + 1 : 0, args->iova_begin);
+hole_last = this ? this->iova : HWADDR_MAX;
+
+if (hole_last - hole_start > args->new_size) {
+args->iova_result = hole_start;
+args->iova_found = true;
+}
+}
+
+/**
+ * Foreach dma node in the tree, compare if there is a hole with its previous
+ * node (or minimum iova address allowed) and the node.
+ *
+ * @key: Node iterating
+ * @value: Node iterating
+ * @pargs: Struct to communicate with the outside world
+ *
+ * Return: false to keep iterating, true if needs break.
+ */
+static gboolean iova_tree_alloc_traverse(gpointer key, gpointer value,
+ gpointer pargs)
+{
+

Re: [PATCH v2 09/14] vhost: Add VhostIOVATree

2022-03-03 Thread Jason Wang
On Fri, Mar 4, 2022 at 12:33 AM Eugenio Perez Martin
 wrote:
>
> On Mon, Feb 28, 2022 at 8:06 AM Jason Wang  wrote:
> >
> >
> > 在 2022/2/27 下午9:41, Eugenio Pérez 写道:
> > > This tree is able to look for a translated address from an IOVA address.
> > >
> > > At first glance it is similar to util/iova-tree. However, SVQ working on
> > > devices with limited IOVA space need more capabilities, like allocating
> > > IOVA chunks or performing reverse translations (qemu addresses to iova).
> > >
> > > The allocation capability, as "assign a free IOVA address to this chunk
> > > of memory in qemu's address space" allows shadow virtqueue to create a
> > > new address space that is not restricted by guest's addressable one, so
> > > we can allocate shadow vqs vrings outside of it.
> > >
> > > It duplicates the tree so it can search efficiently in both directions,
> > > and it will signal overlap if iova or the translated address is present
> > > in any tree.
> > >
> > > Signed-off-by: Eugenio Pérez 
> > > ---
> > >   hw/virtio/vhost-iova-tree.h |  27 +++
> > >   hw/virtio/vhost-iova-tree.c | 155 
> > >   hw/virtio/meson.build   |   2 +-
> > >   3 files changed, 183 insertions(+), 1 deletion(-)
> > >   create mode 100644 hw/virtio/vhost-iova-tree.h
> > >   create mode 100644 hw/virtio/vhost-iova-tree.c
> > >
> > > diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
> > > new file mode 100644
> > > index 00..6a4f24e0f9
> > > --- /dev/null
> > > +++ b/hw/virtio/vhost-iova-tree.h
> > > @@ -0,0 +1,27 @@
> > > +/*
> > > + * vhost software live migration iova tree
> > > + *
> > > + * SPDX-FileCopyrightText: Red Hat, Inc. 2021
> > > + * SPDX-FileContributor: Author: Eugenio Pérez 
> > > + *
> > > + * SPDX-License-Identifier: GPL-2.0-or-later
> > > + */
> > > +
> > > +#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
> > > +#define HW_VIRTIO_VHOST_IOVA_TREE_H
> > > +
> > > +#include "qemu/iova-tree.h"
> > > +#include "exec/memory.h"
> > > +
> > > +typedef struct VhostIOVATree VhostIOVATree;
> > > +
> > > +VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t 
> > > iova_last);
> > > +void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
> > > +G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
> > > +
> > > +const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
> > > +const DMAMap *map);
> > > +int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
> > > +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
> > > +
> > > +#endif
> > > diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
> > > new file mode 100644
> > > index 00..03496ac075
> > > --- /dev/null
> > > +++ b/hw/virtio/vhost-iova-tree.c
> > > @@ -0,0 +1,155 @@
> > > +/*
> > > + * vhost software live migration iova tree
> > > + *
> > > + * SPDX-FileCopyrightText: Red Hat, Inc. 2021
> > > + * SPDX-FileContributor: Author: Eugenio Pérez 
> > > + *
> > > + * SPDX-License-Identifier: GPL-2.0-or-later
> > > + */
> > > +
> > > +#include "qemu/osdep.h"
> > > +#include "qemu/iova-tree.h"
> > > +#include "vhost-iova-tree.h"
> > > +
> > > +#define iova_min_addr qemu_real_host_page_size
> > > +
> > > +/**
> > > + * VhostIOVATree, able to:
> > > + * - Translate iova address
> > > + * - Reverse translate iova address (from translated to iova)
> > > + * - Allocate IOVA regions for translated range (linear operation)
> > > + */
> > > +struct VhostIOVATree {
> > > +/* First addressable iova address in the device */
> > > +uint64_t iova_first;
> > > +
> > > +/* Last addressable iova address in the device */
> > > +uint64_t iova_last;
> > > +
> > > +/* IOVA address to qemu memory maps. */
> > > +IOVATree *iova_taddr_map;
> > > +
> > > +/* QEMU virtual memory address to iova maps */
> > > +GTree *taddr_iova_map;
> > > +};
> > > +
> > > +static gint vhost_iova_tree_cmp_taddr(gconstpointer a, gconstpointer b,
> > > +  gpointer data)
> > > +{
> > > +const DMAMap *m1 = a, *m2 = b;
> > > +
> > > +if (m1->translated_addr > m2->translated_addr + m2->size) {
> > > +return 1;
> > > +}
> > > +
> > > +if (m1->translated_addr + m1->size < m2->translated_addr) {
> > > +return -1;
> > > +}
> > > +
> > > +/* Overlapped */
> > > +return 0;
> > > +}
> > > +
> > > +/**
> > > + * Create a new IOVA tree
> > > + *
> > > + * Returns the new IOVA tree
> > > + */
> > > +VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
> > > +{
> > > +VhostIOVATree *tree = g_new(VhostIOVATree, 1);
> > > +
> > > +/* Some devices do not like 0 addresses */
> > > +tree->iova_first = MAX(iova_first, iova_min_addr);
> > > +tree->iova_last = iova_last;
> > > +
> > > +tree->iova_taddr_map = iova_tree_new();
> > > +tree->taddr_iova_map = 

Re: [PATCH v2 02/14] vhost: Add Shadow VirtQueue kick forwarding capabilities

2022-03-03 Thread Jason Wang
On Thu, Mar 3, 2022 at 5:25 PM Eugenio Perez Martin  wrote:
>
> On Thu, Mar 3, 2022 at 8:12 AM Jason Wang  wrote:
> >
> >
> > 在 2022/3/2 上午2:49, Eugenio Perez Martin 写道:
> > > On Mon, Feb 28, 2022 at 3:57 AM Jason Wang  wrote:
> > >> 在 2022/2/27 下午9:40, Eugenio Pérez 写道:
> > >>> At this mode no buffer forwarding will be performed in SVQ mode: Qemu
> > >>> will just forward the guest's kicks to the device.
> > >>>
> > >>> Host memory notifiers regions are left out for simplicity, and they will
> > >>> not be addressed in this series.
> > >>>
> > >>> Signed-off-by: Eugenio Pérez
> > >>> ---
> > >>>hw/virtio/vhost-shadow-virtqueue.h |  14 +++
> > >>>include/hw/virtio/vhost-vdpa.h |   4 +
> > >>>hw/virtio/vhost-shadow-virtqueue.c |  52 +++
> > >>>hw/virtio/vhost-vdpa.c | 145 
> > >>> -
> > >>>4 files changed, 213 insertions(+), 2 deletions(-)
> > >>>
> > >>> diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> > >>> b/hw/virtio/vhost-shadow-virtqueue.h
> > >>> index f1519e3c7b..1cbc87d5d8 100644
> > >>> --- a/hw/virtio/vhost-shadow-virtqueue.h
> > >>> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > >>> @@ -18,8 +18,22 @@ typedef struct VhostShadowVirtqueue {
> > >>>EventNotifier hdev_kick;
> > >>>/* Shadow call notifier, sent to vhost */
> > >>>EventNotifier hdev_call;
> > >>> +
> > >>> +/*
> > >>> + * Borrowed virtqueue's guest to host notifier. To borrow it in 
> > >>> this event
> > >>> + * notifier allows to recover the VhostShadowVirtqueue from the 
> > >>> event loop
> > >>> + * easily. If we use the VirtQueue's one, we don't have an easy 
> > >>> way to
> > >>> + * retrieve VhostShadowVirtqueue.
> > >>> + *
> > >>> + * So shadow virtqueue must not clean it, or we would lose 
> > >>> VirtQueue one.
> > >>> + */
> > >>> +EventNotifier svq_kick;
> > >>>} VhostShadowVirtqueue;
> > >>>
> > >>> +void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int 
> > >>> svq_kick_fd);
> > >>> +
> > >>> +void vhost_svq_stop(VhostShadowVirtqueue *svq);
> > >>> +
> > >>>VhostShadowVirtqueue *vhost_svq_new(void);
> > >>>
> > >>>void vhost_svq_free(gpointer vq);
> > >>> diff --git a/include/hw/virtio/vhost-vdpa.h 
> > >>> b/include/hw/virtio/vhost-vdpa.h
> > >>> index 3ce79a646d..009a9f3b6b 100644
> > >>> --- a/include/hw/virtio/vhost-vdpa.h
> > >>> +++ b/include/hw/virtio/vhost-vdpa.h
> > >>> @@ -12,6 +12,8 @@
> > >>>#ifndef HW_VIRTIO_VHOST_VDPA_H
> > >>>#define HW_VIRTIO_VHOST_VDPA_H
> > >>>
> > >>> +#include 
> > >>> +
> > >>>#include "hw/virtio/virtio.h"
> > >>>#include "standard-headers/linux/vhost_types.h"
> > >>>
> > >>> @@ -27,6 +29,8 @@ typedef struct vhost_vdpa {
> > >>>bool iotlb_batch_begin_sent;
> > >>>MemoryListener listener;
> > >>>struct vhost_vdpa_iova_range iova_range;
> > >>> +bool shadow_vqs_enabled;
> > >>> +GPtrArray *shadow_vqs;
> > >>>struct vhost_dev *dev;
> > >>>VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> > >>>} VhostVDPA;
> > >>> diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> > >>> b/hw/virtio/vhost-shadow-virtqueue.c
> > >>> index 019cf1950f..a5d0659f86 100644
> > >>> --- a/hw/virtio/vhost-shadow-virtqueue.c
> > >>> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > >>> @@ -11,6 +11,56 @@
> > >>>#include "hw/virtio/vhost-shadow-virtqueue.h"
> > >>>
> > >>>#include "qemu/error-report.h"
> > >>> +#include "qemu/main-loop.h"
> > >>> +#include "linux-headers/linux/vhost.h"
> > >>> +
> > >>> +/** Forward guest notifications */
> > >>> +static void vhost_handle_guest_kick(EventNotifier *n)
> > >>> +{
> > >>> +VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
> > >>> + svq_kick);
> > >>> +event_notifier_test_and_clear(n);
> > >>> +event_notifier_set(&svq->hdev_kick);
> > >>> +}
> > >>> +
> > >>> +/**
> > >>> + * Set a new file descriptor for the guest to kick the SVQ and notify 
> > >>> for avail
> > >>> + *
> > >>> + * @svq  The svq
> > >>> + * @svq_kick_fd  The svq kick fd
> > >>> + *
> > >>> + * Note that the SVQ will never close the old file descriptor.
> > >>> + */
> > >>> +void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int 
> > >>> svq_kick_fd)
> > >>> +{
> > >>> +EventNotifier *svq_kick = &svq->svq_kick;
> > >>> +bool poll_stop = VHOST_FILE_UNBIND != 
> > >>> event_notifier_get_fd(svq_kick);
> > >> I wonder if this is robust. E.g is there any chance that may end up with
> > >> both poll_stop and poll_start are false?
> > >>
> > > I cannot make that happen in qemu, but the function supports that case
> > > well: It will do nothing. It's more or less the same code as used in
> > > the vhost kernel, and is the expected behaviour if you send two
> > > VHOST_FILE_UNBIND one right after another to me.
> >
> >
> > I would think it's just stop twice.
> >
> >
> > >
> > >> If not, can we simple 

Re: [PATCH v15 3/8] net/vmnet: implement shared mode (vmnet-shared)

2022-03-03 Thread Jason Wang
On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk
 wrote:
>
>
>
> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki  wrote:
>>
>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
>> >  > Not sure that only one field is enough, cause
>> >  > we may have two states on bh execution start:
>> >  > 1. There are packets in vmnet buffer s->packets_buf
>> >  >  that were rejected by qemu_send_async and waiting
>> >  >  to be sent. If this happens, we should complete sending
>> >  >  these waiting packets with qemu_send_async firstly,
>> >  >  and after that we should call vmnet_read to get
>> >  >  new ones and send them to QEMU;
>> >  > 2. There are no packets in s->packets_buf to be sent to
>> >  >  qemu, we only need to get new packets from vmnet
>> >  >  with vmnet_read and send them to QEMU
>> >
>> > In case 1, you should just keep calling qemu_send_packet_async.
>> > Actually
>> > qemu_send_packet_async adds the packet to its internal queue and calls
>> > the callback when it is consumed.
>> >
>> >
>> > I'm not sure we can keep calling qemu_send_packet_async,
>> > because as docs from net/queue.c says:
>> >
>> > /* [...]
>> >   * If a sent callback is provided to send(), the caller must handle a
>> >   * zero return from the delivery handler by not sending any more packets
>> >   * until we have invoked the callback. Only in that case will we queue
>> >   * the packet.
>> >   *
>> >   * If a sent callback isn't provided, we just drop the packet to avoid
>> >   * unbounded queueing.
>> >   */
>> >
>> > So after we did vmnet_read and read N packets
>> > into temporary s->packets_buf, we begin calling
>> > qemu_send_packet_async. If it returns 0 - it says
>> > "no more packets until sent_cb called please".
>> > At this moment we have N packets in s->packets_buf
>> > and already queued K < N of them. But, packets K..N
>> > are not queued and keep waiting for sent_cb to be sent
>> > with qemu_send_packet_async.
>> > Thus when sent_cb called, we should finish
>> > our transfer of packets K..N from s->packets_buf
>> > to qemu calling qemu_send_packet_async.
>> > I meant this.
>>
>> I missed the comment. The description contradicts the actual
>> code; qemu_net_queue_send_iov appends the packet to the queue whenever
>> it cannot send one immediately.
>>
>
> Yes, it appends, but (net/queue.c):
> *  qemu_net_queue_send tries to deliver the packet
> immediately. If the packet cannot be delivered, the
> qemu_net_queue_append is called and 0 is returned
> to say the caller "the receiver is not ready, hold on";
> *  qemu_net_queue_append does a probe before adding
> the packet to the queue:
> if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
> return; /* drop if queue full and no callback */
> }
>
> The queue is not infinite, so we have three cases:
> 1. The queue is not full -> append the packet, no
> problems here
> 2. The queue is full, no callback -> we cannot notify
> a caller when we're ready, so just drop the packet
> if we can't append it.
> 3. The queue is full, callback present -> we can notify
> a caller when we are ready, so "let's queue this packet,
> but expect no more (!) packets is sent until I call
> sent_cb when the queue is ready"
>
> Therefore, if we provide a callback and keep sending
> packets after 0 is returned, this may cause unlimited (!)
> queue growth. To prevent this, we should stop sending
> packets and wait for notification callback to continue.
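>
> A minimal sketch of that pattern (VmnetState and its fields are
> hypothetical; only qemu_send_packet_async() and the sent_cb contract
> come from the net core):
>
>     static void vmnet_send_completed(NetClientState *nc, ssize_t len);
>
>     /* Send buffered packets until the net queue pushes back. */
>     static void vmnet_flush_buffered(VmnetState *s)
>     {
>         while (s->send_pos < s->send_end) {
>             struct iovec *pkt = &s->iov_buf[s->send_pos];
>             if (qemu_send_packet_async(&s->nc, pkt->iov_base,
>                                        pkt->iov_len,
>                                        vmnet_send_completed) == 0) {
>                 return;  /* packet queued: wait for the callback */
>             }
>             s->send_pos++;
>         }
>     }
>
>     static void vmnet_send_completed(NetClientState *nc, ssize_t len)
>     {
>         VmnetState *s = container_of(nc, VmnetState, nc);
>         s->send_pos++;            /* the queued packet was consumed */
>         vmnet_flush_buffered(s);  /* resume with the remaining ones */
>     }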

Right.

>
> I don't see any contradiction with that comment.
>
>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
>> calling qemu_send_packet_async is allowed after it returns 0?
>>
>
> It may be wrong, but I think it's not allowed to send
> packets after qemu_send_packet_async returns 0.
>
> Jason Wang, can you confirm please?

With a cb, we can't do this. All users with cb will disable the source
polling and depend on the cb to re-read the polling.
(tap/l2tpv3/socket).

Without a cb, we can. As analyzed above, qemu_net_queue_append() can
limit the number of packets queued in this case.

Thanks

>
> Best Regards,
>
> Vladislav Yaroshchuk
>
>>
>> Regards,
>> Akihiko Odaki
>
>
>




[PATCH] target/arm: Fix sve2 ldnt1 (64-bit unscaled offset)

2022-03-03 Thread Richard Henderson
We were mapping this to ld1 (32-bit unpacked unscaled offset),
which discarded the upper 32 bits of the address coming from
the vector argument.

Fixed by setting XS=2, which is the existing translator internal
value for no extension.  Update the comments, which matched the
incorrect code.

Fixes: https://gitlab.com/qemu-project/qemu/-/issues/826
Signed-off-by: Richard Henderson 
---
 target/arm/sve.decode | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index c60b9f0fec..fd96baeb68 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1576,9 +1576,9 @@ USDOT_  01000100 .. 0 . 011 110 . .  
@rda_rn_rm
 ### SVE2 Memory Gather Load Group
 
 # SVE2 64-bit gather non-temporal load
-#   (scalar plus unpacked 32-bit unscaled offsets)
+#   (scalar plus 64-bit unscaled offsets)
 LDNT1_zprz  1100010 msz:2 00 rm:5 1 u:1 0 pg:3 rn:5 rd:5 \
-_gather_load xs=0 esz=3 scale=0 ff=0
+_gather_load xs=2 esz=3 scale=0 ff=0
 
 # SVE2 32-bit gather non-temporal load (scalar plus 32-bit unscaled offsets)
 LDNT1_zprz  110 msz:2 00 rm:5 10 u:1 pg:3 rn:5 rd:5 \
-- 
2.25.1




[PATCH v6 11/12] hw/riscv: virt: Add PMU DT node to the device tree

2022-03-03 Thread Atish Patra
The QEMU virt machine can support a few cache events and cycle/instret
counters. It also supports counter overflow for these events.

Add a DT node so that OpenSBI and the Linux kernel are aware of the virt
machine capabilities. There are some dummy nodes added for testing as well.

Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 hw/riscv/virt.c| 28 +++
 target/riscv/cpu.c |  1 +
 target/riscv/pmu.c | 57 ++
 target/riscv/pmu.h |  1 +
 4 files changed, 87 insertions(+)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index da50cbed43ec..13d61bf476ff 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -28,6 +28,7 @@
 #include "hw/qdev-properties.h"
 #include "hw/char/serial.h"
 #include "target/riscv/cpu.h"
+#include "target/riscv/pmu.h"
 #include "hw/riscv/riscv_hart.h"
 #include "hw/riscv/virt.h"
 #include "hw/riscv/boot.h"
@@ -687,6 +688,32 @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
 aplic_phandles[socket] = aplic_s_phandle;
 }
 
+static void create_fdt_socket_pmu(RISCVVirtState *s,
+  int socket, uint32_t *phandle,
+  uint32_t *intc_phandles)
+{
+int cpu;
+char *pmu_name;
+uint32_t *pmu_cells;
+MachineState *mc = MACHINE(s);
+RISCVCPU hart = s->soc[socket].harts[0];
+
+pmu_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
+
+for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
+pmu_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
+pmu_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_PMU_OVF);
+}
+
+pmu_name = g_strdup_printf("/soc/pmu");
+qemu_fdt_add_subnode(mc->fdt, pmu_name);
+qemu_fdt_setprop_string(mc->fdt, pmu_name, "compatible", "riscv,pmu");
+riscv_pmu_generate_fdt_node(mc->fdt, hart.cfg.pmu_num, pmu_name);
+
+g_free(pmu_name);
+g_free(pmu_cells);
+}
+
 static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
bool is_32_bit, uint32_t *phandle,
uint32_t *irq_mmio_phandle,
@@ -732,6 +759,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
MemMapEntry *memmap,
 &intc_phandles[phandle_pos]);
 }
 }
+create_fdt_socket_pmu(s, socket, phandle, intc_phandles);
 }
 
 if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index e13d2f81a05c..3df53bcb48f9 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -922,6 +922,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
**isa_str, int max_str_len)
 int i;
 /* The extension names should be kept in alphabetically order */
 struct isa_ext_data isa_edata_arr[] = {
+{ "sscofpmf", cpu->cfg.ext_sscofpmf },
 { "svinval", cpu->cfg.ext_svinval },
 { "svnapot", cpu->cfg.ext_svnapot },
 { "svpbmt", cpu->cfg.ext_svpbmt   },
diff --git a/target/riscv/pmu.c b/target/riscv/pmu.c
index 5b212d2eb630..6e470a1d5f66 100644
--- a/target/riscv/pmu.c
+++ b/target/riscv/pmu.c
@@ -19,11 +19,68 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "pmu.h"
+#include "sysemu/device_tree.h"
 
 #define RISCV_TIMEBASE_FREQ 1000000000 /* 1Ghz */
 #define MAKE_32BIT_MASK(shift, length) \
 (((uint32_t)(~0UL) >> (32 - (length))) << (shift))
 
+/**
+ * To keep it simple, any event can be mapped to any programmable counters in
+ * QEMU. The generic cycle & instruction count events can also be monitored
+ * using programmable counters. In that case, mcycle & minstret must continue
+ * to provide the correct value as well. Heterogeneous PMU per hart is not
+ * supported yet. Thus, the number of counters is the same across all harts.
+ */
+void riscv_pmu_generate_fdt_node(void *fdt, int num_ctrs, char *pmu_name)
+{
+uint32_t fdt_event_ctr_map[20] = {};
+uint32_t cmask;
+
+/* All the programmable counters can map to any event */
+cmask = MAKE_32BIT_MASK(3, num_ctrs);
+
+   /**
+* The event encoding is specified in the SBI specification
+* Event idx is a 20bits wide number encoded as follows:
+* event_idx[19:16] = type
+* event_idx[15:0] = code
+* The code field in cache events are encoded as follows:
+* event_idx.code[15:3] = cache_id
+* event_idx.code[2:1] = op_id
+* event_idx.code[0:0] = result_id
+*/
+
+   /* SBI_PMU_HW_CPU_CYCLES: 0x01 : type(0x00) */
+   fdt_event_ctr_map[0] = cpu_to_be32(0x0001);
+   fdt_event_ctr_map[1] = cpu_to_be32(0x0001);
+   fdt_event_ctr_map[2] = cpu_to_be32(cmask | 1 << 0);
+
+   /* SBI_PMU_HW_INSTRUCTIONS: 0x02 : type(0x00) */
+   fdt_event_ctr_map[3] = cpu_to_be32(0x0002);
+   fdt_event_ctr_map[4] = cpu_to_be32(0x0002);
+   fdt_event_ctr_map[5] = cpu_to_be32(cmask | 1 << 2);
+
+   /* SBI_PMU_HW_CACHE_DTLB : 0x03 READ : 0x00 MISS : 0x00 type(0x01) */
+   fdt_event_ctr_map[6] = cpu_to_be32(0x00010019);
+   fdt_event_ctr_map[7] = 

[PATCH v6 07/12] target/riscv: Support mcycle/minstret write operation

2022-03-03 Thread Atish Patra
From: Atish Patra 

mcycle/minstret are actually WARL registers and can be written with any
given value. With the SBI PMU extension, they will be used to store an
initial value provided by the supervisor OS. QEMU also needs to prohibit
the counter increment if mcountinhibit is set.

Support mcycle/minstret through generic counter infrastructure.

Reviewed-by: Alistair Francis 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
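(Not part of the patch — a toy model of the snapshot scheme introduced
below; get_ticks() stands in for the host tick source and all names are
illustrative:)

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t ticks;                         /* fake tick source */
    static uint64_t get_ticks(void) { return ticks; }

    static uint64_t ctr_val, ctr_prev;

    /* write: remember the value and snapshot the tick source */
    static void write_ctr(uint64_t v) { ctr_val = v; ctr_prev = get_ticks(); }
    /* read: written value plus ticks elapsed since the write */
    static uint64_t read_ctr(void) { return ctr_val + (get_ticks() - ctr_prev); }

    int main(void)
    {
        ticks = 100;
        write_ctr(5);       /* supervisor seeds the counter with 5 */
        ticks = 160;        /* 60 host ticks elapse */
        printf("%llu\n", (unsigned long long)read_ctr());  /* prints 65 */
        return 0;
    }
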
 target/riscv/cpu.h   |  23 +--
 target/riscv/csr.c   | 145 +++
 target/riscv/machine.c   |  25 ++-
 target/riscv/meson.build |   1 +
 target/riscv/pmu.c   |  32 +
 target/riscv/pmu.h   |  28 
 6 files changed, 201 insertions(+), 53 deletions(-)
 create mode 100644 target/riscv/pmu.c
 create mode 100644 target/riscv/pmu.h

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 3eedd7a5888a..b49adcccd85d 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -109,7 +109,7 @@ typedef struct CPURISCVState CPURISCVState;
 #endif
 
 #define RV_VLEN_MAX 1024
-#define RV_MAX_MHPMEVENTS 29
+#define RV_MAX_MHPMEVENTS 32
 #define RV_MAX_MHPMCOUNTERS 32
 
 FIELD(VTYPE, VLMUL, 0, 3)
@@ -119,6 +119,18 @@ FIELD(VTYPE, VMA, 7, 1)
 FIELD(VTYPE, VEDIV, 8, 2)
 FIELD(VTYPE, RESERVED, 10, sizeof(target_ulong) * 8 - 11)
 
+typedef struct PMUCTRState {
+/* Current value of a counter */
+target_ulong mhpmcounter_val;
+/* Current value of a counter in RV32*/
+target_ulong mhpmcounterh_val;
+/* Snapshot values of counter */
+target_ulong mhpmcounter_prev;
+/* Snapshot value of a counter in RV32 */
+target_ulong mhpmcounterh_prev;
+bool started;
+} PMUCTRState;
+
 struct CPURISCVState {
 target_ulong gpr[32];
 target_ulong gprh[32]; /* 64 top bits of the 128-bit registers */
@@ -263,13 +275,10 @@ struct CPURISCVState {
 
 target_ulong mcountinhibit;
 
-/* PMU counter configured values */
-target_ulong mhpmcounter_val[RV_MAX_MHPMCOUNTERS];
-
-/* for RV32 */
-target_ulong mhpmcounterh_val[RV_MAX_MHPMCOUNTERS];
+/* PMU counter state */
+PMUCTRState pmu_ctrs[RV_MAX_MHPMCOUNTERS];
 
-/* PMU event selector configured values */
+/* PMU event selector configured values. First three are unused */
 target_ulong mhpmevent_val[RV_MAX_MHPMEVENTS];
 
 target_ulong sscratch;
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 10b6e498f059..3286e1e44455 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -21,6 +21,7 @@
 #include "qemu/log.h"
 #include "qemu/timer.h"
 #include "cpu.h"
+#include "pmu.h"
 #include "qemu/main-loop.h"
 #include "exec/exec-all.h"
 
@@ -547,39 +548,33 @@ static int write_vcsr(CPURISCVState *env, int csrno, 
target_ulong val)
 }
 
 /* User Timers and Counters */
-static RISCVException read_instret(CPURISCVState *env, int csrno,
-   target_ulong *val)
+static target_ulong get_ticks(bool shift)
 {
+int64_t val;
+target_ulong result;
+
 #if !defined(CONFIG_USER_ONLY)
 if (icount_enabled()) {
-*val = icount_get();
+val = icount_get();
 } else {
-*val = cpu_get_host_ticks();
+val = cpu_get_host_ticks();
 }
 #else
-*val = cpu_get_host_ticks();
+val = cpu_get_host_ticks();
 #endif
-return RISCV_EXCP_NONE;
-}
 
-static RISCVException read_instreth(CPURISCVState *env, int csrno,
-target_ulong *val)
-{
-#if !defined(CONFIG_USER_ONLY)
-if (icount_enabled()) {
-*val = icount_get() >> 32;
+if (shift) {
+result = val >> 32;
 } else {
-*val = cpu_get_host_ticks() >> 32;
+result = val;
 }
-#else
-*val = cpu_get_host_ticks() >> 32;
-#endif
-return RISCV_EXCP_NONE;
+
+return result;
 }
 
 static int read_mhpmevent(CPURISCVState *env, int csrno, target_ulong *val)
 {
-int evt_index = csrno - CSR_MHPMEVENT3;
+int evt_index = csrno - CSR_MCOUNTINHIBIT;
 
 *val = env->mhpmevent_val[evt_index];
 
@@ -588,7 +583,7 @@ static int read_mhpmevent(CPURISCVState *env, int csrno, 
target_ulong *val)
 
 static int write_mhpmevent(CPURISCVState *env, int csrno, target_ulong val)
 {
-int evt_index = csrno - CSR_MHPMEVENT3;
+int evt_index = csrno - CSR_MCOUNTINHIBIT;
 
 env->mhpmevent_val[evt_index] = val;
 
@@ -597,52 +592,102 @@ static int write_mhpmevent(CPURISCVState *env, int 
csrno, target_ulong val)
 
 static int write_mhpmcounter(CPURISCVState *env, int csrno, target_ulong val)
 {
-int ctr_index = csrno - CSR_MHPMCOUNTER3 + 3;
+int ctr_idx = csrno - CSR_MCYCLE;
+PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
 
-env->mhpmcounter_val[ctr_index] = val;
+counter->mhpmcounter_val = val;
+if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
+riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
+counter->mhpmcounter_prev = get_ticks(false);
+} else {
+/* Other counters can keep incrementing 

[PATCH v6 12/12] target/riscv: Update the privilege field for sscofpmf CSRs

2022-03-03 Thread Atish Patra
The sscofpmf extension was ratified as a part of priv spec v1.12.
Mark the csr_ops accordingly.

Signed-off-by: Atish Patra 
---
 target/riscv/csr.c | 90 ++
 1 file changed, 60 insertions(+), 30 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 771e16d99f39..ac97727f8d6c 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -3802,63 +3802,92 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
write_mhpmevent },
 
 [CSR_MHPMEVENT3H]= { "mhpmevent3h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT4H]= { "mhpmevent4h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT5H]= { "mhpmevent5h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT6H]= { "mhpmevent6h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT7H]= { "mhpmevent7h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT8H]= { "mhpmevent8h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT9H]= { "mhpmevent9h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT10H]   = { "mhpmevent10h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT11H]   = { "mhpmevent11h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT12H]   = { "mhpmevent12h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT13H]   = { "mhpmevent13h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT14H]   = { "mhpmevent14h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT15H]   = { "mhpmevent15h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT16H]   = { "mhpmevent16h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT17H]   = { "mhpmevent17h",sscofpmf,  read_mhpmeventh,
-   

[PATCH v6 10/12] target/riscv: Add few cache related PMU events

2022-03-03 Thread Atish Patra
From: Atish Patra 

QEMU can monitor the following cache-related PMU events through
tlb_fill functions.

1. DTLB load/store miss
2. ITLB prefetch miss

Increment the PMU counter in tlb_fill function.

Reviewed-by: Alistair Francis 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
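(Aside, not part of the patch: the SBI event indices used below decode
as in this sketch, assuming the SBI v0.3 layout where code[15:3] is the
cache id, code[2:1] the op id and code[0] the result id:)

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t event_idx = 0x10019;  /* RISCV_PMU_EVENT_CACHE_DTLB_READ_MISS */
        uint32_t type      = (event_idx >> 16) & 0xf;  /* 0x1: hw cache event */
        uint32_t code      = event_idx & 0xffff;
        uint32_t cache_id  = (code >> 3) & 0x1fff;     /* 0x3: DTLB */
        uint32_t op_id     = (code >> 1) & 0x3;        /* 0x0: read */
        uint32_t result_id = code & 0x1;               /* 0x1: miss */

        printf("type=%u cache=%u op=%u result=%u\n",
               type, cache_id, op_id, result_id);
        return 0;
    }
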
 target/riscv/cpu_helper.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 1c60fb2e8057..72ae1a612ae8 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -21,10 +21,13 @@
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
+#include "pmu.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg-op.h"
 #include "trace.h"
 #include "semihosting/common-semi.h"
+#include "cpu.h"
+#include "cpu_bits.h"
 
 int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
 {
@@ -1178,6 +1181,28 @@ void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr 
addr,
 riscv_raise_exception(env, cs->exception_index, retaddr);
 }
 
+
+static void pmu_tlb_fill_incr_ctr(RISCVCPU *cpu, MMUAccessType access_type)
+{
+enum riscv_pmu_event_idx pmu_event_type;
+
+switch (access_type) {
+case MMU_INST_FETCH:
+pmu_event_type = RISCV_PMU_EVENT_CACHE_ITLB_PREFETCH_MISS;
+break;
+case MMU_DATA_LOAD:
+pmu_event_type = RISCV_PMU_EVENT_CACHE_DTLB_READ_MISS;
+break;
+case MMU_DATA_STORE:
+pmu_event_type = RISCV_PMU_EVENT_CACHE_DTLB_WRITE_MISS;
+break;
+default:
+return;
+}
+
+riscv_pmu_incr_ctr(cpu, pmu_event_type);
+}
+
 bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
 MMUAccessType access_type, int mmu_idx,
 bool probe, uintptr_t retaddr)
@@ -1274,6 +1299,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
 }
 }
 } else {
+pmu_tlb_fill_incr_ctr(cpu, access_type);
 /* Single stage lookup */
 ret = get_physical_address(env, , , address, NULL,
access_type, mmu_idx, true, false, false);
-- 
2.30.2




[PATCH v6 06/12] target/riscv: Add support for hpmcounters/hpmevents

2022-03-03 Thread Atish Patra
From: Atish Patra 

With the SBI PMU extension, users can use any of the available hpmcounters
to track any perf event, based on the value written to the mhpmevent CSR.
Add read/write functionality for these CSRs.

Reviewed-by: Alistair Francis 
Reviewed-by: Bin Meng 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/cpu.h |  11 +
 target/riscv/csr.c | 466 +++--
 target/riscv/machine.c |   3 +
 3 files changed, 328 insertions(+), 152 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 6667ec963707..3eedd7a5888a 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -109,6 +109,8 @@ typedef struct CPURISCVState CPURISCVState;
 #endif
 
 #define RV_VLEN_MAX 1024
+#define RV_MAX_MHPMEVENTS 29
+#define RV_MAX_MHPMCOUNTERS 32
 
 FIELD(VTYPE, VLMUL, 0, 3)
 FIELD(VTYPE, VSEW, 3, 3)
@@ -261,6 +263,15 @@ struct CPURISCVState {
 
 target_ulong mcountinhibit;
 
+/* PMU counter configured values */
+target_ulong mhpmcounter_val[RV_MAX_MHPMCOUNTERS];
+
+/* for RV32 */
+target_ulong mhpmcounterh_val[RV_MAX_MHPMCOUNTERS];
+
+/* PMU event selector configured values */
+target_ulong mhpmevent_val[RV_MAX_MHPMEVENTS];
+
 target_ulong sscratch;
 target_ulong mscratch;
 
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 9cfbd60aaeb4..10b6e498f059 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -86,6 +86,15 @@ static RISCVException mctr(CPURISCVState *env, int csrno)
 return RISCV_EXCP_NONE;
 }
 
+static RISCVException mctr32(CPURISCVState *env, int csrno)
+{
+if (riscv_cpu_mxl(env) != MXL_RV32) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+return mctr(env, csrno);
+}
+
 static RISCVException ctr(CPURISCVState *env, int csrno)
 {
 #if !defined(CONFIG_USER_ONLY)
@@ -568,6 +577,72 @@ static RISCVException read_instreth(CPURISCVState *env, 
int csrno,
 return RISCV_EXCP_NONE;
 }
 
+static int read_mhpmevent(CPURISCVState *env, int csrno, target_ulong *val)
+{
+int evt_index = csrno - CSR_MHPMEVENT3;
+
+*val = env->mhpmevent_val[evt_index];
+
+return RISCV_EXCP_NONE;
+}
+
+static int write_mhpmevent(CPURISCVState *env, int csrno, target_ulong val)
+{
+int evt_index = csrno - CSR_MHPMEVENT3;
+
+env->mhpmevent_val[evt_index] = val;
+
+return RISCV_EXCP_NONE;
+}
+
+static int write_mhpmcounter(CPURISCVState *env, int csrno, target_ulong val)
+{
+int ctr_index = csrno - CSR_MHPMCOUNTER3 + 3;
+
+env->mhpmcounter_val[ctr_index] = val;
+
+return RISCV_EXCP_NONE;
+}
+
+static int write_mhpmcounterh(CPURISCVState *env, int csrno, target_ulong val)
+{
+int ctr_index = csrno - CSR_MHPMCOUNTER3H + 3;
+
+env->mhpmcounterh_val[ctr_index] = val;
+
+return RISCV_EXCP_NONE;
+}
+
+static int read_hpmcounter(CPURISCVState *env, int csrno, target_ulong *val)
+{
+int ctr_index;
+
+if (env->priv == PRV_M) {
+ctr_index = csrno - CSR_MHPMCOUNTER3 + 3;
+} else {
+ctr_index = csrno - CSR_HPMCOUNTER3 + 3;
+}
+*val = env->mhpmcounter_val[ctr_index];
+
+return RISCV_EXCP_NONE;
+}
+
+static int read_hpmcounterh(CPURISCVState *env, int csrno, target_ulong *val)
+{
+int ctr_index;
+
+if (env->priv == PRV_M) {
+ctr_index = csrno - CSR_MHPMCOUNTER3H + 3;
+} else {
+ctr_index = csrno - CSR_HPMCOUNTER3H + 3;
+}
+
+*val = env->mhpmcounterh_val[ctr_index];
+
+return RISCV_EXCP_NONE;
+}
+
+
 #if defined(CONFIG_USER_ONLY)
 static RISCVException read_time(CPURISCVState *env, int csrno,
 target_ulong *val)
@@ -3531,157 +3606,244 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 [CSR_SPMBASE] ={ "spmbase", pointer_masking, read_spmbase, 
write_spmbase },
 
 /* Performance Counters */
-[CSR_HPMCOUNTER3]= { "hpmcounter3",ctr,read_zero },
-[CSR_HPMCOUNTER4]= { "hpmcounter4",ctr,read_zero },
-[CSR_HPMCOUNTER5]= { "hpmcounter5",ctr,read_zero },
-[CSR_HPMCOUNTER6]= { "hpmcounter6",ctr,read_zero },
-[CSR_HPMCOUNTER7]= { "hpmcounter7",ctr,read_zero },
-[CSR_HPMCOUNTER8]= { "hpmcounter8",ctr,read_zero },
-[CSR_HPMCOUNTER9]= { "hpmcounter9",ctr,read_zero },
-[CSR_HPMCOUNTER10]   = { "hpmcounter10",   ctr,read_zero },
-[CSR_HPMCOUNTER11]   = { "hpmcounter11",   ctr,read_zero },
-[CSR_HPMCOUNTER12]   = { "hpmcounter12",   ctr,read_zero },
-[CSR_HPMCOUNTER13]   = { "hpmcounter13",   ctr,read_zero },
-[CSR_HPMCOUNTER14]   = { "hpmcounter14",   ctr,read_zero },
-[CSR_HPMCOUNTER15]   = { "hpmcounter15",   ctr,read_zero },
-[CSR_HPMCOUNTER16]   = { "hpmcounter16",   ctr,read_zero },
-[CSR_HPMCOUNTER17]   = { "hpmcounter17",   ctr,read_zero },
-[CSR_HPMCOUNTER18]   = { "hpmcounter18",   ctr,read_zero },
-[CSR_HPMCOUNTER19]   = { "hpmcounter19",   ctr,read_zero },
-

[PATCH v6 09/12] target/riscv: Simplify counter predicate function

2022-03-03 Thread Atish Patra
All the hpmcounters and the fixed counters (CY, IR, TM) can be represented
as a unified counter. Thus, the predicate function doesn't need to handle
each case separately.

Simplify the predicate function so that we just handle things differently
between RV32/RV64 and S/HS mode.

Reviewed-by: Bin Meng 
Acked-by: Alistair Francis 
Signed-off-by: Atish Patra 
---
 target/riscv/csr.c | 111 -
 1 file changed, 10 insertions(+), 101 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index cfcc1a0882d9..771e16d99f39 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -115,6 +115,7 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 CPUState *cs = env_cpu(env);
 RISCVCPU *cpu = RISCV_CPU(cs);
 int ctr_index;
+target_ulong ctr_mask;
 int base_csrno = CSR_CYCLE;
 bool rv32 = riscv_cpu_mxl(env) == MXL_RV32 ? true : false;
 
@@ -123,122 +124,30 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 base_csrno += 0x80;
 }
 ctr_index = csrno - base_csrno;
+ctr_mask = BIT(ctr_index);
 
 if ((csrno >= CSR_CYCLE && csrno <= CSR_INSTRET) ||
 (csrno >= CSR_CYCLEH && csrno <= CSR_INSTRETH)) {
 goto skip_ext_pmu_check;
 }
 
-if ((!cpu->cfg.pmu_num || !(cpu->pmu_avail_ctrs & BIT(ctr_index {
+if ((!cpu->cfg.pmu_num || !(cpu->pmu_avail_ctrs & ctr_mask))) {
 /* No counter is enabled in PMU or the counter is out of range */
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
 skip_ext_pmu_check:
 
-if (env->priv == PRV_S) {
-switch (csrno) {
-case CSR_CYCLE:
-if (!get_field(env->mcounteren, COUNTEREN_CY)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_TIME:
-if (!get_field(env->mcounteren, COUNTEREN_TM)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_INSTRET:
-if (!get_field(env->mcounteren, COUNTEREN_IR)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
-if (!get_field(env->mcounteren, 1 << ctr_index)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-}
-if (rv32) {
-switch (csrno) {
-case CSR_CYCLEH:
-if (!get_field(env->mcounteren, COUNTEREN_CY)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_TIMEH:
-if (!get_field(env->mcounteren, COUNTEREN_TM)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_INSTRETH:
-if (!get_field(env->mcounteren, COUNTEREN_IR)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_HPMCOUNTER3H...CSR_HPMCOUNTER31H:
-if (!get_field(env->mcounteren, 1 << ctr_index)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-}
-}
+if ((env->priv == PRV_S) && (!get_field(env->mcounteren, ctr_mask))) {
+return RISCV_EXCP_ILLEGAL_INST;
 }
 
 if (riscv_cpu_virt_enabled(env)) {
-switch (csrno) {
-case CSR_CYCLE:
-if (!get_field(env->hcounteren, COUNTEREN_CY) &&
-get_field(env->mcounteren, COUNTEREN_CY)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-case CSR_TIME:
-if (!get_field(env->hcounteren, COUNTEREN_TM) &&
-get_field(env->mcounteren, COUNTEREN_TM)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-case CSR_INSTRET:
-if (!get_field(env->hcounteren, COUNTEREN_IR) &&
-get_field(env->mcounteren, COUNTEREN_IR)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
-if (!get_field(env->hcounteren, 1 << ctr_index) &&
- get_field(env->mcounteren, 1 << ctr_index)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-}
-if (rv32) {
-switch (csrno) {
-case CSR_CYCLEH:
-if (!get_field(env->hcounteren, COUNTEREN_CY) &&
-get_field(env->mcounteren, COUNTEREN_CY)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-case CSR_TIMEH:
-if (!get_field(env->hcounteren, COUNTEREN_TM) &&
-get_field(env->mcounteren, COUNTEREN_TM)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-

[PATCH v6 08/12] target/riscv: Add sscofpmf extension support

2022-03-03 Thread Atish Patra
The Sscofpmf ('Ss' for Privileged arch and Supervisor-level extensions,
and 'cofpmf' for Count OverFlow and Privilege Mode Filtering)
extension allows perf to handle overflow interrupts and filtering
support. This patch provides a framework for programmable
counters to leverage the extension. As the extension doesn't have any
provision for an overflow bit for fixed counters, the fixed events
can also be monitored using programmable counters. The underlying
counters for cycle and instruction counters are always running. Thus,
a separate timer device is programmed to handle the overflow.

Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
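(Illustration only, not part of the patch: the overflow timer idea is
roughly the following, assuming the 1 GHz timebase used elsewhere in
this series; the helper name is hypothetical and wrap-around corner
cases are ignored:)

    /* Arm the per-cpu timer for when a free-running counter overflows. */
    static void pmu_arm_overflow_timer(RISCVCPU *cpu, uint64_t cur, bool rv32)
    {
        uint64_t max = rv32 ? UINT32_MAX : UINT64_MAX;
        uint64_t ticks_left = max - cur + 1;
        /* 1 GHz timebase: one counter tick per nanosecond */
        timer_mod(cpu->pmu_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (int64_t)ticks_left);
    }
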
 target/riscv/cpu.c  |   9 ++
 target/riscv/cpu.h  |  25 +++
 target/riscv/cpu_bits.h |  55 +++
 target/riscv/csr.c  | 157 --
 target/riscv/pmu.c  | 346 +++-
 target/riscv/pmu.h  |   7 +
 6 files changed, 588 insertions(+), 11 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 2d2b0b04fbc8..e13d2f81a05c 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -22,6 +22,7 @@
 #include "qemu/ctype.h"
 #include "qemu/log.h"
 #include "cpu.h"
+#include "pmu.h"
 #include "internals.h"
 #include "exec/exec-all.h"
 #include "qapi/error.h"
@@ -689,6 +690,13 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 set_misa(env, env->misa_mxl, ext);
 }
 
+if (cpu->cfg.pmu_num) {
+if (!riscv_pmu_init(cpu, cpu->cfg.pmu_num) && cpu->cfg.ext_sscofpmf) {
+cpu->pmu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+  riscv_pmu_timer_cb, cpu);
+}
+ }
+
 riscv_cpu_register_gdb_regs_for_features(cs);
 
 qemu_init_vcpu(cs);
@@ -780,6 +788,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),
 DEFINE_PROP_BOOL("h", RISCVCPU, cfg.ext_h, true),
 DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
+DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false),
 DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
 DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
 DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index b49adcccd85d..eaa5a73897eb 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -129,6 +129,8 @@ typedef struct PMUCTRState {
 /* Snapshot value of a counter in RV32 */
 target_ulong mhpmcounterh_prev;
 bool started;
+/* Value beyond UINT32_MAX/UINT64_MAX before overflow interrupt trigger */
+target_ulong irq_overflow_left;
 } PMUCTRState;
 
 struct CPURISCVState {
@@ -281,6 +283,9 @@ struct CPURISCVState {
 /* PMU event selector configured values. First three are unused */
 target_ulong mhpmevent_val[RV_MAX_MHPMEVENTS];
 
+/* PMU event selector configured values for RV32*/
+target_ulong mhpmeventh_val[RV_MAX_MHPMEVENTS];
+
 target_ulong sscratch;
 target_ulong mscratch;
 
@@ -400,6 +405,7 @@ struct RISCVCPUConfig {
 bool ext_zhinxmin;
 bool ext_zve32f;
 bool ext_zve64f;
+bool ext_sscofpmf;
 
 /* Vendor-specific custom extensions */
 bool ext_XVentanaCondOps;
@@ -438,6 +444,12 @@ struct RISCVCPU {
 
 /* Configuration Settings */
 RISCVCPUConfig cfg;
+
+QEMUTimer *pmu_timer;
+/* A bitmask of Available programmable counters */
+uint32_t pmu_avail_ctrs;
+/* Mapping of events to counters */
+GHashTable *pmu_event_ctr_map;
 };
 
 static inline int riscv_has_ext(CPURISCVState *env, target_ulong ext)
@@ -697,6 +709,19 @@ enum {
 CSR_TABLE_SIZE = 0x1000
 };
 
+/**
+ * The event id are encoded based on the encoding specified in the
+ * SBI specification v0.3
+ */
+
+enum riscv_pmu_event_idx {
+RISCV_PMU_EVENT_HW_CPU_CYCLES = 0x01,
+RISCV_PMU_EVENT_HW_INSTRUCTIONS = 0x02,
+RISCV_PMU_EVENT_CACHE_DTLB_READ_MISS = 0x10019,
+RISCV_PMU_EVENT_CACHE_DTLB_WRITE_MISS = 0x1001B,
+RISCV_PMU_EVENT_CACHE_ITLB_PREFETCH_MISS = 0x10021,
+};
+
 /* CSR function table */
 extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE];
 
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 48b39e6d52a7..da78e2704081 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -400,6 +400,37 @@
 #define CSR_MHPMEVENT29 0x33d
 #define CSR_MHPMEVENT30 0x33e
 #define CSR_MHPMEVENT31 0x33f
+
+#define CSR_MHPMEVENT3H 0x723
+#define CSR_MHPMEVENT4H 0x724
+#define CSR_MHPMEVENT5H 0x725
+#define CSR_MHPMEVENT6H 0x726
+#define CSR_MHPMEVENT7H 0x727
+#define CSR_MHPMEVENT8H 0x728
+#define CSR_MHPMEVENT9H 0x729
+#define CSR_MHPMEVENT10H0x72a
+#define CSR_MHPMEVENT11H0x72b
+#define CSR_MHPMEVENT12H0x72c
+#define CSR_MHPMEVENT13H0x72d
+#define CSR_MHPMEVENT14H0x72e
+#define CSR_MHPMEVENT15H0x72f
+#define CSR_MHPMEVENT16H0x730
+#define CSR_MHPMEVENT17H

[PATCH v6 01/12] target/riscv: Fix PMU CSR predicate function

2022-03-03 Thread Atish Patra
From: Atish Patra 

The predicate function calculates the counter index incorrectly for
hpmcounterx. Fix the counter index to reflect the correct CSR number.

Fixes: e39a8320b088 ("target/riscv: Support the Virtual Instruction fault")

Reviewed-by: Alistair Francis 
Reviewed-by: Bin Meng 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/csr.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index b1876b3b6466..6102d5e7e24f 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -69,6 +69,7 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 #if !defined(CONFIG_USER_ONLY)
 CPUState *cs = env_cpu(env);
 RISCVCPU *cpu = RISCV_CPU(cs);
+int ctr_index;
 
 if (!cpu->cfg.ext_counters) {
 /* The Counters extensions is not enabled */
@@ -96,8 +97,9 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 }
 break;
 case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
-if (!get_field(env->hcounteren, 1 << (csrno - CSR_HPMCOUNTER3)) &&
-get_field(env->mcounteren, 1 << (csrno - CSR_HPMCOUNTER3))) {
+ctr_index = csrno - CSR_CYCLE;
+if (!get_field(env->hcounteren, 1 << ctr_index) &&
+ get_field(env->mcounteren, 1 << ctr_index)) {
 return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
 }
 break;
@@ -123,8 +125,9 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 }
 break;
 case CSR_HPMCOUNTER3H...CSR_HPMCOUNTER31H:
-if (!get_field(env->hcounteren, 1 << (csrno - 
CSR_HPMCOUNTER3H)) &&
-get_field(env->mcounteren, 1 << (csrno - 
CSR_HPMCOUNTER3H))) {
+ctr_index = csrno - CSR_CYCLEH;
+if (!get_field(env->hcounteren, 1 << ctr_index) &&
+ get_field(env->mcounteren, 1 << ctr_index)) {
 return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
 }
 break;
-- 
2.30.2




[PATCH v6 05/12] target/riscv: Implement mcountinhibit CSR

2022-03-03 Thread Atish Patra
From: Atish Patra 

As per the privilege specification v1.11, mcountinhibit allows
starting/stopping a pmu counter selectively.

Reviewed-by: Bin Meng 
Reviewed-by: Alistair Francis 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
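(Usage sketch, not part of the patch: from M-mode, inhibiting the cycle
(bit 0) and instret (bit 2) counters; bit 1 is hardwired to zero as
there is no "time" counter to inhibit. Needs an assembler that knows
the CSR name:)

    unsigned long inhibit = (1UL << 0) | (1UL << 2);   /* CY | IR */
    __asm__ volatile("csrw mcountinhibit, %0" : : "r"(inhibit));
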
 target/riscv/cpu.h  |  2 ++
 target/riscv/cpu_bits.h |  4 
 target/riscv/csr.c  | 25 +
 target/riscv/machine.c  |  1 +
 4 files changed, 32 insertions(+)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 299dccf286e0..6667ec963707 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -259,6 +259,8 @@ struct CPURISCVState {
 target_ulong scounteren;
 target_ulong mcounteren;
 
+target_ulong mcountinhibit;
+
 target_ulong sscratch;
 target_ulong mscratch;
 
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index bb47cf7e77a2..48b39e6d52a7 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -367,6 +367,10 @@
 #define CSR_MHPMCOUNTER29   0xb1d
 #define CSR_MHPMCOUNTER30   0xb1e
 #define CSR_MHPMCOUNTER31   0xb1f
+
+/* Machine counter-inhibit register */
+#define CSR_MCOUNTINHIBIT   0x320
+
 #define CSR_MHPMEVENT3  0x323
 #define CSR_MHPMEVENT4  0x324
 #define CSR_MHPMEVENT5  0x325
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 75f18a6323c2..9cfbd60aaeb4 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1395,6 +1395,28 @@ static RISCVException write_mtvec(CPURISCVState *env, 
int csrno,
 return RISCV_EXCP_NONE;
 }
 
+static RISCVException read_mcountinhibit(CPURISCVState *env, int csrno,
+ target_ulong *val)
+{
+if (env->priv_ver < PRIV_VERSION_1_11_0) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+*val = env->mcountinhibit;
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException write_mcountinhibit(CPURISCVState *env, int csrno,
+  target_ulong val)
+{
+if (env->priv_ver < PRIV_VERSION_1_11_0) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+env->mcountinhibit = val;
+return RISCV_EXCP_NONE;
+}
+
 static RISCVException read_mcounteren(CPURISCVState *env, int csrno,
   target_ulong *val)
 {
@@ -3569,6 +3591,9 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 [CSR_MHPMCOUNTER30]  = { "mhpmcounter30",  mctr,   read_zero },
 [CSR_MHPMCOUNTER31]  = { "mhpmcounter31",  mctr,   read_zero },
 
+[CSR_MCOUNTINHIBIT]  = { "mcountinhibit",   any,read_mcountinhibit,
+   write_mcountinhibit },
+
 [CSR_MHPMEVENT3] = { "mhpmevent3", any,read_zero },
 [CSR_MHPMEVENT4] = { "mhpmevent4", any,read_zero },
 [CSR_MHPMEVENT5] = { "mhpmevent5", any,read_zero },
diff --git a/target/riscv/machine.c b/target/riscv/machine.c
index 243f567949ea..2a48bcf81d3d 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -299,6 +299,7 @@ const VMStateDescription vmstate_riscv_cpu = {
 VMSTATE_UINTTL(env.siselect, RISCVCPU),
 VMSTATE_UINTTL(env.scounteren, RISCVCPU),
 VMSTATE_UINTTL(env.mcounteren, RISCVCPU),
+VMSTATE_UINTTL(env.mcountinhibit, RISCVCPU),
 VMSTATE_UINTTL(env.sscratch, RISCVCPU),
 VMSTATE_UINTTL(env.mscratch, RISCVCPU),
 VMSTATE_UINT64(env.mfromhost, RISCVCPU),
-- 
2.30.2




[PATCH v6 03/12] target/riscv: pmu: Rename the counters extension to pmu

2022-03-03 Thread Atish Patra
From: Atish Patra 

The PMU counters are supported via the cpu config "Counters", which doesn't
indicate the correct purpose of those counters.

Rename the config property to pmu to indicate that these counters
are performance monitoring counters. This aligns with the cpu options
for the ARM architecture as well.

Reviewed-by: Bin Meng 
Reviewed-by: Alistair Francis 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/cpu.c | 2 +-
 target/riscv/cpu.h | 2 +-
 target/riscv/csr.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 58c67148c235..3b781ef455ff 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -779,7 +779,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("u", RISCVCPU, cfg.ext_u, true),
 DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),
 DEFINE_PROP_BOOL("h", RISCVCPU, cfg.ext_h, true),
-DEFINE_PROP_BOOL("Counters", RISCVCPU, cfg.ext_counters, true),
+DEFINE_PROP_BOOL("pmu", RISCVCPU, cfg.ext_pmu, true),
 DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
 DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
 DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index d67e0ba6f7e1..9bece5056c63 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -365,7 +365,7 @@ struct RISCVCPUConfig {
 bool ext_zbb;
 bool ext_zbc;
 bool ext_zbs;
-bool ext_counters;
+bool ext_pmu;
 bool ext_ifencei;
 bool ext_icsr;
 bool ext_svinval;
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index cb4366b30095..88ae827ba174 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -71,8 +71,8 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 RISCVCPU *cpu = RISCV_CPU(cs);
 int ctr_index;
 
-if (!cpu->cfg.ext_counters) {
-/* The Counters extensions is not enabled */
+if (!cpu->cfg.ext_pmu) {
+/* The PMU extension is not enabled */
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
-- 
2.30.2




[PATCH v6 04/12] target/riscv: pmu: Make number of counters configurable

2022-03-03 Thread Atish Patra
The RISC-V privilege specification provides the flexibility to implement
any number of the 29 programmable counters. However, QEMU currently
implements all of them.

Make it configurable through the pmu config parameter, which now indicates
how many programmable counters the cpu should implement.

Reviewed-by: Bin Meng 
Reviewed-by: Alistair Francis 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/cpu.c |  2 +-
 target/riscv/cpu.h |  2 +-
 target/riscv/csr.c | 96 ++
 3 files changed, 65 insertions(+), 35 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 3b781ef455ff..2d2b0b04fbc8 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -779,7 +779,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("u", RISCVCPU, cfg.ext_u, true),
 DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),
 DEFINE_PROP_BOOL("h", RISCVCPU, cfg.ext_h, true),
-DEFINE_PROP_BOOL("pmu", RISCVCPU, cfg.ext_pmu, true),
+DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
 DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
 DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
 DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 9bece5056c63..299dccf286e0 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -365,7 +365,6 @@ struct RISCVCPUConfig {
 bool ext_zbb;
 bool ext_zbc;
 bool ext_zbs;
-bool ext_pmu;
 bool ext_ifencei;
 bool ext_icsr;
 bool ext_svinval;
@@ -383,6 +382,7 @@ struct RISCVCPUConfig {
 /* Vendor-specific custom extensions */
 bool ext_XVentanaCondOps;
 
+uint8_t pmu_num;
 char *priv_spec;
 char *user_spec;
 char *bext_spec;
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 88ae827ba174..75f18a6323c2 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -64,15 +64,45 @@ static RISCVException vs(CPURISCVState *env, int csrno)
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
+static RISCVException mctr(CPURISCVState *env, int csrno)
+{
+#if !defined(CONFIG_USER_ONLY)
+CPUState *cs = env_cpu(env);
+RISCVCPU *cpu = RISCV_CPU(cs);
+int ctr_index;
+int base_csrno = CSR_MHPMCOUNTER3;
+
+if ((riscv_cpu_mxl(env) == MXL_RV32) && csrno >= CSR_MCYCLEH) {
+/* Offset for RV32 mhpmcounternh counters */
+base_csrno += 0x80;
+}
+ctr_index = csrno - base_csrno;
+if (!cpu->cfg.pmu_num || ctr_index >= cpu->cfg.pmu_num) {
+/* The PMU is not enabled or counter is out of range */
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+#endif
+return RISCV_EXCP_NONE;
+}
+
 static RISCVException ctr(CPURISCVState *env, int csrno)
 {
 #if !defined(CONFIG_USER_ONLY)
 CPUState *cs = env_cpu(env);
 RISCVCPU *cpu = RISCV_CPU(cs);
 int ctr_index;
+int base_csrno = CSR_HPMCOUNTER3;
+bool rv32 = riscv_cpu_mxl(env) == MXL_RV32 ? true : false;
+
+if (rv32 && csrno >= CSR_CYCLEH) {
+/* Offset for RV32 hpmcounternh counters */
+base_csrno += 0x80;
+}
+ctr_index = csrno - base_csrno;
 
-if (!cpu->cfg.ext_pmu) {
-/* The PMU extension is not enabled */
+if (!cpu->cfg.pmu_num || ctr_index >= (cpu->cfg.pmu_num)) {
+/* No counter is enabled in PMU or the counter is out of range */
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
@@ -100,7 +130,7 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 }
 break;
 }
-if (riscv_cpu_is_32bit(env)) {
+if (rv32) {
 switch (csrno) {
 case CSR_CYCLEH:
 if (!get_field(env->mcounteren, COUNTEREN_CY)) {
@@ -155,7 +185,7 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 }
 break;
 }
-if (riscv_cpu_mxl(env) == MXL_RV32) {
+if (rv32) {
 switch (csrno) {
 case CSR_CYCLEH:
 if (!get_field(env->hcounteren, COUNTEREN_CY) &&
@@ -3509,35 +3539,35 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 [CSR_HPMCOUNTER30]   = { "hpmcounter30",   ctr,read_zero },
 [CSR_HPMCOUNTER31]   = { "hpmcounter31",   ctr,read_zero },
 
-[CSR_MHPMCOUNTER3]   = { "mhpmcounter3",   any,read_zero },
-[CSR_MHPMCOUNTER4]   = { "mhpmcounter4",   any,read_zero },
-[CSR_MHPMCOUNTER5]   = { "mhpmcounter5",   any,read_zero },
-[CSR_MHPMCOUNTER6]   = { "mhpmcounter6",   any,read_zero },
-[CSR_MHPMCOUNTER7]   = { "mhpmcounter7",   any,read_zero },
-[CSR_MHPMCOUNTER8]   = { "mhpmcounter8",   any,read_zero },
-[CSR_MHPMCOUNTER9]   = { "mhpmcounter9",   any,read_zero },
-[CSR_MHPMCOUNTER10]  = { "mhpmcounter10",  any,read_zero },
-[CSR_MHPMCOUNTER11]  = { "mhpmcounter11",  any,read_zero },
-[CSR_MHPMCOUNTER12]  = { "mhpmcounter12",  any,read_zero },
-

[PATCH v6 02/12] target/riscv: Implement PMU CSR predicate function for S-mode

2022-03-03 Thread Atish Patra
From: Atish Patra 

Currently, the predicate function for PMU related CSRs only works if
virtualization is enabled. It also does not check mcounteren bits before
cycle/minstret/hpmcounterx access.

Support supervisor mode access in the predicate function as well.

Reviewed-by: Alistair Francis 
Reviewed-by: Bin Meng 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/csr.c | 51 ++
 1 file changed, 51 insertions(+)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 6102d5e7e24f..cb4366b30095 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -76,6 +76,57 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
+if (env->priv == PRV_S) {
+switch (csrno) {
+case CSR_CYCLE:
+if (!get_field(env->mcounteren, COUNTEREN_CY)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_TIME:
+if (!get_field(env->mcounteren, COUNTEREN_TM)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_INSTRET:
+if (!get_field(env->mcounteren, COUNTEREN_IR)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
+ctr_index = csrno - CSR_CYCLE;
+if (!get_field(env->mcounteren, 1 << ctr_index)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+}
+if (riscv_cpu_is_32bit(env)) {
+switch (csrno) {
+case CSR_CYCLEH:
+if (!get_field(env->mcounteren, COUNTEREN_CY)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_TIMEH:
+if (!get_field(env->mcounteren, COUNTEREN_TM)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_INSTRETH:
+if (!get_field(env->mcounteren, COUNTEREN_IR)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_HPMCOUNTER3H...CSR_HPMCOUNTER31H:
+ctr_index = csrno - CSR_CYCLEH;
+if (!get_field(env->mcounteren, 1 << ctr_index)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+}
+}
+}
+
 if (riscv_cpu_virt_enabled(env)) {
 switch (csrno) {
 case CSR_CYCLE:
-- 
2.30.2




[PATCH v6 00/12] Improve PMU support

2022-03-03 Thread Atish Patra
The latest version of the SBI specification includes a Performance Monitoring
Unit (PMU) extension[1] which allows the supervisor to start/stop/configure
various PMU events. The Sscofpmf ('Ss' for Privileged arch and Supervisor-level
extensions, and 'cofpmf' for Count OverFlow and Privilege Mode Filtering)
extension[2] allows perf-like tools to handle overflow interrupts and
provides filtering support.

This series implements the full PMU infrastructure to support a
PMU in the virt machine. This will allow us to add any PMU events in the
future.

Currently, this series enables the following pmu events.
1. cycle count
2. instruction count
3. DTLB load/store miss
4. ITLB prefetch miss

The first two are computed using host ticks while the last three are counted
during cpu_tlb_fill. We can do both sampling and counting from guest userspace.
This series has been tested on both RV64 and RV32. Both Linux[3] and OpenSBI[4]
patches are required to get perf working.

Here is an output of perf stat/report while running hackbench with the
OpenSBI & Linux kernel patches applied [3].

Perf stat:
==
[root@fedora-riscv ~]# perf stat -e cycles -e instructions -e dTLB-load-misses 
-e dTLB-store-misses -e iTLB-load-misses \
> perf bench sched messaging -g 1 -l 10
# Running 'sched/messaging' benchmark:
# 20 sender and receiver processes per group
# 1 groups == 40 processes run

 Total time: 0.265 [sec]

 Performance counter stats for 'perf bench sched messaging -g 1 -l 10':

 4,167,825,362  cycles
 4,166,609,256  instructions              #    1.00  insn per cycle
     3,092,026  dTLB-load-misses
       258,280  dTLB-store-misses
     2,068,966  iTLB-load-misses

   0.585791767 seconds time elapsed

   0.373802000 seconds user
   1.042359000 seconds sys

Perf record:

[root@fedora-riscv ~]# perf record -e cycles -e instructions \
> -e dTLB-load-misses -e dTLB-store-misses -e iTLB-load-misses -c 1 \
> perf bench sched messaging -g 1 -l 10
# Running 'sched/messaging' benchmark:
# 20 sender and receiver processes per group
# 1 groups == 40 processes run

 Total time: 1.397 [sec]
[ perf record: Woken up 10 times to write data ]
Check IO/CPU overload!
[ perf record: Captured and wrote 8.211 MB perf.data (214486 samples) ]

[root@fedora-riscv riscv]# perf report
Available samples
107K cycles
107K instructions
250 dTLB-load-misses
13 dTLB-store-misses
172 iTLB-load-misses
..

Changes from v5->v6:
1. Fixed compilation issue with PATCH 1.
2. Addressed other comments.

Changes from v4->v5:
1. Rebased on top of the -next with following patches.
   - isa extension
   - priv 1.12 spec
2. Addressed all the comments on v4
3. Removed additional isa-ext DT node in favor of riscv,isa string update

Changes from v3->v4:
1. Removed the dummy events from pmu DT node.
2. Fixed pmu_avail_counters mask generation.
3. Added a patch to simplify the predicate function for counters. 

Changes from v2->v3:
1. Addressed all the comments on PATCH1-4.
2. Split patch1 into two separate patches.
3. Added explicit comments to explain the event types in DT node.
4. Rebased on latest Qemu.

Changes from v1->v2:
1. Dropped the ACks from v1 as signficant changes happened after v1.
2. sscofpmf support.
3. A generic counter management framework.

[1] https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/riscv-sbi.adoc
[2] https://drive.google.com/file/d/171j4jFjIkKdj5LWcExphq4xG_2sihbfd/edit
[3] https://github.com/atishp04/linux/tree/riscv_pmu_v6
[4] https://github.com/atishp04/qemu/tree/riscv_pmu_v5

Atish Patra (12):
target/riscv: Fix PMU CSR predicate function
target/riscv: Implement PMU CSR predicate function for S-mode
target/riscv: pmu: Rename the counters extension to pmu
target/riscv: pmu: Make number of counters configurable
target/riscv: Implement mcountinhibit CSR
target/riscv: Add support for hpmcounters/hpmevents
target/riscv: Support mcycle/minstret write operation
target/riscv: Add sscofpmf extension support
target/riscv: Simplify counter predicate function
target/riscv: Add few cache related PMU events
hw/riscv: virt: Add PMU DT node to the device tree
target/riscv: Update the privilege field for sscofpmf CSRs

hw/riscv/virt.c   |  28 ++
target/riscv/cpu.c|  12 +-
target/riscv/cpu.h|  49 ++-
target/riscv/cpu_bits.h   |  59 +++
target/riscv/cpu_helper.c |  26 ++
target/riscv/csr.c| 862 --
target/riscv/machine.c  

Re: [PATCH V7 18/29] vfio-pci: refactor for cpr

2022-03-03 Thread Alex Williamson
On Wed, 22 Dec 2021 11:05:23 -0800
Steve Sistare  wrote:

> +if (vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0)) {
...
> +vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
...
> +vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
...
> +ret = vfio_notifier_init(vdev, &vdev->intx.interrupt, "intx-interrupt", 0);
...
> +vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0);
...
> +vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0);
...
> +const char *name = "kvm_interrupt";
...
> +if (vfio_notifier_init(vdev, &vector->kvm_interrupt, name, nr)) {
...
> +vfio_notifier_cleanup(vdev, &vector->kvm_interrupt, name, nr);
...
> +vfio_notifier_cleanup(vdev, &vector->kvm_interrupt, name, nr);
...
> +vfio_notifier_cleanup(vdev, &vector->kvm_interrupt, "kvm_interrupt", nr);
...
> +if (vfio_notifier_init(vdev, &vector->interrupt, "interrupt", nr)) {
...
> +if (vfio_notifier_init(vdev, &vector->interrupt, "interrupt", i)) {
...
> +vfio_notifier_cleanup(vdev, &vector->interrupt, "interrupt", i);
...
> +vfio_notifier_cleanup(vdev, &vector->interrupt, "interrupt", i);
...
> +if (vfio_notifier_init(vdev, &vdev->err_notifier, "err", 0)) {
...
> +vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0);
...
> +vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0);
...
> +if (vfio_notifier_init(vdev, &vdev->req_notifier, "req", 0)) {
...
> +vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0);
...
> +vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0);

Something seems to have gone astray with "err" and "req" vs
"err_notifier" and "req_notifier".  The pattern is broken.  Thanks,

Alex




Re: [PATCH 4/9] util/oslib-win32: Return NULL on qemu_try_memalign() with zero size

2022-03-03 Thread Richard Henderson

On 3/3/22 06:55, Peter Maydell wrote:

Alternately, force size == 1, so that we always get a non-NULL value that can 
be freed.
That's a change on the POSIX side as well, of course.


Yes, I had a look at what actual malloc() implementations tend
to do, and the answer seems to be that forcing size to 1 gives
less weird behaviour for the application. So here that would be

if (size == 0) {
    size++;
}
ptr = _aligned_malloc(size, alignment);

We don't need to do anything on the POSIX side (unless we want to
enforce consistency of handling the size==0 case).


I would do this unconditionally.  The POSIX manpage says that either NULL or a unique 
pointer is a valid return value into *memptr here for size == 0.  What we want in our 
caller is NULL if and only if error.
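
For the record, the matching POSIX-side shape would be a rough sketch like
this (around posix_memalign(); not the actual QEMU helper):

#include <stdlib.h>

/* Returns NULL if and only if the allocation failed. */
static void *try_memalign(size_t alignment, size_t size)
{
    void *ptr = NULL;

    if (size == 0) {
        size = 1;   /* guarantee a unique, freeable pointer */
    }
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return NULL;
    }
    return ptr;
}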



I'd quite like to get this series in before softfreeze (though mostly
just for my personal convenience so it's not hanging around as a
loose end I have to come back to after we reopen for 7.1). Does anybody
object if I squash in that change and put this in a pullrequest,
or would you prefer to see a v2 series first?


I'm happy with a squash and PR.


r~



Re: [PATCH 04/12] qemu-nbd: add --tls-hostname option for TLS certificate validation

2022-03-03 Thread Eric Blake
On Thu, Mar 03, 2022 at 04:03:22PM +, Daniel P. Berrangé wrote:
> When using the --list option, qemu-nbd acts as an NBD client rather
> than a server. As such when using TLS, it has a need to validate
> the server certificate. This adds a --tls-hostname option which can
> be used to override the default hostname used for certificate
> validation.
> 
> Signed-off-by: Daniel P. Berrangé 
> ---
>  docs/tools/qemu-nbd.rst | 14 ++
>  qemu-nbd.c  | 17 -
>  2 files changed, 30 insertions(+), 1 deletion(-)
> 
> diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst
> index 6031f96893..acce54a39d 100644
> --- a/docs/tools/qemu-nbd.rst
> +++ b/docs/tools/qemu-nbd.rst
> @@ -169,6 +169,20 @@ driver options if ``--image-opts`` is specified.
>option; or provide the credentials needed for connecting as a client
>in list mode.
>  
> +.. option:: --tls-hostname=hostname
> +
> +  When validating an x509 certificate received over a TLS connection,
> +  the hostname that the NBD client used to connect will be checked
> +  against information in the server provided certificate. Sometimes
> +  it might be required to override the hostname used to perform this
> +  check. For example if the NBD client is using a tunnel from localhost
> +  to connect to the remote server. In this case the `--tls-hostname`

For example, if the ... to the remote server, the `--tls-hostname`


> +  option should be used to set the officially expected hostname of
> +  the remote NBD server. This can also be used if accessing NBD over
> +  a UNIX socket where there is no inherant hostname available. This

inherent

> +  only is only permitted when acting as a NBD client with the `--list`

s/only is/is/

> +  option.
> +
>  .. option:: --fork
>  

> @@ -835,6 +841,10 @@ int main(int argc, char **argv)
>  error_report("TLS authorization is incompatible with export 
> list");
>  exit(EXIT_FAILURE);
>  }
> +if (tlshostname && !list) {
> +error_report("TLS hostname is only required with export list");

maybe s/required/supported/

> +exit(EXIT_FAILURE);
> +}
>  tlscreds = nbd_get_tls_creds(tlscredsid, list, _err);
>  if (local_err) {
>  error_reportf_err(local_err, "Failed to get TLS creds: ");
> @@ -845,6 +855,10 @@ int main(int argc, char **argv)
>  error_report("--tls-authz is not permitted without --tls-creds");
>  exit(EXIT_FAILURE);
>  }
> +if (tlshostname) {
> +error_report("--tls-hostname is not permitted without 
> --tls-creds");
> +exit(EXIT_FAILURE);
> +}
>  }
>  
>  if (selinux_label) {
> @@ -861,7 +875,8 @@ int main(int argc, char **argv)
>  
>  if (list) {
>  saddr = nbd_build_socket_address(sockpath, bindto, port);
> -return qemu_nbd_client_list(saddr, tlscreds, bindto);
> +return qemu_nbd_client_list(saddr, tlscreds,
> +tlshostname ? tlshostname : bindto);

With the grammar fixes,

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 1/5] python/utils: add add_visual_margin() text decoration utility

2022-03-03 Thread Eric Blake
On Thu, Mar 03, 2022 at 03:58:58PM -0500, John Snow wrote:
> >>> print(add_visual_margin(msg, width=72, name="Commit Message"))
> ┏━ Commit Message ━━
> ┃ add_visual_margin() takes a chunk of text and wraps it in a visual
> ┃ container that force-wraps to a specified width. An optional title
> ┃ label may be given, and any of the individual glyphs used to draw the
> ┃ box may be replaced or specified as well.
> ┗━━━

I see you dropped the right margin compared to earlier versions, but
agree that this is still a nice visual indicator, and probably easier
to maintain in this form.  And it got rid of the weird spacing on the
left when the wrap point hit at the wrong time.

> +Decorate and wrap some text with a visual decoration around it.
> +
> +This function assumes that the text decoration characters are single
> +characters that display using a single monospace column.
> +
> +┏━ Example ━
> +┃ This is what this function looks like with text content that's
> +┃ wrapped to 72 characters. The right-hand margin is left open to
> +┃ accommodate the occasional unicode character that might make
> +┃ predicting the total "visual" width of a line difficult. This
> +┃ provides a visual distinction that's good-enough, though.
> +┗━━━

Yep - hand-waving away Unicode messiness is certainly easiest ;)

Reviewed-by: Eric Blake 

[take with a grain of salt - my python is weak. But as you said in the
cover letter, it's fairly straightforward to reproduce an environment
where you can see it in action for hands-on testing]

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 00/10] Python: Fix qmp race condition on accept()

2022-03-03 Thread John Snow
Ping - Dan, Kevin: Any thoughts on the new API here? I only ask
because there was a bit of feedback in response to the last patch and
I didn't want to stage this without giving you a fair chance to look.

I'm not expecting any real review on the Python, just wanted to see if
you felt like this design adequately addressed your feedback from
before.

Here is the super high level:

1. what was accept() is renamed start_server_and_accept(). It's a
one-shot command that (effectively) does bind(2), listen(2), accept(2).
2. start_server() method is added. It calls bind() and listen(), and
new connections will be accept(2)'d asynchronously.
3. accept() method is added. It does not *actually* call accept(2),
it's more like "wait for accept(2) to be called in the bottom half".
Similar enough.
4. start_server_and_accept() is, by the end of the series, literally
just two calls to start_server() and accept().
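
Put differently, the call pattern we're aiming for looks roughly like
this (a hedged sketch; the names follow the summary above and aren't
checked against the final API):

import asyncio

from qemu.aqmp import QMPClient  # import path assumed, per this series


async def main() -> None:
    client = QMPClient('my-vm')

    # Two-step, race-free form: bind(2)/listen(2) happen here ...
    await client.start_server('/tmp/qmp.sock')
    # ... QEMU can now be launched with -qmp unix:/tmp/qmp.sock ...
    # ... and accept() waits for the incoming connection.
    await client.accept()

    print(await client.execute('query-status'))
    await client.disconnect()

asyncio.run(main())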

Otherwise, I'll just assume no news is good news and I'll send a
pullreq soon so that Peter's NetBSD tests stop being flaky.

--js


On Fri, Feb 25, 2022 at 4:00 PM John Snow  wrote:
>
> GitLab: https://gitlab.com/jsnow/qemu/-/commits/python-aqmp-accept-changes
> CI: https://gitlab.com/jsnow/qemu/-/pipelines/479795153
>
> This redesigns the async QMP interface to allow for race-free
> connections from the synchronous interface. It should hopefully address
> the race conditions Peter has been seeing on the NetBSD vm tests.
>
> John Snow (10):
>   python/aqmp: add _session_guard()
>   python/aqmp: rename 'accept()' to 'start_server_and_accept()'
>   python/aqmp: remove _new_session and _establish_connection
>   python/aqmp: split _client_connected_cb() out as _incoming()
>   python/aqmp: squelch pylint warning for too many lines
>   python/aqmp: refactor _do_accept() into two distinct steps
>   python/aqmp: stop the server during disconnect()
>   python/aqmp: add start_server() and accept() methods
>   python/aqmp: fix race condition in legacy.py
>   python/aqmp: drop _bind_hack()
>
>  python/qemu/aqmp/legacy.py   |   7 +-
>  python/qemu/aqmp/protocol.py | 393 +--
>  python/tests/protocol.py |  45 ++--
>  3 files changed, 273 insertions(+), 172 deletions(-)
>
> --
> 2.34.1
>
>




Re: [PATCH v2 4/4] iotests/185: Add post-READY quit tests

2022-03-03 Thread Eric Blake
On Thu, Mar 03, 2022 at 05:48:14PM +0100, Hanna Reitz wrote:
> 185 tests quitting qemu while a block job is active.  It does not
> specifically test quitting qemu while a mirror or active commit job is
> in its READY phase.
> 
> Add two test cases for this, where we respectively mirror or commit to
> an external QSD instance, which provides a throttled block device.  qemu
> is supposed to cancel the job so that it can quit as soon as possible
> instead of waiting for the job to complete (which it did before 6.2).
> 
> Signed-off-by: Hanna Reitz 
> ---
>  tests/qemu-iotests/185 | 190 -
>  tests/qemu-iotests/185.out |  48 ++
>  2 files changed, 237 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185
> index f2ec5c5ceb..8b1143dc16 100755
> --- a/tests/qemu-iotests/185
> +++ b/tests/qemu-iotests/185
> @@ -33,6 +33,12 @@ _cleanup()
>  _rm_test_img "${TEST_IMG}.copy"
>  _cleanup_test_img
>  _cleanup_qemu
> +
> +if [ -f "$TEST_DIR/qsd.pid" ]; then
> +kill -SIGKILL "$(cat "$TEST_DIR/qsd.pid")"
> +rm -f "$TEST_DIR/qsd.pid"
> +fi
> +rm -f "$SOCK_DIR/qsd.sock"
>  }
>  trap "_cleanup; exit \$status" 0 1 2 3 15
>  
> @@ -45,7 +51,7 @@ _supported_fmt qcow2
>  _supported_proto file
>  _supported_os Linux
>  
> -size=64M
> +size=$((64 * 1048576))

I tend to write $((64 * 1024 * 1024)) rather than remembering all the
digits of 2^20, but your way is fine.

Nice test addition!

Reviewed-by: Eric Blake 

I'm happy to queue this series through my NBD tree in time for
softfreeze, if no one else speaks for it first.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v2 3/4] qsd: Add --daemonize

2022-03-03 Thread Eric Blake
On Thu, Mar 03, 2022 at 05:48:13PM +0100, Hanna Reitz wrote:
> To implement this, we reuse the existing daemonizing functions from the
> system emulator, which mainly do the following:
> - Fork off a child process, and set up a pipe between parent and child
> - The parent process waits until the child sends a status byte over the
>   pipe (0 means that the child was set up successfully; anything else
>   (including errors or EOF) means that the child was not set up
>   successfully), and then exits with an appropriate exit status
> - The child process enters a new session (forking off again), changes
>   the umask, and will ignore terminal signals from then on
> - Once set-up is complete, the child will chdir to /, redirect all
>   standard I/O streams to /dev/null, and tell the parent that set-up has
>   been completed successfully
> 
> In contrast to qemu-nbd's --fork implementation, during the set up
> phase, error messages are not piped through the parent process.
> qemu-nbd mainly does this to detect errors, though (while os_daemonize()
> has the child explicitly signal success after set up); because we do not
> redirect stderr after forking, error messages continue to appear on
> whatever the parent's stderr was (until set up is complete).
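
In plain POSIX terms, the parent/child handshake described above is
roughly the following (hedged sketch only; the real
os_daemonize()/os_setup_post() split differs in detail):

#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

static int status_pipe[2];

static void daemonize_sketch(void)
{
    char status;

    if (pipe(status_pipe) < 0) {
        exit(EXIT_FAILURE);
    }
    switch (fork()) {
    case -1:
        exit(EXIT_FAILURE);
    case 0:                         /* child: continue setting up */
        close(status_pipe[0]);
        setsid();                   /* new session (real code forks again) */
        umask(027);
        return;
    default:                        /* parent: wait for the status byte */
        close(status_pipe[1]);
        if (read(status_pipe[0], &status, 1) == 1 && status == 0) {
            exit(EXIT_SUCCESS);     /* child reported successful set-up */
        }
        exit(EXIT_FAILURE);         /* error byte, or EOF on child death */
    }
}

/* Once set-up completes, the child chdir("/")s, points the standard
 * streams at /dev/null, and writes a 0 byte to status_pipe[1]. */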
> 
> Signed-off-by: Hanna Reitz 
> ---
>  docs/tools/qemu-storage-daemon.rst   |  7 +++
>  storage-daemon/qemu-storage-daemon.c | 15 +++
>  2 files changed, 22 insertions(+)
>

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v2 2/4] qsd: Add pre-init argument parsing pass

2022-03-03 Thread Eric Blake
On Thu, Mar 03, 2022 at 05:48:12PM +0100, Hanna Reitz wrote:
> In contrast to qemu-nbd (where it is called --fork) and the system
> emulator, QSD does not have a --daemonize switch yet.  Just like them,
> QSD allows setting up block devices and exports on the command line.
> When doing so, it is often necessary for whoever invoked the QSD to wait
> until these exports are fully set up.  A --daemonize switch allows
> precisely this, by virtue of the parent process exiting once everything
> is set up.
> 
> Note that there are alternative ways of waiting for all exports to be
> set up, for example:
> - Passing the --pidfile option and waiting until the respective file
>   exists (but I do not know if there is a way of implementing this
>   without a busy wait loop)

Non-portably, you could use inotify or similar, to get a true
event-driven wakeup when the file is created.  And here's the python
glue that libnbd uses, instead of --pidfile:

https://gitlab.com/nbdkit/libnbd/-/blob/master/interop/interop-qemu-storage-daemon.sh#L58
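
In C, that event-driven wait could look roughly like this (hedged
sketch, watching the PID file's directory for creation):

#include <limits.h>
#include <string.h>
#include <sys/inotify.h>
#include <unistd.h>

/*
 * Block until "name" shows up in "dir".  Real code must also check
 * whether the file already exists before the watch is armed (to avoid
 * a startup race) and should walk all events in the buffer, not just
 * the first one.
 */
static void wait_for_file(const char *dir, const char *name)
{
    char buf[sizeof(struct inotify_event) + NAME_MAX + 1]
        __attribute__((aligned(8)));
    int fd = inotify_init();

    inotify_add_watch(fd, dir, IN_CREATE | IN_MOVED_TO);
    while (read(fd, buf, sizeof(buf)) > 0) {
        struct inotify_event *ev = (struct inotify_event *)buf;

        if (ev->len && strcmp(ev->name, name) == 0) {
            break;
        }
    }
    close(fd);
}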

> - Set up some network server (e.g. on a Unix socket) and have the QSD
>   connect to it after all arguments have been processed by appending
>   corresponding --chardev and --monitor options to the command line,
>   and then wait until the QSD connects
> 
> Having a --daemonize option would make this simpler, though, without
> having to rely on additional tools (to set up a network server) or busy
> waiting.
> 
> Implementing a --daemonize switch means having to fork the QSD process.
> Ideally, we should do this as early as possible: All the parent process
> has to do is to wait for the child process to signal completion of its
> set-up phase, and therefore there is basically no initialization that
> needs to be done before the fork.  On the other hand, forking after
> initialization steps means having to consider how those steps (like
> setting up the block layer or QMP) interact with a later fork, which is
> often not trivial.
> 
> In order to fork this early, we must scan the command line for
> --daemonize long before our current process_options() call.  Instead of
> adding custom new code to do so, just reuse process_options() and give
> it a @pre_init_pass argument to distinguish the two passes.  I believe
> there are some other switches but --daemonize that deserve parsing in

s/but/beyond/

> the first pass:
> 
> - --help and --version are supposed to only print some text and then
>   immediately exit (so any initialization we do would be for naught).
>   This changes behavior, because now "--blockdev inv-drv --help" will
>   print a help text instead of complaining about the --blockdev
>   argument.
>   Note that this is similar in behavior to other tools, though: "--help"
>   is generally immediately acted upon when finding it in the argument
>   list, potentially before other arguments (even ones before it) are
>   acted on.  For example, "ls /does-not-exist --help" prints a help text
>   and does not complain about ENOENT.

Well, GNU ls does that, but only if POSIXLY_CORRECT is not set (a
strict POSIX ls must give you two ENOENT).

> 
> - --pidfile does not need initialization, and is already exempted from
>   the sequential order that process_options() claims to strictly follow
>   (the PID file is only created after all arguments are processed, not
>   at the time the --pidfile argument appears), so it makes sense to
>   include it in the same category as --daemonize.
> 
> - Invalid arguments should always be reported as soon as possible.  (The
>   same caveat with --help applies: That means that "--blockdev inv-drv
>   --inv-arg" will now complain about --inv-arg, not inv-drv.)
> 
> This patch does make some references to --daemonize without having
> implemented it yet, but that will happen in the next patch.
> 
> Signed-off-by: Hanna Reitz 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> ---
>  storage-daemon/qemu-storage-daemon.c | 43 
>  1 file changed, 38 insertions(+), 5 deletions(-)
>
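
The two-pass shape described above boils down to roughly this (hedged
sketch, not the actual QSD code):

#include <stdbool.h>
#include <string.h>

/*
 * Pass one only acts on --help, --version, --pidfile and --daemonize
 * (and rejects unknown options); pass two, run after the possible
 * fork, handles everything else in command-line order.
 */
static void process_options(int argc, char **argv, bool pre_init_pass)
{
    for (int i = 1; i < argc; i++) {
        bool early = !strcmp(argv[i], "--daemonize") ||
                     !strcmp(argv[i], "--pidfile") ||
                     !strcmp(argv[i], "--help") ||
                     !strcmp(argv[i], "--version");

        if (pre_init_pass != early) {
            continue;   /* leave this option for the other pass */
        }
        /* ... act on argv[i] here ... */
    }
}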

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 03/12] block/nbd: support override of hostname for TLS certificate validation

2022-03-03 Thread Eric Blake
On Thu, Mar 03, 2022 at 04:03:21PM +, Daniel P. Berrangé wrote:
> When connecting to an NBD server with TLS and x509 credentials,
> the client must validate the hostname it uses for the connection,
> against that published in the server's certificate. If the client
> is tunnelling its connection over some other channel, however, the
> hostname it uses may not match the info reported in the server's
> certificate. In such a case, the user needs to explicitly set an
> override for the hostname to use for certificate validation.
> 
> This is achieved by adding a 'tls-hostname' property to the NBD
> block driver.
> 
> Signed-off-by: Daniel P. Berrangé 
> ---
>  block/nbd.c  | 18 +++---
>  qapi/block-core.json |  3 +++
>  2 files changed, 18 insertions(+), 3 deletions(-)
> 
> +++ b/qapi/block-core.json
> @@ -4078,6 +4078,8 @@
>  #
>  # @tls-creds: TLS credentials ID
>  #
> +# @tls-hostname: TLS hostname override for certificate validation

Add the tag '(since 7.0)' (in the interest of soft freeze deadlines, I
can do that as part of queuing through my NBD tree if nothing else
major turns up in the series), and you can have:

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 4/5] hw/intc/arm_gicv3: Fix missing spaces in error log messages

2022-03-03 Thread Richard Henderson

On 3/3/22 10:23, Peter Maydell wrote:

We forgot a space in some log messages, so the output ended
up looking like
gicv3_dist_write: invalid guest write at offset 8000size 8

with a missing space before "size". Add the missing spaces.

Signed-off-by: Peter Maydell
---
  hw/intc/arm_gicv3_dist.c | 4 ++--
  hw/intc/arm_gicv3_its.c  | 4 ++--
  2 files changed, 4 insertions(+), 4 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 5/5] hw/intc/arm_gicv3_cpuif: Fix register names in ICV_HPPIR read trace event

2022-03-03 Thread Richard Henderson

On 3/3/22 10:23, Peter Maydell wrote:

The trace_gicv3_icv_hppir_read trace event takes an integer value
which it uses to form the register name, which should be either
ICV_HPPIR0 or ICV_HPPIR1.  We were passing in the 'grp' variable for
this, but that is either GICV3_G0 or GICV3_G1NS, which happen to be 0
and 2, which meant that tracing for the ICV_HPPIR1 register was
incorrectly printed as ICV_HPPIR2.

Use the same approach we do for all the other similar trace events,
and pass in 'ri->crm == 8 ?  0 : 1', deriving the index value
directly from the ARMCPRegInfo struct.

Signed-off-by: Peter Maydell 


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 3/5] hw/intc/arm_gicv3: Specify valid and impl in MemoryRegionOps

2022-03-03 Thread Richard Henderson

On 3/3/22 10:23, Peter Maydell wrote:

The GICv3 has some registers that support byte accesses, and some
that support 8-byte accesses.  Our TCG implementation implements all
of this, switching on the 'size' argument and handling the registers
that must support reads of that size while logging an error for
attempted accesses to registers that do not support that size access.
However we forgot to tell the core memory subsystem about this by
specifying the .impl and .valid fields in the MemoryRegionOps struct,
so the core was happily simulating 8 byte accesses by combining two 4
byte accesses.  This doesn't have much guest-visible effect, since
there aren't many 8 byte registers and they all support being written
in two 4 byte parts.

Set the .impl and .valid fields to say that all sizes from 1 to 8
bytes are both valid and implemented by the device.

Signed-off-by: Peter Maydell 


Reviewed-by: Richard Henderson 


r~



Re: [PATCH 2/5] hw/intc/arm_gicv3_its: Add trace events for table reads and writes

2022-03-03 Thread Richard Henderson

On 3/3/22 10:23, Peter Maydell wrote:

For debugging guest use of the ITS, it can be helpful to trace
when the ITS reads and writes the in-memory tables.

Signed-off-by: Peter Maydell
---
  hw/intc/arm_gicv3_its.c | 37 +++--
  hw/intc/trace-events|  9 +
  2 files changed, 40 insertions(+), 6 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 1/5] hw/intc/arm_gicv3_its: Add trace events for commands

2022-03-03 Thread Richard Henderson

On 3/3/22 10:23, Peter Maydell wrote:

When debugging code that's using the ITS, it's helpful to
see tracing of the ITS commands that the guest executes. Add
suitable trace events.

Signed-off-by: Peter Maydell
---
  hw/intc/arm_gicv3_its.c | 28 ++--
  hw/intc/trace-events| 12 
  2 files changed, 38 insertions(+), 2 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v2 5/5] tests/tcg/ppc64le: Use Altivec register names in clobbler list

2022-03-03 Thread Richard Henderson

On 3/3/22 10:53, Matheus K. Ferst wrote:

On 03/03/2022 16:30, Richard Henderson wrote:

On 3/3/22 07:20, matheus.fe...@eldorado.org.br wrote:

From: Matheus Ferst 

LLVM/Clang doesn't know the VSX registers when compiling with
-mabi=elfv1. Use only registers >= 32 and list them with their Altivec
name.

Signed-off-by: Matheus Ferst 


This description isn't quite right.  The change to the m[tf]vsr insns is a 
generic bug
fix, and not related to Clang.



I'm not sure if I understood. I'm targeting the Clang problem with this patch; is 
something else being fixed by this change?
AFAICT, the old "mtvsrd 0, %2" and "mfvsrd %0, 0" were correct, I'm just changing from VSR 
0 to VSR 32 to allow the clobber with Clang, but GCC doesn't seem to have this limitation 
with ELFv1.


Oh, whoops, I mis-read the patch.

Reviewed-by: Richard Henderson 


r~



[PULL 23/30] accel/tcg: Split out g2h_tlbe

2022-03-03 Thread Richard Henderson
Create a new function to combine a CPUTLBEntry addend
with the guest address to form a host address.

Reviewed-by: WANG Xuerui 
Reviewed-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Signed-off-by: Richard Henderson 
---
 accel/tcg/cputlb.c | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 3b918fe018..0e62aa5d7c 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -91,6 +91,11 @@ static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
 return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
 }
 
+static inline uintptr_t g2h_tlbe(const CPUTLBEntry *tlb, target_ulong gaddr)
+{
+return tlb->addend + (uintptr_t)gaddr;
+}
+
 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
  size_t max_entries)
 {
@@ -986,8 +991,7 @@ static void tlb_reset_dirty_range_locked(CPUTLBEntry 
*tlb_entry,
 
 if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
  TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
-addr &= TARGET_PAGE_MASK;
-addr += tlb_entry->addend;
+addr = g2h_tlbe(tlb_entry, addr & TARGET_PAGE_MASK);
 if ((addr - start) < length) {
 #if TCG_OVERSIZED_GUEST
 tlb_entry->addr_write |= TLB_NOTDIRTY;
@@ -1537,7 +1541,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState 
*env, target_ulong addr,
 return -1;
 }
 
-p = (void *)((uintptr_t)addr + entry->addend);
+p = (void *)g2h_tlbe(entry, addr);
 if (hostp) {
 *hostp = p;
 }
@@ -1629,7 +1633,7 @@ static int probe_access_internal(CPUArchState *env, 
target_ulong addr,
 }
 
 /* Everything else is RAM. */
-*phost = (void *)((uintptr_t)addr + entry->addend);
+*phost = (void *)g2h_tlbe(entry, addr);
 return flags;
 }
 
@@ -1737,7 +1741,7 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, 
int mmu_idx,
 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
 } else {
 data->is_io = false;
-data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
+data->v.ram.hostaddr = (void *)g2h_tlbe(tlbe, addr);
 }
 return true;
 } else {
@@ -1836,7 +1840,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
target_ulong addr,
 goto stop_the_world;
 }
 
-hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
+hostaddr = (void *)g2h_tlbe(tlbe, addr);
 
 if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
 notdirty_write(env_cpu(env), addr, size,
@@ -1967,7 +1971,7 @@ load_helper(CPUArchState *env, target_ulong addr, 
MemOpIdx oi,
 access_type, op ^ (need_swap * MO_BSWAP));
 }
 
-haddr = (void *)((uintptr_t)addr + entry->addend);
+haddr = (void *)g2h_tlbe(entry, addr);
 
 /*
  * Keep these two load_memop separate to ensure that the compiler
@@ -2004,7 +2008,7 @@ load_helper(CPUArchState *env, target_ulong addr, 
MemOpIdx oi,
 return res & MAKE_64BIT_MASK(0, size * 8);
 }
 
-haddr = (void *)((uintptr_t)addr + entry->addend);
+haddr = (void *)g2h_tlbe(entry, addr);
 return load_memop(haddr, op);
 }
 
@@ -2375,7 +2379,7 @@ store_helper(CPUArchState *env, target_ulong addr, 
uint64_t val,
 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
 }
 
-haddr = (void *)((uintptr_t)addr + entry->addend);
+haddr = (void *)g2h_tlbe(entry, addr);
 
 /*
  * Keep these two store_memop separate to ensure that the compiler
@@ -2400,7 +2404,7 @@ store_helper(CPUArchState *env, target_ulong addr, 
uint64_t val,
 return;
 }
 
-haddr = (void *)((uintptr_t)addr + entry->addend);
+haddr = (void *)g2h_tlbe(entry, addr);
 store_memop(haddr, val, op);
 }
 
-- 
2.25.1




[PULL 29/30] tcg/riscv: Support TCG_TARGET_SIGNED_ADDR32

2022-03-03 Thread Richard Henderson
All RV64 32-bit operations sign-extend the output, so we are easily
able to keep TCG_TYPE_I32 values sign-extended in host registers.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Signed-off-by: Richard Henderson 
---
 tcg/riscv/tcg-target-sa32.h | 6 +-
 tcg/riscv/tcg-target.c.inc  | 8 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tcg/riscv/tcg-target-sa32.h b/tcg/riscv/tcg-target-sa32.h
index cb185b1526..703467b37a 100644
--- a/tcg/riscv/tcg-target-sa32.h
+++ b/tcg/riscv/tcg-target-sa32.h
@@ -1 +1,5 @@
-#define TCG_TARGET_SIGNED_ADDR32 0
+/*
+ * Do not set TCG_TARGET_SIGNED_ADDR32 for RV32;
+ * TCG expects this to only be set for 64-bit hosts.
+ */
+#define TCG_TARGET_SIGNED_ADDR32  (__riscv_xlen == 64)
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 6409d9c3d5..c999711494 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -951,10 +951,6 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
 tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
 
 /* TLB Hit - translate address using addend.  */
-if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-tcg_out_ext32u(s, TCG_REG_TMP0, addrl);
-addrl = TCG_REG_TMP0;
-}
 tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
 }
 
@@ -1175,7 +1171,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is_64)
 data_regl, data_regh, addr_regl, addr_regh,
 s->code_ptr, label_ptr);
 #else
-if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS && !guest_base_signed_addr32) {
 tcg_out_ext32u(s, base, addr_regl);
 addr_regl = base;
 }
@@ -1247,7 +1243,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args, bool is_64)
 data_regl, data_regh, addr_regl, addr_regh,
 s->code_ptr, label_ptr);
 #else
-if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS && !guest_base_signed_addr32) {
 tcg_out_ext32u(s, base, addr_regl);
 addr_regl = base;
 }
-- 
2.25.1




[PULL 26/30] linux-user: Support TCG_TARGET_SIGNED_ADDR32

2022-03-03 Thread Richard Henderson
When using reserved_va, which is the default for a 64-bit host
and a 32-bit guest, set guest_base_signed_addr32 if requested
by TCG_TARGET_SIGNED_ADDR32, and the executable layout allows.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 include/exec/cpu-all.h |  4 ---
 linux-user/elfload.c   | 62 ++
 2 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 26ecd3c886..8bea0e069e 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -269,11 +269,7 @@ extern const TargetPageBits target_page;
 #define PAGE_RESET 0x0040
 /* For linux-user, indicates that the page is MAP_ANON. */
 #define PAGE_ANON  0x0080
-
-#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
-/* FIXME: Code that sets/uses this is broken and needs to go away.  */
 #define PAGE_RESERVED  0x0100
-#endif
 /* Target-specific bits that will be used via page_get_flags().  */
 #define PAGE_TARGET_1  0x0200
 #define PAGE_TARGET_2  0x0400
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 9628a38361..5522f9e721 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2482,34 +2482,72 @@ static void pgb_dynamic(const char *image_name, long 
align)
 static void pgb_reserved_va(const char *image_name, abi_ulong guest_loaddr,
 abi_ulong guest_hiaddr, long align)
 {
-int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
+int flags = (MAP_ANONYMOUS | MAP_PRIVATE |
+ MAP_NORESERVE | MAP_FIXED_NOREPLACE);
+unsigned long local_rva = reserved_va;
+bool protect_wrap = false;
 void *addr, *test;
 
-if (guest_hiaddr > reserved_va) {
+if (guest_hiaddr > local_rva) {
 error_report("%s: requires more than reserved virtual "
  "address space (0x%" PRIx64 " > 0x%lx)",
- image_name, (uint64_t)guest_hiaddr, reserved_va);
+ image_name, (uint64_t)guest_hiaddr, local_rva);
 exit(EXIT_FAILURE);
 }
 
-/* Widen the "image" to the entire reserved address space. */
-pgb_static(image_name, 0, reserved_va, align);
+if (TCG_TARGET_SIGNED_ADDR32 && TARGET_LONG_BITS == 32) {
+if (guest_loaddr < 0x80000000u && guest_hiaddr > 0x80000000u) {
+/*
+ * The executable itself wraps on signed addresses.
+ * Without per-page translation, we must keep the
+ * guest address 0x7fff_ffff adjacent to 0x8000_0000
+ * consecutive in host memory: unsigned addresses.
+ */
+} else {
+set_guest_base_signed_addr32();
+if (local_rva <= 0x80000000u) {
+/* No guest addresses are "negative": win! */
+} else {
+/* Begin by allocating the entire address space. */
+local_rva = 0xfffffffful + 1;
+protect_wrap = true;
+}
+}
+}
 
-/* osdep.h defines this as 0 if it's missing */
-flags |= MAP_FIXED_NOREPLACE;
+/* Widen the "image" to the entire reserved address space. */
+pgb_static(image_name, 0, local_rva, align);
+assert(guest_base != 0);
 
 /* Reserve the memory on the host. */
-assert(guest_base != 0);
 test = g2h_untagged(0);
-addr = mmap(test, reserved_va, PROT_NONE, flags, -1, 0);
+addr = mmap(test, local_rva, PROT_NONE, flags, -1, 0);
 if (addr == MAP_FAILED || addr != test) {
+/*
+ * If protect_wrap, we could try again with the original reserved_va
+ * setting, but the edge case of low ulimit vm setting on a 64-bit
+ * host is probably useless.
+ */
 error_report("Unable to reserve 0x%lx bytes of virtual address "
- "space at %p (%s) for use as guest address space (check 
your"
- "virtual memory ulimit setting, min_mmap_addr or reserve 
less "
- "using -R option)", reserved_va, test, strerror(errno));
+ "space at %p (%s) for use as guest address space "
+ "(check your virtual memory ulimit setting, "
+ "min_mmap_addr or reserve less using -R option)",
+ local_rva, test, strerror(errno));
 exit(EXIT_FAILURE);
 }
 
+if (protect_wrap) {
+/*
+ * Prevent the page just before 0x8000_0000 from being allocated.
+ * This prevents a single guest object/allocation from crossing
+ * the signed wrap, and thus being discontiguous in host memory.
+ */
+page_set_flags(0x7fffffff & TARGET_PAGE_MASK, 0x80000000u,
+   PAGE_RESERVED);
+/* Adjust guest_base so that 0 is in the middle of the reservation. */
+guest_base += 0x80000000ul;
+}
+
 qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %p for %lu bytes\n",
   

[PULL 24/30] accel/tcg: Support TCG_TARGET_SIGNED_ADDR32 for softmmu

2022-03-03 Thread Richard Henderson
When TCG_TARGET_SIGNED_ADDR32 is set, adjust the tlb addend to
allow the 32-bit guest address to be sign extended within the
64-bit host register instead of zero extended.

This will simplify tcg hosts like MIPS, RISC-V, and LoongArch,
which naturally sign-extend 32-bit values, in contrast to x86_64
and AArch64 which zero-extend them.
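
As a hedged illustration (not part of the patch), the difference for a
"negative" 32-bit guest address on a 64-bit host:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t ga = 0xffff1000u;   /* a guest address with bit 31 set */

    printf("zero-extended: 0x%016" PRIxPTR "\n", (uintptr_t)ga);
    printf("sign-extended: 0x%016" PRIxPTR "\n", (uintptr_t)(int32_t)ga);
    /*
     * Biasing the addend with -(int32_t)vaddr_page instead of
     * -vaddr_page compensates, so addend + sign-extended address
     * still lands on the same host byte.
     */
    return 0;
}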

Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 accel/tcg/cputlb.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 0e62aa5d7c..0dbc3efbc7 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -40,6 +40,7 @@
 #include "qemu/plugin-memory.h"
 #endif
 #include "tcg/tcg-ldst.h"
+#include "tcg-target-sa32.h"
 
 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
 /* #define DEBUG_TLB */
@@ -93,6 +94,9 @@ static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
 
 static inline uintptr_t g2h_tlbe(const CPUTLBEntry *tlb, target_ulong gaddr)
 {
+if (TCG_TARGET_SIGNED_ADDR32 && TARGET_LONG_BITS == 32) {
+return tlb->addend + (int32_t)gaddr;
+}
 return tlb->addend + (uintptr_t)gaddr;
 }
 
@@ -1244,7 +1248,13 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong 
vaddr,
 desc->iotlb[index].attrs = attrs;
 
 /* Now calculate the new entry */
-tn.addend = addend - vaddr_page;
+
+if (TCG_TARGET_SIGNED_ADDR32 && TARGET_LONG_BITS == 32) {
+tn.addend = addend - (int32_t)vaddr_page;
+} else {
+tn.addend = addend - vaddr_page;
+}
+
 if (prot & PAGE_READ) {
 tn.addr_read = address;
 if (wp_flags & BP_MEM_READ) {
-- 
2.25.1




[PULL 25/30] accel/tcg: Add guest_base_signed_addr32 for user-only

2022-03-03 Thread Richard Henderson
While the host may prefer to treat 32-bit addresses as signed,
there are edge cases of guests that cannot be implemented with
addresses 0x7fff_ffff and 0x8000_0000 being non-consecutive.

Therefore, default to guest_base_signed_addr32 false, and allow
probe_guest_base to determine whether it is possible to set it
to true.  A tcg backend which sets TCG_TARGET_SIGNED_ADDR32 will
have to cope with either setting for user-only.

Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 include/exec/cpu-all.h  | 16 
 include/exec/cpu_ldst.h |  3 ++-
 bsd-user/main.c |  4 
 linux-user/main.c   |  3 +++
 4 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 84caf5c3d9..26ecd3c886 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -146,6 +146,7 @@ static inline void tswap64s(uint64_t *s)
 
 #if defined(CONFIG_USER_ONLY)
 #include "exec/user/abitypes.h"
+#include "tcg-target-sa32.h"
 
 /* On some host systems the guest address space is reserved on the host.
  * This allows the guest address space to be offset to a convenient location.
@@ -154,6 +155,21 @@ extern uintptr_t guest_base;
 extern bool have_guest_base;
 extern unsigned long reserved_va;
 
+#if TCG_TARGET_SIGNED_ADDR32 && TARGET_LONG_BITS == 32
+extern bool guest_base_signed_addr32;
+#else
+#define guest_base_signed_addr32  false
+#endif
+
+static inline void set_guest_base_signed_addr32(void)
+{
+#ifdef guest_base_signed_addr32
+qemu_build_not_reached();
+#else
+guest_base_signed_addr32 = true;
+#endif
+}
+
 /*
  * Limit the guest addresses as best we can.
  *
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index da987fe8ad..add45499ee 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -87,7 +87,8 @@ static inline abi_ptr cpu_untagged_addr(CPUState *cs, abi_ptr 
x)
 /* All direct uses of g2h and h2g need to go away for usermode softmmu.  */
 static inline void *g2h_untagged(abi_ptr x)
 {
-return (void *)((uintptr_t)(x) + guest_base);
+uintptr_t hx = guest_base_signed_addr32 ? (int32_t)x : (uintptr_t)x;
+return (void *)(guest_base + hx);
 }
 
 static inline void *g2h(CPUState *cs, abi_ptr x)
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 88d347d05e..c181e54495 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -54,6 +54,10 @@
 int singlestep;
 uintptr_t guest_base;
 bool have_guest_base;
+#ifndef guest_base_signed_addr32
+bool guest_base_signed_addr32;
+#endif
+
 /*
  * When running 32-on-64 we should make sure we can fit all of the possible
  * guest address space into a contiguous chunk of virtual host memory.
diff --git a/linux-user/main.c b/linux-user/main.c
index fbc9bcfd5f..5d963ddb64 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -72,6 +72,9 @@ static const char *seed_optarg;
 unsigned long mmap_min_addr;
 uintptr_t guest_base;
 bool have_guest_base;
+#ifndef guest_base_signed_addr32
+bool guest_base_signed_addr32;
+#endif
 
 /*
  * Used to implement backwards-compatibility for the `-strace`, and
-- 
2.25.1




[PULL 28/30] tcg/mips: Support TCG_TARGET_SIGNED_ADDR32

2022-03-03 Thread Richard Henderson
All 32-bit mips operations sign-extend the output, so we are easily
able to keep TCG_TYPE_I32 values sign-extended in host registers.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/mips/tcg-target-sa32.h |  8 
 tcg/mips/tcg-target.c.inc  | 10 ++
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/tcg/mips/tcg-target-sa32.h b/tcg/mips/tcg-target-sa32.h
index cb185b1526..51255e7cba 100644
--- a/tcg/mips/tcg-target-sa32.h
+++ b/tcg/mips/tcg-target-sa32.h
@@ -1 +1,9 @@
+/*
+ * Do not set TCG_TARGET_SIGNED_ADDR32 for mips32;
+ * TCG expects this to only be set for 64-bit hosts.
+ */
+#ifdef __mips64
+#define TCG_TARGET_SIGNED_ADDR32 1
+#else
 #define TCG_TARGET_SIGNED_ADDR32 0
+#endif
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 993149d18a..b97c032ded 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -1168,12 +1168,6 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, 
TCGReg addrl,
  TCG_TMP0, TCG_TMP3, cmp_off);
 }
 
-/* Zero extend a 32-bit guest address for a 64-bit host. */
-if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-tcg_out_ext32u(s, base, addrl);
-addrl = base;
-}
-
 /*
  * Mask the page bits, keeping the alignment bits to compare against.
  * For unaligned accesses, compare against the end of the access to
@@ -1679,7 +1673,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is_64)
 data_regl, data_regh, addr_regl, addr_regh,
 s->code_ptr, label_ptr);
 #else
-if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS && !guest_base_signed_addr32) {
 tcg_out_ext32u(s, base, addr_regl);
 addr_regl = base;
 }
@@ -1878,7 +1872,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args, bool is_64)
 data_regl, data_regh, addr_regl, addr_regh,
 s->code_ptr, label_ptr);
 #else
-if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS && !guest_base_signed_addr32) {
 tcg_out_ext32u(s, base, addr_regl);
 addr_regl = base;
 }
-- 
2.25.1




[PULL 22/30] tcg: Add TCG_TARGET_SIGNED_ADDR32

2022-03-03 Thread Richard Henderson
Define as 0 for all tcg hosts.  Put this in a separate header,
because we'll want this in places that do not ordinarily have
access to all of tcg/tcg.h.

Reviewed-by: WANG Xuerui 
Reviewed-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target-sa32.h | 1 +
 tcg/arm/tcg-target-sa32.h | 1 +
 tcg/i386/tcg-target-sa32.h| 1 +
 tcg/loongarch64/tcg-target-sa32.h | 1 +
 tcg/mips/tcg-target-sa32.h| 1 +
 tcg/ppc/tcg-target-sa32.h | 1 +
 tcg/riscv/tcg-target-sa32.h   | 1 +
 tcg/s390x/tcg-target-sa32.h   | 1 +
 tcg/sparc/tcg-target-sa32.h   | 1 +
 tcg/tci/tcg-target-sa32.h | 1 +
 tcg/tcg.c | 4 
 11 files changed, 14 insertions(+)
 create mode 100644 tcg/aarch64/tcg-target-sa32.h
 create mode 100644 tcg/arm/tcg-target-sa32.h
 create mode 100644 tcg/i386/tcg-target-sa32.h
 create mode 100644 tcg/loongarch64/tcg-target-sa32.h
 create mode 100644 tcg/mips/tcg-target-sa32.h
 create mode 100644 tcg/ppc/tcg-target-sa32.h
 create mode 100644 tcg/riscv/tcg-target-sa32.h
 create mode 100644 tcg/s390x/tcg-target-sa32.h
 create mode 100644 tcg/sparc/tcg-target-sa32.h
 create mode 100644 tcg/tci/tcg-target-sa32.h

diff --git a/tcg/aarch64/tcg-target-sa32.h b/tcg/aarch64/tcg-target-sa32.h
new file mode 100644
index 00..cb185b1526
--- /dev/null
+++ b/tcg/aarch64/tcg-target-sa32.h
@@ -0,0 +1 @@
+#define TCG_TARGET_SIGNED_ADDR32 0
diff --git a/tcg/arm/tcg-target-sa32.h b/tcg/arm/tcg-target-sa32.h
new file mode 100644
index 00..cb185b1526
--- /dev/null
+++ b/tcg/arm/tcg-target-sa32.h
@@ -0,0 +1 @@
+#define TCG_TARGET_SIGNED_ADDR32 0
diff --git a/tcg/i386/tcg-target-sa32.h b/tcg/i386/tcg-target-sa32.h
new file mode 100644
index 00..cb185b1526
--- /dev/null
+++ b/tcg/i386/tcg-target-sa32.h
@@ -0,0 +1 @@
+#define TCG_TARGET_SIGNED_ADDR32 0
diff --git a/tcg/loongarch64/tcg-target-sa32.h 
b/tcg/loongarch64/tcg-target-sa32.h
new file mode 100644
index 00..cb185b1526
--- /dev/null
+++ b/tcg/loongarch64/tcg-target-sa32.h
@@ -0,0 +1 @@
+#define TCG_TARGET_SIGNED_ADDR32 0
diff --git a/tcg/mips/tcg-target-sa32.h b/tcg/mips/tcg-target-sa32.h
new file mode 100644
index 00..cb185b1526
--- /dev/null
+++ b/tcg/mips/tcg-target-sa32.h
@@ -0,0 +1 @@
+#define TCG_TARGET_SIGNED_ADDR32 0
diff --git a/tcg/ppc/tcg-target-sa32.h b/tcg/ppc/tcg-target-sa32.h
new file mode 100644
index 00..cb185b1526
--- /dev/null
+++ b/tcg/ppc/tcg-target-sa32.h
@@ -0,0 +1 @@
+#define TCG_TARGET_SIGNED_ADDR32 0
diff --git a/tcg/riscv/tcg-target-sa32.h b/tcg/riscv/tcg-target-sa32.h
new file mode 100644
index 00..cb185b1526
--- /dev/null
+++ b/tcg/riscv/tcg-target-sa32.h
@@ -0,0 +1 @@
+#define TCG_TARGET_SIGNED_ADDR32 0
diff --git a/tcg/s390x/tcg-target-sa32.h b/tcg/s390x/tcg-target-sa32.h
new file mode 100644
index 00..cb185b1526
--- /dev/null
+++ b/tcg/s390x/tcg-target-sa32.h
@@ -0,0 +1 @@
+#define TCG_TARGET_SIGNED_ADDR32 0
diff --git a/tcg/sparc/tcg-target-sa32.h b/tcg/sparc/tcg-target-sa32.h
new file mode 100644
index 00..cb185b1526
--- /dev/null
+++ b/tcg/sparc/tcg-target-sa32.h
@@ -0,0 +1 @@
+#define TCG_TARGET_SIGNED_ADDR32 0
diff --git a/tcg/tci/tcg-target-sa32.h b/tcg/tci/tcg-target-sa32.h
new file mode 100644
index 00..cb185b1526
--- /dev/null
+++ b/tcg/tci/tcg-target-sa32.h
@@ -0,0 +1 @@
+#define TCG_TARGET_SIGNED_ADDR32 0
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 33a97eabdb..8c131293fe 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -61,6 +61,10 @@
 #include "exec/log.h"
 #include "tcg/tcg-ldst.h"
 #include "tcg-internal.h"
+#include "tcg-target-sa32.h"
+
+/* Sanity check for TCG_TARGET_SIGNED_ADDR32. */
+QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS == 32 && TCG_TARGET_SIGNED_ADDR32);
 
 #ifdef CONFIG_TCG_INTERPRETER
 #include 
-- 
2.25.1




[PULL 21/30] tcg/i386: Implement bitsel for avx512

2022-03-03 Thread Richard Henderson
The general ternary logic operation can implement BITSEL.
Funnel the 4-operand operation into three variants of the
3-operand instruction, depending on input operand overlap.
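
For cross-checking the imm8 constants below: with the usual ternlog
convention the three inputs contribute the bit patterns A=0xf0, B=0xcc,
C=0xaa, and evaluating the desired function on those patterns yields
the immediate. A hedged sketch (not part of the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint8_t A = 0xf0, B = 0xcc, C = 0xaa;

    printf("A?B:C = 0x%02x\n", (uint8_t)((A & B) | (~A & C)));  /* 0xca */
    printf("B?A:C = 0x%02x\n", (uint8_t)((B & A) | (~B & C)));  /* 0xe2 */
    printf("B?C:A = 0x%02x\n", (uint8_t)((B & C) | (~B & A)));  /* 0xb8 */
    return 0;
}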

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.h |  2 +-
 tcg/i386/tcg-target.c.inc | 20 +++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index e02cef7575..00fcbe297d 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -204,7 +204,7 @@ extern bool have_movbe;
 #define TCG_TARGET_HAS_mul_vec  1
 #define TCG_TARGET_HAS_sat_vec  1
 #define TCG_TARGET_HAS_minmax_vec   1
-#define TCG_TARGET_HAS_bitsel_vec   0
+#define TCG_TARGET_HAS_bitsel_vec   have_avx512vl
 #define TCG_TARGET_HAS_cmpsel_vec   -1
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 140a51ce70..b5c6159853 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -2898,7 +2898,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 
 TCGType type = vecl + TCG_TYPE_V64;
 int insn, sub;
-TCGArg a0, a1, a2;
+TCGArg a0, a1, a2, a3;
 
 a0 = args[0];
 a1 = args[1];
@@ -3122,6 +3122,22 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 sub = 0xdd; /* orB!C */
 goto gen_simd_imm8;
 
+case INDEX_op_bitsel_vec:
+insn = OPC_VPTERNLOGQ;
+a3 = args[3];
+if (a0 == a1) {
+a1 = a2;
+a2 = a3;
+sub = 0xca; /* A?B:C */
+} else if (a0 == a2) {
+a2 = a3;
+sub = 0xe2; /* B?A:C */
+} else {
+tcg_out_mov(s, type, a0, a3);
+sub = 0xb8; /* B?C:A */
+}
+goto gen_simd_imm8;
+
 gen_simd_imm8:
 tcg_debug_assert(insn != OPC_UD2);
 if (type == TCG_TYPE_V256) {
@@ -3390,6 +3406,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_x86_vpshrdv_vec:
 return C_O1_I3(x, 0, x, x);
 
+case INDEX_op_bitsel_vec:
 case INDEX_op_x86_vpblendvb_vec:
 return C_O1_I3(x, x, x, x);
 
@@ -3412,6 +3429,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 case INDEX_op_nor_vec:
 case INDEX_op_eqv_vec:
 case INDEX_op_not_vec:
+case INDEX_op_bitsel_vec:
 return 1;
 case INDEX_op_cmp_vec:
 case INDEX_op_cmpsel_vec:
-- 
2.25.1
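
For reference on the three sub values: the VPTERNLOGQ immediate is a
truth table in which bit i gives the output for inputs A = bit 2 of i
(the destination operand), B = bit 1 (the vvvv operand), C = bit 0
(the r/m operand).  A minimal standalone sketch, not part of the
patch, showing how 0xca, 0xe2 and 0xb8 fall out of the three overlap
cases:

#include <stdint.h>
#include <stdio.h>

static uint8_t ternlog_imm8(int (*f)(int a, int b, int c))
{
    uint8_t imm = 0;
    for (int i = 0; i < 8; i++) {
        /* Bit i of the immediate is f() applied to the bits of i. */
        if (f((i >> 2) & 1, (i >> 1) & 1, i & 1)) {
            imm |= 1 << i;
        }
    }
    return imm;
}

static int sel_abc(int a, int b, int c) { return a ? b : c; }
static int sel_bac(int a, int b, int c) { return b ? a : c; }
static int sel_bca(int a, int b, int c) { return b ? c : a; }

int main(void)
{
    /* Prints "ca e2 b8", matching the three sub values in the patch. */
    printf("%02x %02x %02x\n", ternlog_imm8(sel_abc),
           ternlog_imm8(sel_bac), ternlog_imm8(sel_bca));
    return 0;
}

Three variants are needed because the instruction is destructive: the
output register doubles as the A input, so whichever source already
aliases a0 dictates which selector variant to encode.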




[PULL 20/30] tcg/i386: Implement more logical operations for avx512

2022-03-03 Thread Richard Henderson
AVX512VL has a general ternary logic operation, VPTERNLOGQ,
which can implement NOT, ORC, NAND, NOR, EQV.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.h | 10 +-
 tcg/i386/tcg-target.c.inc | 34 ++
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index da1eff59aa..e02cef7575 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -188,11 +188,11 @@ extern bool have_movbe;
 #define TCG_TARGET_HAS_v256 have_avx2
 
 #define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec  0
-#define TCG_TARGET_HAS_nand_vec 0
-#define TCG_TARGET_HAS_nor_vec  0
-#define TCG_TARGET_HAS_eqv_vec  0
-#define TCG_TARGET_HAS_not_vec  0
+#define TCG_TARGET_HAS_orc_vec  have_avx512vl
+#define TCG_TARGET_HAS_nand_vec have_avx512vl
+#define TCG_TARGET_HAS_nor_vec  have_avx512vl
+#define TCG_TARGET_HAS_eqv_vec  have_avx512vl
+#define TCG_TARGET_HAS_not_vec  have_avx512vl
 #define TCG_TARGET_HAS_neg_vec  0
 #define TCG_TARGET_HAS_abs_vec  1
 #define TCG_TARGET_HAS_roti_vec have_avx512vl
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index a800764d2f..140a51ce70 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -448,6 +448,7 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
 #define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
+#define OPC_VPTERNLOGQ  (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VZEROUPPER  (0x77 | P_EXT)
 #define OPC_XCHG_ax_r32(0x90)
 
@@ -3098,6 +3099,29 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 insn = vpshldi_insn[vece];
 sub = args[3];
 goto gen_simd_imm8;
+
+case INDEX_op_not_vec:
+insn = OPC_VPTERNLOGQ;
+a2 = a1;
+sub = 0x33; /* !B */
+goto gen_simd_imm8;
+case INDEX_op_nor_vec:
+insn = OPC_VPTERNLOGQ;
+sub = 0x11; /* norCB */
+goto gen_simd_imm8;
+case INDEX_op_nand_vec:
+insn = OPC_VPTERNLOGQ;
+sub = 0x77; /* nandCB */
+goto gen_simd_imm8;
+case INDEX_op_eqv_vec:
+insn = OPC_VPTERNLOGQ;
+sub = 0x99; /* xnorCB */
+goto gen_simd_imm8;
+case INDEX_op_orc_vec:
+insn = OPC_VPTERNLOGQ;
+sub = 0xdd; /* orB!C */
+goto gen_simd_imm8;
+
 gen_simd_imm8:
 tcg_debug_assert(insn != OPC_UD2);
 if (type == TCG_TYPE_V256) {
@@ -3318,6 +3342,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_or_vec:
 case INDEX_op_xor_vec:
 case INDEX_op_andc_vec:
+case INDEX_op_orc_vec:
+case INDEX_op_nand_vec:
+case INDEX_op_nor_vec:
+case INDEX_op_eqv_vec:
 case INDEX_op_ssadd_vec:
 case INDEX_op_usadd_vec:
 case INDEX_op_sssub_vec:
@@ -3350,6 +3378,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 
 case INDEX_op_abs_vec:
 case INDEX_op_dup_vec:
+case INDEX_op_not_vec:
 case INDEX_op_shli_vec:
 case INDEX_op_shri_vec:
 case INDEX_op_sari_vec:
@@ -3378,6 +3407,11 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 case INDEX_op_or_vec:
 case INDEX_op_xor_vec:
 case INDEX_op_andc_vec:
+case INDEX_op_orc_vec:
+case INDEX_op_nand_vec:
+case INDEX_op_nor_vec:
+case INDEX_op_eqv_vec:
+case INDEX_op_not_vec:
 return 1;
 case INDEX_op_cmp_vec:
 case INDEX_op_cmpsel_vec:
-- 
2.25.1
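
The sub values here follow the same truth-table convention sketched
under the bitsel patch above: bit i of the immediate is the output
for B = bit 1 of i and C = bit 0; A (bit 2) is unused by these
two-input functions.  A standalone sketch, not from the patch,
deriving all five constants:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint8_t not_b = 0, nor = 0, nand = 0, xnor = 0, orc = 0;

    for (int i = 0; i < 8; i++) {
        int b = (i >> 1) & 1, c = i & 1;

        not_b |= (!b) << i;         /* NOT:  !B     */
        nor   |= (!(b | c)) << i;   /* NOR:  norCB  */
        nand  |= (!(b & c)) << i;   /* NAND: nandCB */
        xnor  |= (!(b ^ c)) << i;   /* EQV:  xnorCB */
        orc   |= (b | !c) << i;     /* ORC:  orB!C  */
    }
    /* Prints "33 11 77 99 dd", matching the sub values above. */
    printf("%02x %02x %02x %02x %02x\n", not_b, nor, nand, xnor, orc);
    return 0;
}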




[PULL 27/30] tcg/aarch64: Support TCG_TARGET_SIGNED_ADDR32

2022-03-03 Thread Richard Henderson
AArch64 has both sign and zero-extending addressing modes, which
means that either treatment of guest addresses is equally efficient.
Enabling this for AArch64 gives us testing of the feature in CI.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target-sa32.h |  8 +++-
 tcg/aarch64/tcg-target.c.inc  | 81 ---
 2 files changed, 64 insertions(+), 25 deletions(-)

diff --git a/tcg/aarch64/tcg-target-sa32.h b/tcg/aarch64/tcg-target-sa32.h
index cb185b1526..c99e502e4c 100644
--- a/tcg/aarch64/tcg-target-sa32.h
+++ b/tcg/aarch64/tcg-target-sa32.h
@@ -1 +1,7 @@
-#define TCG_TARGET_SIGNED_ADDR32 0
+/*
+ * AArch64 has both SXTW and UXTW addressing modes, which means that
+ * it is agnostic to how guest addresses should be represented.
+ * Because aarch64 is more common than the other hosts that will
+ * want to use this feature, enable it for continuous testing.
+ */
+#define TCG_TARGET_SIGNED_ADDR32 1
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 077fc51401..4a3edd6963 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -361,6 +361,16 @@ typedef enum {
 LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
 } AArch64LdstType;
 
+/*
+ * See aarch64/instrs/extendreg/DecodeRegExtend
+ * But note that option<1> == 0 is UNDEFINED for LDR/STR.
+ */
+typedef enum {
+LDST_EXT_UXTW = 2,  /* zero-extend from uint32_t */
+LDST_EXT_UXTX = 3,  /* zero-extend from uint64_t (i.e. no extension) */
+LDST_EXT_SXTW = 6,  /* sign-extend from int32_t */
+} AArch64LdstExt;
+
 /* We encode the format of the insn into the beginning of the name, so that
we can have the preprocessor help "typecheck" the insn vs the output
function.  Arm didn't provide us with nice names for the formats, so we
@@ -806,12 +816,12 @@ static void tcg_out_insn_3617(TCGContext *s, AArch64Insn 
insn, bool q,
 }
 
 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
-  TCGReg rd, TCGReg base, TCGType ext,
+  TCGReg rd, TCGReg base, AArch64LdstExt option,
   TCGReg regoff)
 {
 /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
-  0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
+  option << 13 | base << 5 | (rd & 0x1f));
 }
 
 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
@@ -1126,7 +1136,7 @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, 
TCGReg rd,
 
 /* Worst-case scenario, move offset to temp register, use reg offset.  */
 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
-tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
+tcg_out_ldst_r(s, insn, rd, rn, LDST_EXT_UXTX, TCG_REG_TMP);
 }
 
 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
@@ -1765,31 +1775,31 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
TCGReg data_r, TCGReg addr_r,
-   TCGType otype, TCGReg off_r)
+   AArch64LdstExt option, TCGReg off_r)
 {
 switch (memop & MO_SSIZE) {
 case MO_UB:
-tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
+tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, option, off_r);
 break;
 case MO_SB:
 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
-   data_r, addr_r, otype, off_r);
+   data_r, addr_r, option, off_r);
 break;
 case MO_UW:
-tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
+tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, option, off_r);
 break;
 case MO_SW:
 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
-   data_r, addr_r, otype, off_r);
+   data_r, addr_r, option, off_r);
 break;
 case MO_UL:
-tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
+tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, option, off_r);
 break;
 case MO_SL:
-tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
+tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, option, off_r);
 break;
 case MO_UQ:
-tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
+tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, option, off_r);
 break;
 default:
 tcg_abort();
@@ -1798,31 +1808,52 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp 
memop, TCGType ext,
 
 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
TCGReg data_r, TCGReg addr_r,
-   TCGType otype, TCGReg 

[PULL 30/30] tcg/loongarch64: Support TCG_TARGET_SIGNED_ADDR32

2022-03-03 Thread Richard Henderson
All 32-bit LoongArch operations sign-extend the output, so we are easily
able to keep TCG_TYPE_I32 values sign-extended in host registers.

Cc: WANG Xuerui 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target-sa32.h |  2 +-
 tcg/loongarch64/tcg-target.c.inc  | 15 ++-
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/tcg/loongarch64/tcg-target-sa32.h 
b/tcg/loongarch64/tcg-target-sa32.h
index cb185b1526..aaffd777bf 100644
--- a/tcg/loongarch64/tcg-target-sa32.h
+++ b/tcg/loongarch64/tcg-target-sa32.h
@@ -1 +1 @@
-#define TCG_TARGET_SIGNED_ADDR32 0
+#define TCG_TARGET_SIGNED_ADDR32 1
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index a3debf6da7..425f6629ca 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -880,8 +880,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 return tcg_out_fail_alignment(s, l);
 }
 
-#endif /* CONFIG_SOFTMMU */
-
 /*
  * `ext32u` the address register into the temp register given,
  * if target is 32-bit, no-op otherwise.
@@ -891,12 +889,13 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s,
   TCGReg addr, TCGReg tmp)
 {
-if (TARGET_LONG_BITS == 32) {
+if (TARGET_LONG_BITS == 32 && !guest_base_signed_addr32) {
 tcg_out_ext32u(s, tmp, addr);
 return tmp;
 }
 return addr;
 }
+#endif /* CONFIG_SOFTMMU */
 
 static void tcg_out_qemu_ld_indexed(TCGContext *s, TCGReg rd, TCGReg rj,
TCGReg rk, MemOp opc, TCGType type)
@@ -944,8 +943,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, TCGType type)
 tcg_insn_unit *label_ptr[1];
 #else
 unsigned a_bits;
-#endif
 TCGReg base;
+#endif
 
 data_regl = *args++;
 addr_regl = *args++;
@@ -954,8 +953,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, TCGType type)
 
 #if defined(CONFIG_SOFTMMU)
 tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 1);
-base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
-tcg_out_qemu_ld_indexed(s, data_regl, base, TCG_REG_TMP2, opc, type);
+tcg_out_qemu_ld_indexed(s, data_regl, addr_regl, TCG_REG_TMP2, opc, type);
 add_qemu_ldst_label(s, 1, oi, type,
 data_regl, addr_regl,
 s->code_ptr, label_ptr);
@@ -1004,8 +1002,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args)
 tcg_insn_unit *label_ptr[1];
 #else
 unsigned a_bits;
-#endif
 TCGReg base;
+#endif
 
 data_regl = *args++;
 addr_regl = *args++;
@@ -1014,8 +1012,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args)
 
 #if defined(CONFIG_SOFTMMU)
 tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 0);
-base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
-tcg_out_qemu_st_indexed(s, data_regl, base, TCG_REG_TMP2, opc);
+tcg_out_qemu_st_indexed(s, data_regl, addr_regl, TCG_REG_TMP2, opc);
 add_qemu_ldst_label(s, 0, oi,
 0, /* type param is unused for stores */
 data_regl, addr_regl,
-- 
2.25.1
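
As a scalar illustration of the convention (a hypothetical helper,
not code from the patch): with signed addr32, the host address is
guest_base plus the sign-extended guest address, which is exactly the
form a LoongArch 32-bit ALU result already has in a 64-bit register,
so the zero-extension step above can be dropped.

#include <stdint.h>
#include <stdio.h>

static uint64_t host_addr(uint64_t guest_base, uint32_t guest_addr,
                          int signed_addr32)
{
    /* Unsigned convention: offsets in [0, 4G); signed: [-2G, +2G). */
    int64_t off = signed_addr32 ? (int64_t)(int32_t)guest_addr
                                : (int64_t)guest_addr;
    return guest_base + (uint64_t)off;
}

int main(void)
{
    /* 0x80000000 as an offset: +2G unsigned, -2G signed; guest_base
     * is simply chosen differently for the two conventions. */
    printf("%#llx\n", (unsigned long long)host_addr(1ull << 40, 0x80000000u, 0));
    printf("%#llx\n", (unsigned long long)host_addr(1ull << 40, 0x80000000u, 1));
    return 0;
}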




[PULL 17/30] tcg/i386: Expand scalar rotate with avx512 insns

2022-03-03 Thread Richard Henderson
Expand 32-bit and 64-bit scalar rotate with VPRO[LR]V;
expand 16-bit scalar rotate with VPSHLDV.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 49 +++
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 1fbb4b0593..edf0d066e7 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3602,26 +3602,6 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
 tcg_temp_free_vec(t);
 }
 
-static void expand_vec_rotls(TCGType type, unsigned vece,
- TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
-{
-TCGv_i32 rsh;
-TCGv_vec t;
-
-tcg_debug_assert(vece != MO_8);
-
-t = tcg_temp_new_vec(type);
-rsh = tcg_temp_new_i32();
-
-tcg_gen_neg_i32(rsh, lsh);
-tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
-tcg_gen_shls_vec(vece, t, v1, lsh);
-tcg_gen_shrs_vec(vece, v0, v1, rsh);
-tcg_gen_or_vec(vece, v0, v0, t);
-tcg_temp_free_vec(t);
-tcg_temp_free_i32(rsh);
-}
-
 static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
 TCGv_vec v1, TCGv_vec sh, bool right)
 {
@@ -3648,6 +3628,35 @@ static void expand_vec_rotv(TCGType type, unsigned vece, 
TCGv_vec v0,
 tcg_temp_free_vec(t);
 }
 
+static void expand_vec_rotls(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
+{
+TCGv_vec t = tcg_temp_new_vec(type);
+
+tcg_debug_assert(vece != MO_8);
+
+if (vece >= MO_32 ? have_avx512vl : have_avx512vbmi2) {
+tcg_gen_dup_i32_vec(vece, t, lsh);
+if (vece >= MO_32) {
+tcg_gen_rotlv_vec(vece, v0, v1, t);
+} else {
+expand_vec_rotv(type, vece, v0, v1, t, false);
+}
+} else {
+TCGv_i32 rsh = tcg_temp_new_i32();
+
+tcg_gen_neg_i32(rsh, lsh);
+tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
+tcg_gen_shls_vec(vece, t, v1, lsh);
+tcg_gen_shrs_vec(vece, v0, v1, rsh);
+tcg_gen_or_vec(vece, v0, v0, t);
+
+tcg_temp_free_i32(rsh);
+}
+
+tcg_temp_free_vec(t);
+}
+
 static void expand_vec_mul(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
 {
-- 
2.25.1
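
The fallback branch relies on the standard rotate identity, with the
right-shift count formed by negating the left count and masking it to
the element width, exactly what the neg/andi pair above computes.  A
scalar sketch of the identity (mine, not from the patch):

#include <assert.h>
#include <stdint.h>

static uint32_t rotl32(uint32_t x, unsigned n)    /* 0 <= n < 32 */
{
    return (x << n) | (x >> (-n & 31));
}

int main(void)
{
    assert(rotl32(0x80000001u, 1) == 0x00000003u);
    assert(rotl32(0x12345678u, 0) == 0x12345678u);
    return 0;
}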




[PULL 18/30] tcg/i386: Implement avx512 min/max/abs

2022-03-03 Thread Richard Henderson
AVX512VL has VPABSQ, VPMAXSQ, VPMAXUQ, VPMINSQ, VPMINUQ.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index edf0d066e7..be94b82fd6 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -313,6 +313,7 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define OPC_PABSB   (0x1c | P_EXT38 | P_DATA16)
 #define OPC_PABSW   (0x1d | P_EXT38 | P_DATA16)
 #define OPC_PABSD   (0x1e | P_EXT38 | P_DATA16)
+#define OPC_VPABSQ  (0x1f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_PACKSSDW(0x6b | P_EXT | P_DATA16)
 #define OPC_PACKSSWB(0x63 | P_EXT | P_DATA16)
 #define OPC_PACKUSDW(0x2b | P_EXT38 | P_DATA16)
@@ -339,15 +340,19 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define OPC_PMAXSB  (0x3c | P_EXT38 | P_DATA16)
 #define OPC_PMAXSW  (0xee | P_EXT | P_DATA16)
 #define OPC_PMAXSD  (0x3d | P_EXT38 | P_DATA16)
+#define OPC_VPMAXSQ (0x3d | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_PMAXUB  (0xde | P_EXT | P_DATA16)
 #define OPC_PMAXUW  (0x3e | P_EXT38 | P_DATA16)
 #define OPC_PMAXUD  (0x3f | P_EXT38 | P_DATA16)
+#define OPC_VPMAXUQ (0x3f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_PMINSB  (0x38 | P_EXT38 | P_DATA16)
 #define OPC_PMINSW  (0xea | P_EXT | P_DATA16)
 #define OPC_PMINSD  (0x39 | P_EXT38 | P_DATA16)
+#define OPC_VPMINSQ (0x39 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_PMINUB  (0xda | P_EXT | P_DATA16)
 #define OPC_PMINUW  (0x3a | P_EXT38 | P_DATA16)
 #define OPC_PMINUD  (0x3b | P_EXT38 | P_DATA16)
+#define OPC_VPMINUQ (0x3b | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_PMOVSXBW(0x20 | P_EXT38 | P_DATA16)
 #define OPC_PMOVSXWD(0x23 | P_EXT38 | P_DATA16)
 #define OPC_PMOVSXDQ(0x25 | P_EXT38 | P_DATA16)
@@ -2841,16 +2846,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
 };
 static int const smin_insn[4] = {
-OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_UD2
+OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_VPMINSQ
 };
 static int const smax_insn[4] = {
-OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_UD2
+OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ
 };
 static int const umin_insn[4] = {
-OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_UD2
+OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
 };
 static int const umax_insn[4] = {
-OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
+OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
 };
 static int const rotlv_insn[4] = {
 OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
@@ -2886,8 +2891,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ
 };
 static int const abs_insn[4] = {
-/* TODO: AVX512 adds support for MO_64.  */
-OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
+OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_VPABSQ
 };
 
 TCGType type = vecl + TCG_TYPE_V64;
@@ -3471,7 +3475,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 case INDEX_op_umin_vec:
 case INDEX_op_umax_vec:
 case INDEX_op_abs_vec:
-return vece <= MO_32;
+return vece <= MO_32 || have_avx512vl;
 
 default:
 return 0;
-- 
2.25.1




[PULL 16/30] tcg/i386: Remove rotls_vec from tcg_target_op_def

2022-03-03 Thread Richard Henderson
There is no such instruction on x86, so we should
not be pretending it has arguments.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 19cf124456..1fbb4b0593 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3329,7 +3329,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_shls_vec:
 case INDEX_op_shrs_vec:
 case INDEX_op_sars_vec:
-case INDEX_op_rotls_vec:
 case INDEX_op_cmp_vec:
 case INDEX_op_x86_shufps_vec:
 case INDEX_op_x86_blend_vec:
-- 
2.25.1




[PULL 19/30] tcg/i386: Implement avx512 multiply

2022-03-03 Thread Richard Henderson
AVX512DQ has VPMULLQ.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index be94b82fd6..a800764d2f 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -361,6 +361,7 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define OPC_PMOVZXDQ(0x35 | P_EXT38 | P_DATA16)
 #define OPC_PMULLW  (0xd5 | P_EXT | P_DATA16)
 #define OPC_PMULLD  (0x40 | P_EXT38 | P_DATA16)
+#define OPC_VPMULLQ (0x40 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_POR (0xeb | P_EXT | P_DATA16)
 #define OPC_PSHUFB  (0x00 | P_EXT38 | P_DATA16)
 #define OPC_PSHUFD  (0x70 | P_EXT | P_DATA16)
@@ -2822,7 +2823,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2
 };
 static int const mul_insn[4] = {
-OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
+OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_VPMULLQ
 };
 static int const shift_imm_insn[4] = {
 OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
@@ -3456,12 +3457,11 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 return 0;
 
 case INDEX_op_mul_vec:
-if (vece == MO_8) {
-/* We can expand the operation for MO_8.  */
+switch (vece) {
+case MO_8:
 return -1;
-}
-if (vece == MO_64) {
-return 0;
+case MO_64:
+return have_avx512dq;
 }
 return 1;
 
-- 
2.25.1




[PULL 08/30] tcg/i386: Use tcg_can_emit_vec_op in expand_vec_cmp_noinv

2022-03-03 Thread Richard Henderson
The condition for UMIN/UMAX availability is about to change;
use the canonical version.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 89497b2b45..6a53f378cc 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3620,28 +3620,28 @@ static bool expand_vec_cmp_noinv(TCGType type, unsigned 
vece, TCGv_vec v0,
 fixup = NEED_SWAP | NEED_INV;
 break;
 case TCG_COND_LEU:
-if (vece <= MO_32) {
+if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
 fixup = NEED_UMIN;
 } else {
 fixup = NEED_BIAS | NEED_INV;
 }
 break;
 case TCG_COND_GTU:
-if (vece <= MO_32) {
+if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
 fixup = NEED_UMIN | NEED_INV;
 } else {
 fixup = NEED_BIAS;
 }
 break;
 case TCG_COND_GEU:
-if (vece <= MO_32) {
+if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
 fixup = NEED_UMAX;
 } else {
 fixup = NEED_BIAS | NEED_SWAP | NEED_INV;
 }
 break;
 case TCG_COND_LTU:
-if (vece <= MO_32) {
+if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
 fixup = NEED_UMAX | NEED_INV;
 } else {
 fixup = NEED_BIAS | NEED_SWAP;
-- 
2.25.1
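
For reference, the identities behind the two fixup flavors: unsigned
"less or equal" reduces to an unsigned minimum plus an equality test,
while the bias path turns an unsigned compare into a signed one by
flipping the top bit of both operands.  A scalar sketch with plain C
stand-ins for the vector instructions (not the actual expansion code):

#include <assert.h>
#include <stdint.h>

static int leu_via_umin(uint32_t x, uint32_t y)
{
    uint32_t m = x < y ? x : y;     /* stand-in for PMINU* */
    return m == x;                  /* stand-in for PCMPEQ* */
}

static int ltu_via_bias(uint32_t x, uint32_t y)
{
    /* Flip the sign bit, then use the signed compare (PCMPGT*). */
    return (int32_t)(x ^ 0x80000000u) < (int32_t)(y ^ 0x80000000u);
}

int main(void)
{
    assert(leu_via_umin(1, 2) && leu_via_umin(2, 2) && !leu_via_umin(3, 2));
    assert(ltu_via_bias(0x7fffffffu, 0x80000000u));
    assert(!ltu_via_bias(0xffffffffu, 0));
    return 0;
}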




[PULL 15/30] tcg/i386: Expand vector word rotate as avx512vbmi2 shift-double

2022-03-03 Thread Richard Henderson
While there are no specific 16-bit rotate instructions, there
are double-word shifts, which can perform the same operation.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index a39f890a7d..19cf124456 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3444,6 +3444,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 case INDEX_op_rotlv_vec:
 case INDEX_op_rotrv_vec:
 switch (vece) {
+case MO_16:
+return have_avx512vbmi2 ? -1 : 0;
 case MO_32:
 case MO_64:
 return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
@@ -3588,6 +3590,12 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
 return;
 }
 
+if (have_avx512vbmi2) {
+vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
+  tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
+return;
+}
+
 t = tcg_temp_new_vec(type);
 tcg_gen_shli_vec(vece, t, v1, imm);
 tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
@@ -3618,8 +3626,16 @@ static void expand_vec_rotls(TCGType type, unsigned vece,
 static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
 TCGv_vec v1, TCGv_vec sh, bool right)
 {
-TCGv_vec t = tcg_temp_new_vec(type);
+TCGv_vec t;
 
+if (have_avx512vbmi2) {
+vec_gen_4(right ? INDEX_op_x86_vpshrdv_vec : INDEX_op_x86_vpshldv_vec,
+  type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1),
+  tcgv_vec_arg(v1), tcgv_vec_arg(sh));
+return;
+}
+
+t = tcg_temp_new_vec(type);
 tcg_gen_dupi_vec(vece, t, 8 << vece);
 tcg_gen_sub_vec(vece, t, t, sh);
 if (right) {
-- 
2.25.1
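
The trick is that a double-word shift fed the same value twice is a
rotate: with shld(a, b, n) = (a << n) | (b >> (w - n)), shld(x, x, n)
equals rotl(x, n), which is why the expansion above passes v1 as both
inputs.  A scalar sketch of the 16-bit case, assuming that definition
of the shift-double:

#include <assert.h>
#include <stdint.h>

static uint16_t shld16(uint16_t a, uint16_t b, unsigned n) /* 0 < n < 16 */
{
    return (uint16_t)((a << n) | (b >> (16 - n)));
}

int main(void)
{
    uint16_t x = 0x8001;
    assert(shld16(x, x, 1) == 0x0003);    /* == rotl16(x, 1) */
    assert(shld16(x, x, 4) == 0x0018);    /* == rotl16(x, 4) */
    return 0;
}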




[PULL 14/30] tcg/i386: Support avx512vbmi2 vector shift-double instructions

2022-03-03 Thread Richard Henderson
We will use VPSHLD, VPSHLDV and VPSHRDV for 16-bit rotates.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target-con-set.h |  1 +
 tcg/i386/tcg-target.opc.h |  3 +++
 tcg/i386/tcg-target.c.inc | 38 +++
 3 files changed, 42 insertions(+)

diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
index 78774d1005..91ceb0e1da 100644
--- a/tcg/i386/tcg-target-con-set.h
+++ b/tcg/i386/tcg-target-con-set.h
@@ -45,6 +45,7 @@ C_O1_I2(r, r, rI)
 C_O1_I2(x, x, x)
 C_N1_I2(r, r, r)
 C_N1_I2(r, r, rW)
+C_O1_I3(x, 0, x, x)
 C_O1_I3(x, x, x, x)
 C_O1_I4(r, r, re, r, 0)
 C_O1_I4(r, r, r, ri, ri)
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target.opc.h
index 1312941800..b5f403e35e 100644
--- a/tcg/i386/tcg-target.opc.h
+++ b/tcg/i386/tcg-target.opc.h
@@ -33,3 +33,6 @@ DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
 DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
 DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
 DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_vpshldi_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_vpshldv_vec, 1, 3, 0, IMPLVEC)
+DEF(x86_vpshrdv_vec, 1, 3, 0, IMPLVEC)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 712ae3a168..a39f890a7d 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -424,6 +424,15 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define OPC_VPROLVQ (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX)
 #define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDW (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDD (0x71 | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPSHLDQ (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDVW(0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDVD(0x71 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPSHLDVQ(0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHRDVW(0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHRDVD(0x73 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPSHRDVQ(0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
 #define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
@@ -2867,6 +2876,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 static int const sars_insn[4] = {
 OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ
 };
+static int const vpshldi_insn[4] = {
+OPC_UD2, OPC_VPSHLDW, OPC_VPSHLDD, OPC_VPSHLDQ
+};
+static int const vpshldv_insn[4] = {
+OPC_UD2, OPC_VPSHLDVW, OPC_VPSHLDVD, OPC_VPSHLDVQ
+};
+static int const vpshrdv_insn[4] = {
+OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ
+};
 static int const abs_insn[4] = {
 /* TODO: AVX512 adds support for MO_64.  */
 OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
@@ -2959,6 +2977,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_x86_packus_vec:
 insn = packus_insn[vece];
 goto gen_simd;
+case INDEX_op_x86_vpshldv_vec:
+insn = vpshldv_insn[vece];
+a1 = a2;
+a2 = args[3];
+goto gen_simd;
+case INDEX_op_x86_vpshrdv_vec:
+insn = vpshrdv_insn[vece];
+a1 = a2;
+a2 = args[3];
+goto gen_simd;
 #if TCG_TARGET_REG_BITS == 32
 case INDEX_op_dup2_vec:
 /* First merge the two 32-bit inputs to a single 64-bit element. */
@@ -3061,7 +3089,12 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 insn = OPC_VPERM2I128;
 sub = args[3];
 goto gen_simd_imm8;
+case INDEX_op_x86_vpshldi_vec:
+insn = vpshldi_insn[vece];
+sub = args[3];
+goto gen_simd_imm8;
 gen_simd_imm8:
+tcg_debug_assert(insn != OPC_UD2);
 if (type == TCG_TYPE_V256) {
 insn |= P_VEXL;
 }
@@ -3305,6 +3338,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_x86_vperm2i128_vec:
 case INDEX_op_x86_punpckl_vec:
 case INDEX_op_x86_punpckh_vec:
+case INDEX_op_x86_vpshldi_vec:
 #if TCG_TARGET_REG_BITS == 32
 case INDEX_op_dup2_vec:
 #endif
@@ -3319,6 +3353,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_x86_psrldq_vec:
 return C_O1_I1(x, x);
 
+case INDEX_op_x86_vpshldv_vec:
+case INDEX_op_x86_vpshrdv_vec:
+return C_O1_I3(x, 0, x, x);
+
 case INDEX_op_x86_vpblendvb_vec:
 return C_O1_I3(x, x, x, x);
 
-- 
2.25.1




[PULL 13/30] tcg/i386: Implement avx512 variable rotate

2022-03-03 Thread Richard Henderson
AVX512VL has VPROLVD and VPRORVQ.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.h |  2 +-
 tcg/i386/tcg-target.c.inc | 25 -
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 23a8b2a8c8..da1eff59aa 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -197,7 +197,7 @@ extern bool have_movbe;
 #define TCG_TARGET_HAS_abs_vec  1
 #define TCG_TARGET_HAS_roti_vec have_avx512vl
 #define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_rotv_vec have_avx512vl
 #define TCG_TARGET_HAS_shi_vec  1
 #define TCG_TARGET_HAS_shs_vec  1
 #define TCG_TARGET_HAS_shv_vec  have_avx2
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 3a9f6a3360..712ae3a168 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -420,6 +420,10 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
 #define OPC_VPERMQ  (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
 #define OPC_VPERM2I128  (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
+#define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPROLVQ (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
 #define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
@@ -2839,6 +2843,12 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 static int const umax_insn[4] = {
 OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
 };
+static int const rotlv_insn[4] = {
+OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
+};
+static int const rotrv_insn[4] = {
+OPC_UD2, OPC_UD2, OPC_VPRORVD, OPC_VPRORVQ
+};
 static int const shlv_insn[4] = {
 OPC_UD2, OPC_VPSLLVW, OPC_VPSLLVD, OPC_VPSLLVQ
 };
@@ -2922,6 +2932,12 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_sarv_vec:
 insn = sarv_insn[vece];
 goto gen_simd;
+case INDEX_op_rotlv_vec:
+insn = rotlv_insn[vece];
+goto gen_simd;
+case INDEX_op_rotrv_vec:
+insn = rotrv_insn[vece];
+goto gen_simd;
 case INDEX_op_shls_vec:
 insn = shls_insn[vece];
 goto gen_simd;
@@ -3275,6 +3291,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_shlv_vec:
 case INDEX_op_shrv_vec:
 case INDEX_op_sarv_vec:
+case INDEX_op_rotlv_vec:
+case INDEX_op_rotrv_vec:
 case INDEX_op_shls_vec:
 case INDEX_op_shrs_vec:
 case INDEX_op_sars_vec:
@@ -3387,7 +3405,12 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 return 0;
 case INDEX_op_rotlv_vec:
 case INDEX_op_rotrv_vec:
-return have_avx2 && vece >= MO_32 ? -1 : 0;
+switch (vece) {
+case MO_32:
+case MO_64:
+return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
+}
+return 0;
 
 case INDEX_op_mul_vec:
 if (vece == MO_8) {
-- 
2.25.1




[PULL 11/30] tcg/i386: Implement avx512 immediate sari shift

2022-03-03 Thread Richard Henderson
AVX512 has VPSRAQ with an immediate operand, in the same form as
AVX, but it requires EVEX encoding and W1.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 30 +-
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 1ef34f0b52..de01fbf40c 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -2986,17 +2986,22 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 break;
 
 case INDEX_op_shli_vec:
+insn = shift_imm_insn[vece];
 sub = 6;
 goto gen_shift;
 case INDEX_op_shri_vec:
+insn = shift_imm_insn[vece];
 sub = 2;
 goto gen_shift;
 case INDEX_op_sari_vec:
-tcg_debug_assert(vece != MO_64);
+if (vece == MO_64) {
+insn = OPC_PSHIFTD_Ib | P_VEXW | P_EVEX;
+} else {
+insn = shift_imm_insn[vece];
+}
 sub = 4;
 gen_shift:
 tcg_debug_assert(vece != MO_8);
-insn = shift_imm_insn[vece];
 if (type == TCG_TYPE_V256) {
 insn |= P_VEXL;
 }
@@ -3316,16 +3321,23 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 return vece == MO_8 ? -1 : 1;
 
 case INDEX_op_sari_vec:
-/* We must expand the operation for MO_8.  */
-if (vece == MO_8) {
+switch (vece) {
+case MO_8:
 return -1;
-}
-/* We can emulate this for MO_64, but it does not pay off
-   unless we're producing at least 4 values.  */
-if (vece == MO_64) {
+case MO_16:
+case MO_32:
+return 1;
+case MO_64:
+if (have_avx512vl) {
+return 1;
+}
+/*
+ * We can emulate this for MO_64, but it does not pay off
+ * unless we're producing at least 4 values.
+ */
 return type >= TCG_TYPE_V256 ? -1 : 0;
 }
-return 1;
+return 0;
 
 case INDEX_op_shls_vec:
 case INDEX_op_shrs_vec:
-- 
2.25.1
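
On the MO_64 emulation mentioned above: one standard branchless way
to recover an arithmetic shift from a logical one, shown here purely
for reference and not necessarily the exact sequence this backend
generates, is to re-propagate the sign bit with an xor/subtract pair:

#include <assert.h>
#include <stdint.h>

static uint64_t sar64(uint64_t x, unsigned n)     /* 0 < n < 64 */
{
    uint64_t m = 1ull << (63 - n);                /* old sign position */
    return ((x >> n) ^ m) - m;
}

int main(void)
{
    assert(sar64(0xffffffffffffffffull, 4) == 0xffffffffffffffffull);
    assert(sar64(0x7fffffffffffffffull, 4) == 0x07ffffffffffffffull);
    return 0;
}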




[PULL 06/30] tcg/i386: Detect AVX512

2022-03-03 Thread Richard Henderson
Some subsets of AVX512 provide operation sizes that are missing
from previous iterations of AVX.  Detect them.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 include/qemu/cpuid.h  | 20 +---
 tcg/i386/tcg-target.h |  4 
 tcg/i386/tcg-target.c.inc | 24 ++--
 3 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
index 09fc245b91..7adb12d320 100644
--- a/include/qemu/cpuid.h
+++ b/include/qemu/cpuid.h
@@ -45,12 +45,26 @@
 #ifndef bit_AVX2
 #define bit_AVX2(1 << 5)
 #endif
-#ifndef bit_AVX512F
-#define bit_AVX512F(1 << 16)
-#endif
 #ifndef bit_BMI2
 #define bit_BMI2(1 << 8)
 #endif
+#ifndef bit_AVX512F
+#define bit_AVX512F (1 << 16)
+#endif
+#ifndef bit_AVX512DQ
+#define bit_AVX512DQ(1 << 17)
+#endif
+#ifndef bit_AVX512BW
+#define bit_AVX512BW(1 << 30)
+#endif
+#ifndef bit_AVX512VL
+#define bit_AVX512VL(1u << 31)
+#endif
+
+/* Leaf 7, %ecx */
+#ifndef bit_AVX512VBMI2
+#define bit_AVX512VBMI2 (1 << 6)
+#endif
 
 /* Leaf 0x8001, %ecx */
 #ifndef bit_LZCNT
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index ecd0fa6e05..79af353860 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -103,6 +103,10 @@ extern bool have_bmi1;
 extern bool have_popcnt;
 extern bool have_avx1;
 extern bool have_avx2;
+extern bool have_avx512bw;
+extern bool have_avx512dq;
+extern bool have_avx512vbmi2;
+extern bool have_avx512vl;
 extern bool have_movbe;
 
 /* optional instructions */
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index faa15eecab..7516be5d5f 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -171,6 +171,10 @@ bool have_bmi1;
 bool have_popcnt;
 bool have_avx1;
 bool have_avx2;
+bool have_avx512bw;
+bool have_avx512dq;
+bool have_avx512vbmi2;
+bool have_avx512vl;
 bool have_movbe;
 
 #ifdef CONFIG_CPUID_H
@@ -3839,12 +3843,12 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int 
count)
 static void tcg_target_init(TCGContext *s)
 {
 #ifdef CONFIG_CPUID_H
-unsigned a, b, c, d, b7 = 0;
+unsigned a, b, c, d, b7 = 0, c7 = 0;
 unsigned max = __get_cpuid_max(0, 0);
 
 if (max >= 7) {
 /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */
-__cpuid_count(7, 0, a, b7, c, d);
+__cpuid_count(7, 0, a, b7, c7, d);
 have_bmi1 = (b7 & bit_BMI) != 0;
 have_bmi2 = (b7 & bit_BMI2) != 0;
 }
@@ -3874,6 +3878,22 @@ static void tcg_target_init(TCGContext *s)
 if ((xcrl & 6) == 6) {
 have_avx1 = (c & bit_AVX) != 0;
 have_avx2 = (b7 & bit_AVX2) != 0;
+
+/*
+ * There are interesting instructions in AVX512, so long
+ * as we have AVX512VL, which indicates support for EVEX
+ * on sizes smaller than 512 bits.  We are required to
+ * check that OPMASK and all extended ZMM state are enabled
+ * even if we're not using them -- the insns will fault.
+ */
+if ((xcrl & 0xe0) == 0xe0
+&& (b7 & bit_AVX512F)
+&& (b7 & bit_AVX512VL)) {
+have_avx512vl = true;
+have_avx512bw = (b7 & bit_AVX512BW) != 0;
+have_avx512dq = (b7 & bit_AVX512DQ) != 0;
+have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
+}
 }
 }
 }
-- 
2.25.1
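
A standalone sketch of the same detection flow (assuming GCC/Clang
<cpuid.h>; the xgetbv read is written as inline asm, and the fallback
bit definitions mirror the ones the patch adds):

#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

#ifndef bit_AVX512F
#define bit_AVX512F  (1 << 16)
#endif
#ifndef bit_AVX512VL
#define bit_AVX512VL (1u << 31)
#endif

int main(void)
{
    unsigned a, b, c, d, b7 = 0;
    bool avx512vl = false;

    if (__get_cpuid_max(0, 0) >= 7) {
        __cpuid(1, a, b, c, d);
        if (c & bit_OSXSAVE) {
            unsigned xl, xh;
            asm("xgetbv" : "=a"(xl), "=d"(xh) : "c"(0));
            /* XCR0 bits 1-2: SSE/AVX state; bits 5-7: opmask/ZMM state. */
            if ((xl & 6) == 6 && (xl & 0xe0) == 0xe0) {
                __cpuid_count(7, 0, a, b7, c, d);
                avx512vl = (b7 & bit_AVX512F) && (b7 & bit_AVX512VL);
            }
        }
    }
    printf("avx512vl: %d\n", avx512vl);
    return 0;
}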




[PULL 12/30] tcg/i386: Implement avx512 immediate rotate

2022-03-03 Thread Richard Henderson
AVX512VL has VPROLD and VPROLQ, layered onto the same
opcode as PSHIFTD, but they require EVEX encoding and W1.

Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.h |  2 +-
 tcg/i386/tcg-target.c.inc | 15 +--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 79af353860..23a8b2a8c8 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -195,7 +195,7 @@ extern bool have_movbe;
 #define TCG_TARGET_HAS_not_vec  0
 #define TCG_TARGET_HAS_neg_vec  0
 #define TCG_TARGET_HAS_abs_vec  1
-#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_roti_vec have_avx512vl
 #define TCG_TARGET_HAS_rots_vec 0
 #define TCG_TARGET_HAS_rotv_vec 0
 #define TCG_TARGET_HAS_shi_vec  1
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index de01fbf40c..3a9f6a3360 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -362,7 +362,7 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2)
 #define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3)
 #define OPC_PSHIFTW_Ib  (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
-#define OPC_PSHIFTD_Ib  (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
+#define OPC_PSHIFTD_Ib  (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */
 #define OPC_PSHIFTQ_Ib  (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
 #define OPC_PSLLW   (0xf1 | P_EXT | P_DATA16)
 #define OPC_PSLLD   (0xf2 | P_EXT | P_DATA16)
@@ -3000,6 +3000,14 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 insn = shift_imm_insn[vece];
 }
 sub = 4;
+goto gen_shift;
+case INDEX_op_rotli_vec:
+insn = OPC_PSHIFTD_Ib | P_EVEX;  /* VPROL[DQ] */
+if (vece == MO_64) {
+insn |= P_VEXW;
+}
+sub = 1;
+goto gen_shift;
 gen_shift:
 tcg_debug_assert(vece != MO_8);
 if (type == TCG_TYPE_V256) {
@@ -3289,6 +3297,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_shli_vec:
 case INDEX_op_shri_vec:
 case INDEX_op_sari_vec:
+case INDEX_op_rotli_vec:
 case INDEX_op_x86_psrldq_vec:
 return C_O1_I1(x, x);
 
@@ -3310,11 +3319,13 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 case INDEX_op_xor_vec:
 case INDEX_op_andc_vec:
 return 1;
-case INDEX_op_rotli_vec:
 case INDEX_op_cmp_vec:
 case INDEX_op_cmpsel_vec:
 return -1;
 
+case INDEX_op_rotli_vec:
+return have_avx512vl && vece >= MO_32 ? 1 : -1;
+
 case INDEX_op_shli_vec:
 case INDEX_op_shri_vec:
 /* We must expand the operation for MO_8.  */
-- 
2.25.1




[PULL 09/30] tcg/i386: Implement avx512 variable shifts

2022-03-03 Thread Richard Henderson
AVX512VL has VPSRAVQ, and
AVX512BW has VPSLLVW, VPSRAVW, VPSRLVW.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 32 
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 6a53f378cc..055db88422 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -419,9 +419,13 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
 #define OPC_VPERMQ  (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
 #define OPC_VPERM2I128  (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
+#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
 #define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
+#define OPC_VPSRAVW (0x11 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16)
+#define OPC_VPSRAVQ (0x46 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
 #define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
 #define OPC_VZEROUPPER  (0x77 | P_EXT)
@@ -2835,16 +2839,13 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
 };
 static int const shlv_insn[4] = {
-/* TODO: AVX512 adds support for MO_16.  */
-OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ
+OPC_UD2, OPC_VPSLLVW, OPC_VPSLLVD, OPC_VPSLLVQ
 };
 static int const shrv_insn[4] = {
-/* TODO: AVX512 adds support for MO_16.  */
-OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ
+OPC_UD2, OPC_VPSRLVW, OPC_VPSRLVD, OPC_VPSRLVQ
 };
 static int const sarv_insn[4] = {
-/* TODO: AVX512 adds support for MO_16, MO_64.  */
-OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2
+OPC_UD2, OPC_VPSRAVW, OPC_VPSRAVD, OPC_VPSRAVQ
 };
 static int const shls_insn[4] = {
 OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ
@@ -3335,9 +3336,24 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 
 case INDEX_op_shlv_vec:
 case INDEX_op_shrv_vec:
-return have_avx2 && vece >= MO_32;
+switch (vece) {
+case MO_16:
+return have_avx512bw;
+case MO_32:
+case MO_64:
+return have_avx2;
+}
+return 0;
 case INDEX_op_sarv_vec:
-return have_avx2 && vece == MO_32;
+switch (vece) {
+case MO_16:
+return have_avx512bw;
+case MO_32:
+return have_avx2;
+case MO_64:
+return have_avx512vl;
+}
+return 0;
 case INDEX_op_rotlv_vec:
 case INDEX_op_rotrv_vec:
 return have_avx2 && vece >= MO_32 ? -1 : 0;
-- 
2.25.1




[PULL 07/30] tcg/i386: Add tcg_out_evex_opc

2022-03-03 Thread Richard Henderson
The EVEX encoding is added here, for use in a subsequent patch.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 51 ++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 7516be5d5f..89497b2b45 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -262,6 +262,7 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define P_SIMDF30x2 /* 0xf3 opcode prefix */
 #define P_SIMDF20x4 /* 0xf2 opcode prefix */
 #define P_VEXL  0x8 /* Set VEX.L = 1 */
+#define P_EVEX  0x10/* Requires EVEX encoding */
 
 #define OPC_ARITH_EvIz (0x81)
 #define OPC_ARITH_EvIb (0x83)
@@ -626,9 +627,57 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, 
int v,
 tcg_out8(s, opc);
 }
 
+static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
+ int rm, int index)
+{
+/* The entire 4-byte evex prefix; with R' and V' set. */
+uint32_t p = 0x08041062;
+int mm, pp;
+
+tcg_debug_assert(have_avx512vl);
+
+/* EVEX.mm */
+if (opc & P_EXT3A) {
+mm = 3;
+} else if (opc & P_EXT38) {
+mm = 2;
+} else if (opc & P_EXT) {
+mm = 1;
+} else {
+g_assert_not_reached();
+}
+
+/* EVEX.pp */
+if (opc & P_DATA16) {
+pp = 1;  /* 0x66 */
+} else if (opc & P_SIMDF3) {
+pp = 2;  /* 0xf3 */
+} else if (opc & P_SIMDF2) {
+pp = 3;  /* 0xf2 */
+} else {
+pp = 0;
+}
+
+p = deposit32(p, 8, 2, mm);
+p = deposit32(p, 13, 1, (rm & 8) == 0); /* EVEX.RXB.B */
+p = deposit32(p, 14, 1, (index & 8) == 0);  /* EVEX.RXB.X */
+p = deposit32(p, 15, 1, (r & 8) == 0);  /* EVEX.RXB.R */
+p = deposit32(p, 16, 2, pp);
+p = deposit32(p, 19, 4, ~v);
+p = deposit32(p, 23, 1, (opc & P_VEXW) != 0);
+p = deposit32(p, 29, 2, (opc & P_VEXL) != 0);
+
+tcg_out32(s, p);
+tcg_out8(s, opc);
+}
+
 static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
 {
-tcg_out_vex_opc(s, opc, r, v, rm, 0);
+if (opc & P_EVEX) {
+tcg_out_evex_opc(s, opc, r, v, rm, 0);
+} else {
+tcg_out_vex_opc(s, opc, r, v, rm, 0);
+}
 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
 }
 
-- 
2.25.1
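
On the preset constant 0x08041062 (my reading of the layout, for
orientation): emitted little-endian, byte 0 is 0x62, the EVEX escape
byte; byte 1 is 0x10, presetting the inverted R' bit, with mm
deposited at bits 8-9 and the inverted B/X/R bits at 13-15; byte 2 is
0x04, the architecturally fixed '1' bit, with pp at bits 16-17, the
inverted vvvv at 19-22 and W at 23; byte 3 is 0x08, presetting the
inverted V' bit, with L'L deposited at bits 29-30.  A trivial check
of the byte order:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t p = 0x08041062;

    /* Prints "62 10 04 08": the four prefix bytes in emission order. */
    for (int i = 0; i < 4; i++) {
        printf("%02x ", (unsigned)(p >> (i * 8)) & 0xff);
    }
    printf("\n");
    return 0;
}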




[PULL 10/30] tcg/i386: Implement avx512 scalar shift

2022-03-03 Thread Richard Henderson
AVX512VL has VPSRAQ.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 055db88422..1ef34f0b52 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -369,6 +369,7 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define OPC_PSLLQ   (0xf3 | P_EXT | P_DATA16)
 #define OPC_PSRAW   (0xe1 | P_EXT | P_DATA16)
 #define OPC_PSRAD   (0xe2 | P_EXT | P_DATA16)
+#define OPC_VPSRAQ  (0x72 | P_EXT | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_PSRLW   (0xd1 | P_EXT | P_DATA16)
 #define OPC_PSRLD   (0xd2 | P_EXT | P_DATA16)
 #define OPC_PSRLQ   (0xd3 | P_EXT | P_DATA16)
@@ -2854,7 +2855,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ
 };
 static int const sars_insn[4] = {
-OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2
+OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ
 };
 static int const abs_insn[4] = {
 /* TODO: AVX512 adds support for MO_64.  */
@@ -3330,7 +3331,14 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 case INDEX_op_shrs_vec:
 return vece >= MO_16;
 case INDEX_op_sars_vec:
-return vece >= MO_16 && vece <= MO_32;
+switch (vece) {
+case MO_16:
+case MO_32:
+return 1;
+case MO_64:
+return have_avx512vl;
+}
+return 0;
 case INDEX_op_rotls_vec:
 return vece >= MO_16 ? -1 : 0;
 
-- 
2.25.1




[PULL 05/30] tcg/s390x: Implement vector NAND, NOR, EQV

2022-03-03 Thread Richard Henderson
Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/s390x/tcg-target.h |  6 +++---
 tcg/s390x/tcg-target.c.inc | 17 +
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 94ccb179b8..23e2063667 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -145,9 +145,9 @@ extern uint64_t s390_facilities[3];
 
 #define TCG_TARGET_HAS_andc_vec   1
 #define TCG_TARGET_HAS_orc_vecHAVE_FACILITY(VECTOR_ENH1)
-#define TCG_TARGET_HAS_nand_vec   0
-#define TCG_TARGET_HAS_nor_vec0
-#define TCG_TARGET_HAS_eqv_vec0
+#define TCG_TARGET_HAS_nand_vec   HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_nor_vec1
+#define TCG_TARGET_HAS_eqv_vecHAVE_FACILITY(VECTOR_ENH1)
 #define TCG_TARGET_HAS_not_vec1
 #define TCG_TARGET_HAS_neg_vec1
 #define TCG_TARGET_HAS_abs_vec1
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index d56c1e51e4..6e65828c09 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -290,7 +290,9 @@ typedef enum S390Opcode {
 VRRc_VMXL   = 0xe7fd,
 VRRc_VN = 0xe768,
 VRRc_VNC= 0xe769,
+VRRc_VNN= 0xe76e,
 VRRc_VNO= 0xe76b,
+VRRc_VNX= 0xe76c,
 VRRc_VO = 0xe76a,
 VRRc_VOC= 0xe76f,
 VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
@@ -2805,6 +2807,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_xor_vec:
 tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
 break;
+case INDEX_op_nand_vec:
+tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
+break;
+case INDEX_op_nor_vec:
+tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
+break;
+case INDEX_op_eqv_vec:
+tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
+break;
 
 case INDEX_op_shli_vec:
 tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
@@ -2901,7 +2912,10 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 case INDEX_op_and_vec:
 case INDEX_op_andc_vec:
 case INDEX_op_bitsel_vec:
+case INDEX_op_eqv_vec:
+case INDEX_op_nand_vec:
 case INDEX_op_neg_vec:
+case INDEX_op_nor_vec:
 case INDEX_op_not_vec:
 case INDEX_op_or_vec:
 case INDEX_op_orc_vec:
@@ -3246,6 +3260,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_or_vec:
 case INDEX_op_orc_vec:
 case INDEX_op_xor_vec:
+case INDEX_op_nand_vec:
+case INDEX_op_nor_vec:
+case INDEX_op_eqv_vec:
 case INDEX_op_cmp_vec:
 case INDEX_op_mul_vec:
 case INDEX_op_rotlv_vec:
-- 
2.25.1




[PULL 04/30] tcg/ppc: Implement vector NAND, NOR, EQV

2022-03-03 Thread Richard Henderson
Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target.h |  6 +++---
 tcg/ppc/tcg-target.c.inc | 15 +++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 3e543161eb..e6cf72503f 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -162,9 +162,9 @@ extern bool have_vsx;
 
 #define TCG_TARGET_HAS_andc_vec 1
 #define TCG_TARGET_HAS_orc_vec  have_isa_2_07
-#define TCG_TARGET_HAS_nand_vec 0
-#define TCG_TARGET_HAS_nor_vec  0
-#define TCG_TARGET_HAS_eqv_vec  0
+#define TCG_TARGET_HAS_nand_vec have_isa_2_07
+#define TCG_TARGET_HAS_nor_vec  1
+#define TCG_TARGET_HAS_eqv_vec  have_isa_2_07
 #define TCG_TARGET_HAS_not_vec  1
 #define TCG_TARGET_HAS_neg_vec  have_isa_3_00
 #define TCG_TARGET_HAS_abs_vec  0
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 69d22e08cb..1f3c5c171c 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -3122,6 +3122,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
 case INDEX_op_xor_vec:
 case INDEX_op_andc_vec:
 case INDEX_op_not_vec:
+case INDEX_op_nor_vec:
+case INDEX_op_eqv_vec:
+case INDEX_op_nand_vec:
 return 1;
 case INDEX_op_orc_vec:
 return have_isa_2_07;
@@ -3400,6 +3403,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_orc_vec:
 insn = VORC;
 break;
+case INDEX_op_nand_vec:
+insn = VNAND;
+break;
+case INDEX_op_nor_vec:
+insn = VNOR;
+break;
+case INDEX_op_eqv_vec:
+insn = VEQV;
+break;
 
 case INDEX_op_cmp_vec:
 switch (args[3]) {
@@ -3787,6 +3799,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_xor_vec:
 case INDEX_op_andc_vec:
 case INDEX_op_orc_vec:
+case INDEX_op_nor_vec:
+case INDEX_op_eqv_vec:
+case INDEX_op_nand_vec:
 case INDEX_op_cmp_vec:
 case INDEX_op_ssadd_vec:
 case INDEX_op_sssub_vec:
-- 
2.25.1




[PULL 03/30] tcg: Add opcodes for vector nand, nor, eqv

2022-03-03 Thread Richard Henderson
We've had placeholders for these opcodes for a while,
and should have support on ppc, s390x and avx512 hosts.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 include/tcg/tcg-opc.h|  3 +++
 include/tcg/tcg.h|  3 +++
 tcg/aarch64/tcg-target.h |  3 +++
 tcg/arm/tcg-target.h |  3 +++
 tcg/i386/tcg-target.h|  3 +++
 tcg/ppc/tcg-target.h |  3 +++
 tcg/s390x/tcg-target.h   |  3 +++
 tcg/optimize.c   | 12 ++--
 tcg/tcg-op-vec.c | 27 ++-
 tcg/tcg.c|  6 ++
 10 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index 675873e200..dd444734d9 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -245,6 +245,9 @@ DEF(or_vec, 1, 2, 0, IMPLVEC)
 DEF(xor_vec, 1, 2, 0, IMPLVEC)
 DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
 DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
+DEF(nand_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nand_vec))
+DEF(nor_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nor_vec))
+DEF(eqv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_eqv_vec))
 DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
 
 DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 939041103e..73869fd9d0 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -183,6 +183,9 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_not_vec  0
 #define TCG_TARGET_HAS_andc_vec 0
 #define TCG_TARGET_HAS_orc_vec  0
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec  0
+#define TCG_TARGET_HAS_eqv_vec  0
 #define TCG_TARGET_HAS_roti_vec 0
 #define TCG_TARGET_HAS_rots_vec 0
 #define TCG_TARGET_HAS_rotv_vec 0
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 876af589ce..485f685bd2 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -131,6 +131,9 @@ typedef enum {
 
 #define TCG_TARGET_HAS_andc_vec 1
 #define TCG_TARGET_HAS_orc_vec  1
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec  0
+#define TCG_TARGET_HAS_eqv_vec  0
 #define TCG_TARGET_HAS_not_vec  1
 #define TCG_TARGET_HAS_neg_vec  1
 #define TCG_TARGET_HAS_abs_vec  1
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 27c27a1f14..7e96495392 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -130,6 +130,9 @@ extern bool use_neon_instructions;
 
 #define TCG_TARGET_HAS_andc_vec 1
 #define TCG_TARGET_HAS_orc_vec  1
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec  0
+#define TCG_TARGET_HAS_eqv_vec  0
 #define TCG_TARGET_HAS_not_vec  1
 #define TCG_TARGET_HAS_neg_vec  1
 #define TCG_TARGET_HAS_abs_vec  1
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 3b2c9437a0..ecd0fa6e05 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -185,6 +185,9 @@ extern bool have_movbe;
 
 #define TCG_TARGET_HAS_andc_vec 1
 #define TCG_TARGET_HAS_orc_vec  0
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec  0
+#define TCG_TARGET_HAS_eqv_vec  0
 #define TCG_TARGET_HAS_not_vec  0
 #define TCG_TARGET_HAS_neg_vec  0
 #define TCG_TARGET_HAS_abs_vec  1
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index c775c97b61..3e543161eb 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -162,6 +162,9 @@ extern bool have_vsx;
 
 #define TCG_TARGET_HAS_andc_vec 1
 #define TCG_TARGET_HAS_orc_vec  have_isa_2_07
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec  0
+#define TCG_TARGET_HAS_eqv_vec  0
 #define TCG_TARGET_HAS_not_vec  1
 #define TCG_TARGET_HAS_neg_vec  have_isa_3_00
 #define TCG_TARGET_HAS_abs_vec  0
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 69217d995b..94ccb179b8 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -145,6 +145,9 @@ extern uint64_t s390_facilities[3];
 
 #define TCG_TARGET_HAS_andc_vec   1
 #define TCG_TARGET_HAS_orc_vecHAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_nand_vec   0
+#define TCG_TARGET_HAS_nor_vec0
+#define TCG_TARGET_HAS_eqv_vec0
 #define TCG_TARGET_HAS_not_vec1
 #define TCG_TARGET_HAS_neg_vec1
 #define TCG_TARGET_HAS_abs_vec1
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 06213fd434..ae081ab29c 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -359,13 +359,13 @@ static uint64_t do_constant_folding_2(TCGOpcode op, 
uint64_t x, uint64_t y)
 CASE_OP_32_64_VEC(orc):
 return x | ~y;
 
-CASE_OP_32_64(eqv):
+CASE_OP_32_64_VEC(eqv):

[PULL 01/30] tcg/optimize: only read val after const check

2022-03-03 Thread Richard Henderson
From: Alex Bennée 

valgrind pointed out that arg_info()->val can be undefined, which will
be the case if the arguments are not constant. The ordering of the
checks ensured that we never relied on an undefined value, but for the
sake of completeness, re-order the code to make this clear.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Alex Bennée 
Message-Id: <20220209112142.3367525-1-alex.ben...@linaro.org>
Signed-off-by: Richard Henderson 
---
 tcg/optimize.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index e573000951..06213fd434 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -552,10 +552,10 @@ static bool do_constant_folding_cond_eq(TCGCond c)
 static int do_constant_folding_cond(TCGType type, TCGArg x,
 TCGArg y, TCGCond c)
 {
-uint64_t xv = arg_info(x)->val;
-uint64_t yv = arg_info(y)->val;
-
 if (arg_is_const(x) && arg_is_const(y)) {
+uint64_t xv = arg_info(x)->val;
+uint64_t yv = arg_info(y)->val;
+
 switch (type) {
 case TCG_TYPE_I32:
 return do_constant_folding_cond_32(xv, yv, c);
@@ -567,7 +567,7 @@ static int do_constant_folding_cond(TCGType type, TCGArg x,
 }
 } else if (args_are_copies(x, y)) {
 return do_constant_folding_cond_eq(c);
-} else if (arg_is_const(y) && yv == 0) {
+} else if (arg_is_const(y) && arg_info(y)->val == 0) {
 switch (c) {
 case TCG_COND_LTU:
 return 0;
-- 
2.25.1
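
The pattern, reduced to a toy (hypothetical struct, not the TCG
types): memcheck tracks undefined values through copies, and compiler
optimization can turn a guarded use into an unconditional one, so the
robust fix is to read the field only under its guard.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct info {
    bool is_const;
    uint64_t val;       /* meaningful only when is_const is set */
};

/* Before: val is read even when it may be undefined; the branch is
 * what kept the old code correct in practice. */
static uint64_t before(const struct info *i)
{
    uint64_t v = i->val;
    return i->is_const ? v : 0;
}

/* After: the possibly-undefined field is read only on the guarded path. */
static uint64_t after(const struct info *i)
{
    if (i->is_const) {
        return i->val;
    }
    return 0;
}

int main(void)
{
    struct info i = { .is_const = true, .val = 42 };
    printf("%lu %lu\n", (unsigned long)before(&i), (unsigned long)after(&i));
    return 0;
}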




[PATCH 2/5] python/utils: add VerboseProcessError

2022-03-03 Thread John Snow
This adds an exception class that extends the Python stdlib
subprocess.CalledProcessError.

The difference is that the str() method of this exception also adds the
stdout/stderr logs. In effect, if this exception goes unhandled, Python
will print the output in a visually distinct wrapper to the terminal so
that it's easy to spot in a sea of traceback information.

Signed-off-by: John Snow 
---
 python/qemu/utils/__init__.py | 36 +++
 1 file changed, 36 insertions(+)

diff --git a/python/qemu/utils/__init__.py b/python/qemu/utils/__init__.py
index 5babf40df2..355ac550bc 100644
--- a/python/qemu/utils/__init__.py
+++ b/python/qemu/utils/__init__.py
@@ -18,6 +18,7 @@
 import os
 import re
 import shutil
+from subprocess import CalledProcessError
 import textwrap
 from typing import Optional
 
@@ -26,6 +27,7 @@
 
 
 __all__ = (
+'VerboseProcessError',
 'add_visual_margin',
 'get_info_usernet_hostfwd_port',
 'kvm_available',
@@ -121,3 +123,37 @@ def _wrap(line: str) -> str:
 os.linesep.join(_wrap(line) for line in content.splitlines()),
 _bar(None, top=False),
 ))
+
+
+class VerboseProcessError(CalledProcessError):
+"""
+The same as CalledProcessError, but more verbose.
+
+This is useful for debugging failed calls during test executions.
+The return code, signal (if any), and terminal output will be displayed
+on unhandled exceptions.
+"""
+def summary(self) -> str:
+"""Return the normal CalledProcessError str() output."""
+return super().__str__()
+
+def __str__(self) -> str:
+lmargin = '  '
+width = -len(lmargin)
+sections = []
+
+name = 'output' if self.stderr is None else 'stdout'
+if self.stdout:
+sections.append(add_visual_margin(self.stdout, width, name))
+else:
+sections.append(f"{name}: N/A")
+
+if self.stderr:
+sections.append(add_visual_margin(self.stderr, width, 'stderr'))
+elif self.stderr is not None:
+sections.append("stderr: N/A")
+
+return os.linesep.join((
+self.summary(),
+textwrap.indent(os.linesep.join(sections), prefix=lmargin),
+))
-- 
2.34.1




[PULL 02/30] tcg: Set MAX_OPC_PARAM_IARGS to 7

2022-03-03 Thread Richard Henderson
From: Ziqiao Kong 

The last entry of the DEF_HELPER_FLAGS_n series is DEF_HELPER_FLAGS_7,
and thus MAX_OPC_PARAM_IARGS should be 7.

Reviewed-by: Taylor Simpson 
Signed-off-by: Ziqiao Kong 
Message-Id: <20220227113127.414533-2-ziqiaok...@gmail.com>
Fixes: e6cadf49c3d ("tcg: Add support for a helper with 7 arguments")
Signed-off-by: Richard Henderson 
---
 include/tcg/tcg.h| 2 +-
 tcg/tci/tcg-target.c.inc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 42f5b500ed..939041103e 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -43,7 +43,7 @@
 #else
 #define MAX_OPC_PARAM_PER_ARG 1
 #endif
-#define MAX_OPC_PARAM_IARGS 6
+#define MAX_OPC_PARAM_IARGS 7
 #define MAX_OPC_PARAM_OARGS 1
 #define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
 
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 9ff1fa0832..98337c567a 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -197,7 +197,7 @@ static const int tcg_target_reg_alloc_order[] = {
 TCG_REG_R0,
 };
 
-#if MAX_OPC_PARAM_IARGS != 6
+#if MAX_OPC_PARAM_IARGS != 7
 # error Fix needed, number of supported input arguments changed!
 #endif
 
-- 
2.25.1




[PULL 00/30] tcg patch queue

2022-03-03 Thread Richard Henderson
The following changes since commit 36eae3a732a1f2aa81391e871ac0e9bb3233e7d7:

  Merge remote-tracking branch 
'remotes/dgilbert-gitlab/tags/pull-migration-20220302b' into staging 
(2022-03-02 20:55:48 +)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220303

for you to fetch changes up to f23e6de25c31cadd9a3b7122f9384e6b259ce37f:

  tcg/loongarch64: Support TCG_TARGET_SIGNED_ADDR32 (2022-03-03 10:47:20 -1000)


Reorder do_constant_folding_cond test to satisfy valgrind.
Fix value of MAX_OPC_PARAM_IARGS.
Add opcodes for vector nand, nor, eqv.
Support vector nand, nor, eqv on PPC and S390X hosts.
Support AVX512VL, AVX512BW, AVX512DQ, and AVX512VBMI2.
Support 32-bit guest addresses as signed values.


Alex Bennée (1):
  tcg/optimize: only read val after const check

Richard Henderson (28):
  tcg: Add opcodes for vector nand, nor, eqv
  tcg/ppc: Implement vector NAND, NOR, EQV
  tcg/s390x: Implement vector NAND, NOR, EQV
  tcg/i386: Detect AVX512
  tcg/i386: Add tcg_out_evex_opc
  tcg/i386: Use tcg_can_emit_vec_op in expand_vec_cmp_noinv
  tcg/i386: Implement avx512 variable shifts
  tcg/i386: Implement avx512 scalar shift
  tcg/i386: Implement avx512 immediate sari shift
  tcg/i386: Implement avx512 immediate rotate
  tcg/i386: Implement avx512 variable rotate
  tcg/i386: Support avx512vbmi2 vector shift-double instructions
  tcg/i386: Expand vector word rotate as avx512vbmi2 shift-double
  tcg/i386: Remove rotls_vec from tcg_target_op_def
  tcg/i386: Expand scalar rotate with avx512 insns
  tcg/i386: Implement avx512 min/max/abs
  tcg/i386: Implement avx512 multiply
  tcg/i386: Implement more logical operations for avx512
  tcg/i386: Implement bitsel for avx512
  tcg: Add TCG_TARGET_SIGNED_ADDR32
  accel/tcg: Split out g2h_tlbe
  accel/tcg: Support TCG_TARGET_SIGNED_ADDR32 for softmmu
  accel/tcg: Add guest_base_signed_addr32 for user-only
  linux-user: Support TCG_TARGET_SIGNED_ADDR32
  tcg/aarch64: Support TCG_TARGET_SIGNED_ADDR32
  tcg/mips: Support TCG_TARGET_SIGNED_ADDR32
  tcg/riscv: Support TCG_TARGET_SIGNED_ADDR32
  tcg/loongarch64: Support TCG_TARGET_SIGNED_ADDR32

Ziqiao Kong (1):
  tcg: Set MAX_OPC_PARAM_IARGS to 7

 include/exec/cpu-all.h|  20 +-
 include/exec/cpu_ldst.h   |   3 +-
 include/qemu/cpuid.h  |  20 +-
 include/tcg/tcg-opc.h |   3 +
 include/tcg/tcg.h |   5 +-
 tcg/aarch64/tcg-target-sa32.h |   7 +
 tcg/aarch64/tcg-target.h  |   3 +
 tcg/arm/tcg-target-sa32.h |   1 +
 tcg/arm/tcg-target.h  |   3 +
 tcg/i386/tcg-target-con-set.h |   1 +
 tcg/i386/tcg-target-sa32.h|   1 +
 tcg/i386/tcg-target.h |  17 +-
 tcg/i386/tcg-target.opc.h |   3 +
 tcg/loongarch64/tcg-target-sa32.h |   1 +
 tcg/mips/tcg-target-sa32.h|   9 +
 tcg/ppc/tcg-target-sa32.h |   1 +
 tcg/ppc/tcg-target.h  |   3 +
 tcg/riscv/tcg-target-sa32.h   |   5 +
 tcg/s390x/tcg-target-sa32.h   |   1 +
 tcg/s390x/tcg-target.h|   3 +
 tcg/sparc/tcg-target-sa32.h   |   1 +
 tcg/tci/tcg-target-sa32.h |   1 +
 accel/tcg/cputlb.c|  36 ++--
 bsd-user/main.c   |   4 +
 linux-user/elfload.c  |  62 --
 linux-user/main.c |   3 +
 tcg/optimize.c|  20 +-
 tcg/tcg-op-vec.c  |  27 ++-
 tcg/tcg.c |  10 +
 tcg/aarch64/tcg-target.c.inc  |  81 +---
 tcg/i386/tcg-target.c.inc | 387 +++---
 tcg/loongarch64/tcg-target.c.inc  |  15 +-
 tcg/mips/tcg-target.c.inc |  10 +-
 tcg/ppc/tcg-target.c.inc  |  15 ++
 tcg/riscv/tcg-target.c.inc|   8 +-
 tcg/s390x/tcg-target.c.inc|  17 ++
 tcg/tci/tcg-target.c.inc  |   2 +-
 37 files changed, 640 insertions(+), 169 deletions(-)
 create mode 100644 tcg/aarch64/tcg-target-sa32.h
 create mode 100644 tcg/arm/tcg-target-sa32.h
 create mode 100644 tcg/i386/tcg-target-sa32.h
 create mode 100644 tcg/loongarch64/tcg-target-sa32.h
 create mode 100644 tcg/mips/tcg-target-sa32.h
 create mode 100644 tcg/ppc/tcg-target-sa32.h
 create mode 100644 tcg/riscv/tcg-target-sa32.h
 create mode 100644 tcg/s390x/tcg-target-sa32.h
 create mode 100644 tcg/sparc/tcg-target-sa32.h
 create mode 100644 tcg/tci/tcg-target-sa32.h



[PATCH 1/5] python/utils: add add_visual_margin() text decoration utility

2022-03-03 Thread John Snow
>>> print(add_visual_margin(msg, width=72, name="Commit Message"))
┏━ Commit Message ━━
┃ add_visual_margin() takes a chunk of text and wraps it in a visual
┃ container that force-wraps to a specified width. An optional title
┃ label may be given, and any of the individual glyphs used to draw the
┃ box may be replaced or specified as well.
┗━━━

Signed-off-by: John Snow 
---
 python/qemu/utils/__init__.py | 78 +++
 1 file changed, 78 insertions(+)

diff --git a/python/qemu/utils/__init__.py b/python/qemu/utils/__init__.py
index 7f1a5138c4..5babf40df2 100644
--- a/python/qemu/utils/__init__.py
+++ b/python/qemu/utils/__init__.py
@@ -15,7 +15,10 @@
 # the COPYING file in the top-level directory.
 #
 
+import os
 import re
+import shutil
+import textwrap
 from typing import Optional
 
 # pylint: disable=import-error
@@ -23,6 +26,7 @@
 
 
 __all__ = (
+'add_visual_margin',
 'get_info_usernet_hostfwd_port',
 'kvm_available',
 'list_accel',
@@ -43,3 +47,77 @@ def get_info_usernet_hostfwd_port(info_usernet_output: str) -> Optional[int]:
 if match is not None:
 return int(match[1])
 return None
+
+
+# pylint: disable=too-many-arguments
+def add_visual_margin(
+content: str = '',
+width: Optional[int] = None,
+name: Optional[str] = None,
+padding: int = 1,
+upper_left: str = '┏',
+lower_left: str = '┗',
+horizontal: str = '━',
+vertical: str = '┃',
+) -> str:
+"""
+Decorate and wrap some text with a visual decoration around it.
+
+This function assumes that the text decoration characters are single
+characters that display using a single monospace column.
+
+┏━ Example ━
+┃ This is what this function looks like with text content that's
+┃ wrapped to 72 characters. The right-hand margin is left open to
+┃ accommodate the occasional unicode character that might make
+┃ predicting the total "visual" width of a line difficult. This
+┃ provides a visual distinction that's good-enough, though.
+┗━━━
+
+:param content: The text to wrap and decorate.
+:param width:
+The number of columns to use, including for the decoration
+itself. The default (None) uses the available width of the
+current terminal, or a fallback of 72 columns. A negative number
+subtracts a fixed width from the default size. The default obeys
+the COLUMNS environment variable, if set.
+:param name: A label to apply to the upper-left of the box.
+:param padding: How many columns of padding to apply inside.
+:param upper_left: Upper-left single-width text decoration character.
+:param lower_left: Lower-left single-width text decoration character.
+:param horizontal: Horizontal single-width text decoration character.
+:param vertical: Vertical single-width text decoration character.
+"""
+if width is None or width < 0:
+avail = shutil.get_terminal_size(fallback=(72, 24))[0]
+if width is None:
+_width = avail
+else:
+_width = avail + width
+else:
+_width = width
+
+prefix = vertical + (' ' * padding)
+
+def _bar(name: Optional[str], top: bool = True) -> str:
+ret = upper_left if top else lower_left
+if name is not None:
+ret += f"{horizontal} {name} "
+
+filler_len = _width - len(ret)
+ret += f"{horizontal * filler_len}"
+return ret
+
+def _wrap(line: str) -> str:
+return os.linesep.join(
+textwrap.wrap(
+line, width=_width - padding, initial_indent=prefix,
+subsequent_indent=prefix, replace_whitespace=False,
+drop_whitespace=True, break_on_hyphens=False)
+)
+
+return os.linesep.join((
+_bar(name, top=True),
+os.linesep.join(_wrap(line) for line in content.splitlines()),
+_bar(None, top=False),
+))
-- 
2.34.1




[PATCH 5/5] iotests: fortify compare_images() against crashes

2022-03-03 Thread John Snow
Fortify compare_images() to be more discerning about the status codes it
receives. If qemu_img() returns an exit code that implies it didn't
actually perform the comparison, treat that as an exceptional
circumstance and force the caller to be aware of the peril.

If a negative test is desired (perhaps to test how qemu_img compare
behaves on malformed images, for instance), it is still possible to
catch the exception in the test and deal with that circumstance
manually.
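
A negative test could then look roughly like this (sketch, with
hypothetical image names):

    try:
        identical = compare_images('a.qcow2', 'b.qcow2')
    except subprocess.CalledProcessError:
        # qemu-img produced no comparison verdict at all (crash,
        # unreadable image, bad arguments, ...); decide explicitly
        # what the test should do with that.
        identical = False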

Signed-off-by: John Snow 
---
 tests/qemu-iotests/iotests.py | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index f6c0f1b0a0..654ce834e6 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -474,11 +474,22 @@ def qemu_nbd_popen(*args):
 p.kill()
 p.wait()
 
-def compare_images(img1, img2, fmt1=imgfmt, fmt2=imgfmt):
-'''Return True if two image files are identical'''
-res = qemu_img('compare', '-f', fmt1,
-   '-F', fmt2, img1, img2, check=False)
-return res.returncode == 0
+def compare_images(img1: str, img2: str,
+   fmt1: str = imgfmt, fmt2: str = imgfmt) -> bool:
+"""
+Compare two images with QEMU_IMG; return True if they are identical.
+
+:raise CalledProcessError:
+when qemu-img crashes or returns a status code of anything other
+than 0 (identical) or 1 (different).
+"""
+try:
+qemu_img('compare', '-f', fmt1, '-F', fmt2, img1, img2)
+return True
+except subprocess.CalledProcessError as exc:
+if exc.returncode == 1:
+return False
+raise
 
 def create_image(name, size):
 '''Create a fully-allocated raw image with sector markers'''
-- 
2.34.1




[PATCH 4/5] iotests: make qemu_img raise on non-zero rc by default

2022-03-03 Thread John Snow
Re-write qemu_img() as a function that will by default raise a
VerboseProcessError (extended from CalledProcessError) on
non-zero return codes. This will produce a stack trace that will show
the command line arguments and return code from the failed process run.

Users that want something more flexible (there appears to be only one)
can use check=False and manage the return themselves. However, when the
return code is negative, the Exception will be raised no matter what.
This is done under the belief that there's no legitimate reason, even in
negative tests, to see a crash from qemu-img.
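
For illustration, the resulting calling convention is roughly this
(image names hypothetical):

    # Default: any non-zero exit status raises VerboseProcessError.
    qemu_img('create', '-f', 'qcow2', 'disk.img', '64M')

    # Negative tests opt out; a crash (negative status) still raises.
    res = qemu_img('compare', 'a.img', 'b.img', check=False)
    assert res.returncode in (0, 1)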

Signed-off-by: John Snow 
---
 tests/qemu-iotests/257|  8 --
 tests/qemu-iotests/iotests.py | 54 +++
 2 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/tests/qemu-iotests/257 b/tests/qemu-iotests/257
index fb5359c581..e7e7a2317e 100755
--- a/tests/qemu-iotests/257
+++ b/tests/qemu-iotests/257
@@ -241,11 +241,13 @@ def compare_images(image, reference, baseimg=None, expected_match=True):
 expected_ret = 0 if expected_match else 1
 if baseimg:
 qemu_img("rebase", "-u", "-b", baseimg, '-F', iotests.imgfmt, image)
-ret = qemu_img("compare", image, reference)
+
+sub = qemu_img("compare", image, reference, check=False)
+
 log('qemu_img compare "{:s}" "{:s}" ==> {:s}, {:s}'.format(
 image, reference,
-"Identical" if ret == 0 else "Mismatch",
-"OK!" if ret == expected_ret else "ERROR!"),
+"Identical" if sub.returncode == 0 else "Mismatch",
+"OK!" if sub.returncode == expected_ret else "ERROR!"),
 filters=[iotests.filter_testfiles])
 
 def test_bitmap_sync(bsync_mode, msync_mode='bitmap', failure=None):
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 6ba65eb1ff..f6c0f1b0a0 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -39,6 +39,7 @@
 
 from qemu.machine import qtest
 from qemu.qmp import QMPMessage
+from qemu.utils import VerboseProcessError
 
 # Use this logger for logging messages directly from the iotests module
 logger = logging.getLogger('qemu.iotests')
@@ -215,9 +216,49 @@ def qemu_img_pipe_and_status(*args: str) -> Tuple[str, int]:
 return qemu_tool_pipe_and_status('qemu-img', full_args,
  drop_successful_output=is_create)
 
-def qemu_img(*args: str) -> int:
-'''Run qemu-img and return the exit code'''
-return qemu_img_pipe_and_status(*args)[1]
+def qemu_img(*args: str, check: bool = True, combine_stdio: bool = True
+ ) -> subprocess.CompletedProcess[str]:
+"""
+Run qemu-img and return the status code and console output.
+
+This function always prepends QEMU_IMG_OPTIONS and may further alter
+the args for 'create' commands.
+
+:param args: command-line arguments to qemu-img.
+:param check: Enforce a return code of zero.
+:param combine_stdio: set to False to keep stdout/stderr separated.
+
+:raise VerboseProcessError:
+When the return code is negative, or on any non-zero exit code
+when 'check=True' was provided (the default). This exception has
+'stdout', 'stderr', and 'returncode' properties that may be
+inspected to show greater detail. If this exception is not
+handled, the command-line, return code, and all console output
+will be included at the bottom of the stack trace.
+
+:return: a CompletedProcess. This object has args, returncode, and
+stdout properties. If streams are not combined, it will also
+have a stderr property.
+"""
+full_args = qemu_img_args + qemu_img_create_prepare_args(list(args))
+
+subp = subprocess.run(
+full_args,
+stdout=subprocess.PIPE,
+stderr=subprocess.STDOUT if combine_stdio else subprocess.PIPE,
+universal_newlines=True,
+check=False
+)
+
+if (check and subp.returncode) or (subp.returncode < 0):
+raise VerboseProcessError(
+subp.returncode, full_args,
+output=subp.stdout,
+stderr=subp.stderr,
+)
+
+return subp
+
 
 def ordered_qmp(qmsg, conv_keys=True):
 # Dictionaries are not ordered prior to 3.6, therefore:
@@ -232,7 +273,7 @@ def ordered_qmp(qmsg, conv_keys=True):
 return od
 return qmsg
 
-def qemu_img_create(*args):
+def qemu_img_create(*args: str) -> subprocess.CompletedProcess[str]:
 return qemu_img('create', *args)
 
 def qemu_img_measure(*args):
@@ -435,8 +476,9 @@ def qemu_nbd_popen(*args):
 
 def compare_images(img1, img2, fmt1=imgfmt, fmt2=imgfmt):
 '''Return True if two image files are identical'''
-return qemu_img('compare', '-f', fmt1,
-'-F', fmt2, img1, img2) == 0
+res = qemu_img('compare', '-f', fmt1,
+   '-F', fmt2, img1, img2, check=False)
+return res.returncode == 0
 
 def create_image(name, size):
'''Create a fully-allocated raw image with sector markers'''

[PATCH 3/5] iotests: Remove explicit checks for qemu_img() == 0

2022-03-03 Thread John Snow
qemu_img() returning zero ought to be the rule, not the
exception. Remove all explicit checks against the condition in
preparation for making non-zero returns an Exception.
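
The transformation is mechanical, e.g. (sketch; `img` stands for any
test image path):

    # before: explicit status check
    assert qemu_img('create', '-f', iotests.imgfmt, img, '64M') == 0
    # after: qemu_img() itself will raise once the next patch lands
    qemu_img('create', '-f', iotests.imgfmt, img, '64M')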

Signed-off-by: John Snow 
Reviewed-by: Eric Blake 
---
 tests/qemu-iotests/163 |  9 +++--
 tests/qemu-iotests/216 |  6 +++---
 tests/qemu-iotests/218 |  2 +-
 tests/qemu-iotests/224 | 11 +--
 tests/qemu-iotests/228 | 12 ++--
 tests/qemu-iotests/257 |  3 +--
 tests/qemu-iotests/258 |  4 ++--
 tests/qemu-iotests/310 | 14 +++---
 tests/qemu-iotests/tests/block-status-cache|  3 +--
 tests/qemu-iotests/tests/image-fleecing|  4 ++--
 tests/qemu-iotests/tests/mirror-ready-cancel-error |  6 ++
 tests/qemu-iotests/tests/mirror-top-perms  |  3 +--
 .../qemu-iotests/tests/remove-bitmap-from-backing  |  8 
 tests/qemu-iotests/tests/stream-error-on-reset |  4 ++--
 14 files changed, 40 insertions(+), 49 deletions(-)

diff --git a/tests/qemu-iotests/163 b/tests/qemu-iotests/163
index b8bfc95358..e4cd4b230f 100755
--- a/tests/qemu-iotests/163
+++ b/tests/qemu-iotests/163
@@ -107,8 +107,7 @@ class ShrinkBaseClass(iotests.QMPTestCase):
 
 if iotests.imgfmt == 'raw':
 return
-self.assertEqual(qemu_img('check', test_img), 0,
- "Verifying image corruption")
+qemu_img('check', test_img)
 
 def test_empty_image(self):
 qemu_img('resize',  '-f', iotests.imgfmt, '--shrink', test_img,
@@ -130,8 +129,7 @@ class ShrinkBaseClass(iotests.QMPTestCase):
 qemu_img('resize',  '-f', iotests.imgfmt, '--shrink', test_img,
  self.shrink_size)
 
-self.assertEqual(qemu_img("compare", test_img, check_img), 0,
- "Verifying image content")
+qemu_img("compare", test_img, check_img)
 
 self.image_verify()
 
@@ -146,8 +144,7 @@ class ShrinkBaseClass(iotests.QMPTestCase):
 qemu_img('resize',  '-f', iotests.imgfmt, '--shrink', test_img,
  self.shrink_size)
 
-self.assertEqual(qemu_img("compare", test_img, check_img), 0,
- "Verifying image content")
+qemu_img("compare", test_img, check_img)
 
 self.image_verify()
 
diff --git a/tests/qemu-iotests/216 b/tests/qemu-iotests/216
index c02f8d2880..88b385afa3 100755
--- a/tests/qemu-iotests/216
+++ b/tests/qemu-iotests/216
@@ -51,10 +51,10 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('--- Setting up images ---')
 log('')
 
-assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0
+qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M')
 assert qemu_io_silent(base_img_path, '-c', 'write -P 1 0M 1M') == 0
-assert qemu_img('create', '-f', iotests.imgfmt, '-b', base_img_path,
-'-F', iotests.imgfmt, top_img_path) == 0
+qemu_img('create', '-f', iotests.imgfmt, '-b', base_img_path,
+ '-F', iotests.imgfmt, top_img_path)
 assert qemu_io_silent(top_img_path,  '-c', 'write -P 2 1M 1M') == 0
 
 log('Done')
diff --git a/tests/qemu-iotests/218 b/tests/qemu-iotests/218
index 4922b4d3b6..853ed52b34 100755
--- a/tests/qemu-iotests/218
+++ b/tests/qemu-iotests/218
@@ -145,7 +145,7 @@ log('')
 with iotests.VM() as vm, \
  iotests.FilePath('src.img') as src_img_path:
 
-assert qemu_img('create', '-f', iotests.imgfmt, src_img_path, '64M') == 0
+qemu_img('create', '-f', iotests.imgfmt, src_img_path, '64M')
 assert qemu_io_silent('-f', iotests.imgfmt, src_img_path,
   '-c', 'write -P 42 0M 64M') == 0
 
diff --git a/tests/qemu-iotests/224 b/tests/qemu-iotests/224
index 38dd153625..c31c55b49d 100755
--- a/tests/qemu-iotests/224
+++ b/tests/qemu-iotests/224
@@ -47,12 +47,11 @@ for filter_node_name in False, True:
  iotests.FilePath('top.img') as top_img_path, \
  iotests.VM() as vm:
 
-assert qemu_img('create', '-f', iotests.imgfmt,
-base_img_path, '64M') == 0
-assert qemu_img('create', '-f', iotests.imgfmt, '-b', base_img_path,
-'-F', iotests.imgfmt, mid_img_path) == 0
-assert qemu_img('create', '-f', iotests.imgfmt, '-b', mid_img_path,
-'-F', iotests.imgfmt, top_img_path) == 0
+qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M')
+qemu_img('create', '-f', iotests.imgfmt, '-b', base_img_path,
+ '-F', iotests.imgfmt, mid_img_path)
+qemu_img('create', '-f', iotests.imgfmt, '-b', mid_img_path,
+ '-F', iotests.imgfmt, top_img_path)
 
 # Something to commit
 assert qemu_io_silent(mid_img_path, '-c', 'write -P 1 0 1M') == 0
diff 

[PATCH 0/5] iotests: add enhanced debugging info to qemu-img failures

2022-03-03 Thread John Snow
This is kinda-sorta V3-ish of a series I started in response to Thomas
Huth's encountering a failure in qemu-img because of missing zstd
support. This series changes the qemu_img() function in iotests.py to
one that raises an Exception on non-zero return code by default.

Alongside this, the Exception object itself is also augmented so that it
prints the stdout/stderr logs to screen if the exception goes unhandled
so that failure cases are very obvious and easy to spot in the middle of
python tracebacks.

(Test this out yourself: Disable zstd support and then run qcow2 iotest
065 before and after this patchset. It makes a real difference!)

NOTE: I have another 13-ish patches that go the rest of the way and
ensure that *every* call to qemu-img goes through this new qemu_img()
function, but for the sake of doing the most good in the shortest amount
of time, I am sending just the first 5 patches, and the rest will be
sent later. I think this is a very good series to get in before freeze
so that we have it during the heavy testing season.

John Snow (5):
  python/utils: add add_visual_margin() text decoration utility
  python/utils: add VerboseProcessError
  iotests: Remove explicit checks for qemu_img() == 0
  iotests: make qemu_img raise on non-zero rc by default
  iotests: fortify compare_images() against crashes

 python/qemu/utils/__init__.py | 114 ++
 tests/qemu-iotests/163|   9 +-
 tests/qemu-iotests/216|   6 +-
 tests/qemu-iotests/218|   2 +-
 tests/qemu-iotests/224|  11 +-
 tests/qemu-iotests/228|  12 +-
 tests/qemu-iotests/257|  11 +-
 tests/qemu-iotests/258|   4 +-
 tests/qemu-iotests/310|  14 +--
 tests/qemu-iotests/iotests.py |  69 +--
 tests/qemu-iotests/tests/block-status-cache   |   3 +-
 tests/qemu-iotests/tests/image-fleecing   |   4 +-
 .../tests/mirror-ready-cancel-error   |   6 +-
 tests/qemu-iotests/tests/mirror-top-perms |   3 +-
 .../tests/remove-bitmap-from-backing  |   8 +-
 .../qemu-iotests/tests/stream-error-on-reset  |   4 +-
 16 files changed, 220 insertions(+), 60 deletions(-)

-- 
2.34.1





Re: [PATCH] docs/system: riscv: Update description of CPU

2022-03-03 Thread Palmer Dabbelt

On Tue, 08 Feb 2022 05:07:23 PST (-0800), liyu.yukit...@bytedance.com wrote:

Since the hypervisor extension is no longer experimental and is enabled
for the default CPU, the previous command is no longer available and the
option `x-h=true` or `h=true` is also no longer required.

Signed-off-by: Yu Li 
---
  docs/system/riscv/virt.rst | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst
index fa016584bf..08ce3c4177 100644
--- a/docs/system/riscv/virt.rst
+++ b/docs/system/riscv/virt.rst
@@ -23,9 +23,9 @@ The ``virt`` machine supports the following devices:
  * 1 generic PCIe host bridge
  * The fw_cfg device that allows a guest to obtain data from QEMU

-Note that the default CPU is a generic RV32GC/RV64GC. Optional extensions
-can be enabled via command line parameters, e.g.: ``-cpu rv64,x-h=true``
-enables the hypervisor extension for RV64.
+The hypervisor extension has been enabled for the default CPU, so virtual
+machines with hypervisor extension can simply be used without explicitly
+declaring.

  Hardware configuration information
  --


I saw this landed, but I'm not sure the wording is quite right.  I think 
it's at least missing what is being declared, so something like this


   diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst
   index 08ce3c4177..fe40d6565a 100644
   --- a/docs/system/riscv/virt.rst
   +++ b/docs/system/riscv/virt.rst
   @@ -24,8 +24,8 @@ The ``virt`` machine supports the following devices:
* The fw_cfg device that allows a guest to obtain data from QEMU
   
The hypervisor extension has been enabled for the default CPU, so virtual

   -machines with hypervisor extension can simply be used without explicitly
   -declaring.
   +machines with the hypervisor extension can simply be used without explicitly
   +declaring it as enabled.
   
Hardware configuration information

--

That said, I think we'd be better off just picking a different example 
extension to enable -- IMO this was less about the hypervisor extension 
and more about the syntax for enabling extensions, that's certainly what 
I end up looking at the docs for.  So IMO we'd be better off with 
something like


   diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst
   index 08ce3c4177..f59e8777b2 100644
   --- a/docs/system/riscv/virt.rst
   +++ b/docs/system/riscv/virt.rst
   @@ -23,9 +23,9 @@ The ``virt`` machine supports the following devices:
* 1 generic PCIe host bridge
* The fw_cfg device that allows a guest to obtain data from QEMU
   
   -The hypervisor extension has been enabled for the default CPU, so virtual

   -machines with hypervisor extension can simply be used without explicitly
   -declaring.
   +Note that the default CPU is a generic RV32GC/RV64GC. Optional extensions
   +can be enabled via command line parameters, e.g.: ``-cpu rv64,c=false``
   +disables the C extension.
   
Hardware configuration information

--
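
(For context, that example corresponds to an invocation along the lines
of ``qemu-system-riscv64 -machine virt -cpu rv64,c=false ...``; the
exact set of togglable extension flags depends on the QEMU version.)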



Re: [PATCH v2 5/5] tests/tcg/ppc64le: Use Altivec register names in clobber list

2022-03-03 Thread Matheus K. Ferst

On 03/03/2022 16:30, Richard Henderson wrote:

On 3/3/22 07:20, matheus.fe...@eldorado.org.br wrote:

From: Matheus Ferst 

LLVM/Clang doesn't know the VSX registers when compiling with
-mabi=elfv1. Use only registers >= 32 and list them with their Altivec
name.

Signed-off-by: Matheus Ferst 


This description isn't quite right.  The change to the m[tf]vsr insns is
a generic bug fix, and not related to Clang.



I'm not sure I understood. I'm targeting the Clang problem with this 
patch; is something else being fixed by this change?
AFAICT, the old "mtvsrd 0, %2" and "mfvsrd %0, 0" were correct, I'm just 
changing from VSR 0 to VSR 32 to allow the clobber with Clang, but GCC 
doesn't seem to have this limitation with ELFv1.


Thanks,
Matheus K. Ferst
Instituto de Pesquisas ELDORADO 
Software Analyst
Aviso Legal - Disclaimer 


Re: [PATCH v2 2/5] target/ppc: change xs[n]madd[am]sp to use float64r32_muladd

2022-03-03 Thread Matheus K. Ferst

On 03/03/2022 15:49, Richard Henderson wrote:

On 3/3/22 07:20, matheus.fe...@eldorado.org.br wrote:

From: Matheus Ferst 

Change VSX Scalar Multiply-Add/Subtract Type-A/M Single Precision
helpers to use float64r32_muladd. This method should correctly handle
all rounding modes, so the workaround for float_round_nearest_even can
be dropped.

Reviewed-by: Richard Henderson 
Signed-off-by: Matheus Ferst 
---
  target/ppc/fpu_helper.c | 93 -
  1 file changed, 35 insertions(+), 58 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 8f970288f5..c973968ed6 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -1916,22 +1916,19 @@ void helper_xsdivqp(CPUPPCState *env, uint32_t opcode,

   *   fld   - vsr_t field (VsrD(*) or VsrW(*))
   *   sfprf - set FPRF
   */
-#define VSX_RE(op, nels, tp, fld, sfprf, r2sp)                               \
+#define VSX_RE(op, nels, tp, op_tp, fld, sfprf)                              \
 void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb)             \
 {                                                                            \
     ppc_vsr_t t = { };                                                       \
-    int i;                                                                   \
+    int i, flags;                                                            \
                                                                              \
     helper_reset_fpstatus(env);                                              \
                                                                              \
     for (i = 0; i < nels; i++) {                                             \
-        if (unlikely(tp##_is_signaling_nan(xb->fld, &env->fp_status))) {     \
+        t.fld = op_tp##_div(tp##_one, xb->fld, &env->fp_status);             \
+        flags = get_float_exception_flags(&env->fp_status);                  \
+        if (unlikely(flags & float_flag_invalid_snan)) {


You seem to have squashed the change to recip-estimate into the same 
patch as muladd.



-#define VSX_RSQRTE(op, nels, tp, fld, sfprf, r2sp)                           \
+#define VSX_RSQRTE(op, nels, tp, op_tp, fld, sfprf)                          \


And recip-sqrt-estimate.

I guess it's ok to squash, since it's all related, but you should update
the patch description if you leave it this way.
description if you leave it this way.



Sorry, I cherry-picked the wrong branch. This patch should just be a 
rebase of v1. I'll send the changes to 
VSX_{ADD_SUB,MUL,DIV,RE,SQRT,RSQRTE} in a separate patch series since 
it's not test-related.


Thanks,
Matheus K. Ferst
Instituto de Pesquisas ELDORADO 
Software Analyst
Aviso Legal - Disclaimer 


[PULL 5/7] target/nios2: Split mmu_write

2022-03-03 Thread Richard Henderson
Create three separate functions for the three separate registers.
Avoid extra dispatch through op_helper.c.
Dispatch to the correct function in translation.
Clean up the ifdefs in wrctl.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/nios2/helper.h|   4 +-
 target/nios2/mmu.c   | 180 +++
 target/nios2/op_helper.c |   5 --
 target/nios2/translate.c |  26 +++---
 4 files changed, 104 insertions(+), 111 deletions(-)

diff --git a/target/nios2/helper.h b/target/nios2/helper.h
index 6d8eec1814..21ef7f0791 100644
--- a/target/nios2/helper.h
+++ b/target/nios2/helper.h
@@ -21,6 +21,8 @@
 DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32)
 
 #if !defined(CONFIG_USER_ONLY)
-DEF_HELPER_3(mmu_write, void, env, i32, i32)
+DEF_HELPER_2(mmu_write_tlbacc, void, env, i32)
+DEF_HELPER_2(mmu_write_tlbmisc, void, env, i32)
+DEF_HELPER_2(mmu_write_pteaddr, void, env, i32)
 DEF_HELPER_1(check_interrupts, void, env)
 #endif
diff --git a/target/nios2/mmu.c b/target/nios2/mmu.c
index 437fad09b7..4daab2a7ab 100644
--- a/target/nios2/mmu.c
+++ b/target/nios2/mmu.c
@@ -23,6 +23,7 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "mmu.h"
+#include "exec/helper-proto.h"
 #include "trace/trace-target_nios2.h"
 
 
@@ -80,106 +81,103 @@ static void mmu_flush_pid(CPUNios2State *env, uint32_t pid)
 }
 }
 
-void mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v)
+void helper_mmu_write_tlbacc(CPUNios2State *env, uint32_t v)
 {
 CPUState *cs = env_cpu(env);
 Nios2CPU *cpu = env_archcpu(env);
 
-switch (rn) {
-case CR_TLBACC:
-trace_nios2_mmu_write_tlbacc(v >> CR_TLBACC_IGN_SHIFT,
- (v & CR_TLBACC_C) ? 'C' : '.',
- (v & CR_TLBACC_R) ? 'R' : '.',
- (v & CR_TLBACC_W) ? 'W' : '.',
- (v & CR_TLBACC_X) ? 'X' : '.',
- (v & CR_TLBACC_G) ? 'G' : '.',
- v & CR_TLBACC_PFN_MASK);
+trace_nios2_mmu_write_tlbacc(v >> CR_TLBACC_IGN_SHIFT,
+ (v & CR_TLBACC_C) ? 'C' : '.',
+ (v & CR_TLBACC_R) ? 'R' : '.',
+ (v & CR_TLBACC_W) ? 'W' : '.',
+ (v & CR_TLBACC_X) ? 'X' : '.',
+ (v & CR_TLBACC_G) ? 'G' : '.',
+ v & CR_TLBACC_PFN_MASK);
 
-/* if tlbmisc.WE == 1 then trigger a TLB write on writes to TLBACC */
-if (env->regs[CR_TLBMISC] & CR_TLBMISC_WR) {
-int way = (env->regs[CR_TLBMISC] >> CR_TLBMISC_WAY_SHIFT);
-int vpn = (env->mmu.pteaddr_wr & CR_PTEADDR_VPN_MASK) >> 2;
-int pid = (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> 4;
-int g = (v & CR_TLBACC_G) ? 1 : 0;
-int valid = ((vpn & CR_TLBACC_PFN_MASK) < 0xC) ? 1 : 0;
-Nios2TLBEntry *entry =
-&env->mmu.tlb[(way * cpu->tlb_num_ways) +
-  (vpn & env->mmu.tlb_entry_mask)];
-uint32_t newTag = (vpn << 12) | (g << 11) | (valid << 10) | pid;
-uint32_t newData = v & (CR_TLBACC_C | CR_TLBACC_R | CR_TLBACC_W |
-CR_TLBACC_X | CR_TLBACC_PFN_MASK);
+/* if tlbmisc.WE == 1 then trigger a TLB write on writes to TLBACC */
+if (env->regs[CR_TLBMISC] & CR_TLBMISC_WR) {
+int way = (env->regs[CR_TLBMISC] >> CR_TLBMISC_WAY_SHIFT);
+int vpn = (env->mmu.pteaddr_wr & CR_PTEADDR_VPN_MASK) >> 2;
+int pid = (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> 4;
+int g = (v & CR_TLBACC_G) ? 1 : 0;
+int valid = ((vpn & CR_TLBACC_PFN_MASK) < 0xC) ? 1 : 0;
+Nios2TLBEntry *entry =
+&env->mmu.tlb[(way * cpu->tlb_num_ways) +
+  (vpn & env->mmu.tlb_entry_mask)];
+uint32_t newTag = (vpn << 12) | (g << 11) | (valid << 10) | pid;
+uint32_t newData = v & (CR_TLBACC_C | CR_TLBACC_R | CR_TLBACC_W |
+CR_TLBACC_X | CR_TLBACC_PFN_MASK);
 
-if ((entry->tag != newTag) || (entry->data != newData)) {
-if (entry->tag & (1 << 10)) {
-/* Flush existing entry */
-tlb_flush_page(cs, entry->tag & TARGET_PAGE_MASK);
-}
-entry->tag = newTag;
-entry->data = newData;
+if ((entry->tag != newTag) || (entry->data != newData)) {
+if (entry->tag & (1 << 10)) {
+/* Flush existing entry */
+tlb_flush_page(cs, entry->tag & TARGET_PAGE_MASK);
 }
-/* Auto-increment tlbmisc.WAY */
-env->regs[CR_TLBMISC] =
-(env->regs[CR_TLBMISC] & ~CR_TLBMISC_WAY_MASK) |
-(((way + 1) & (cpu->tlb_num_ways - 1)) 

[PULL 3/7] target/nios2: Only build mmu.c for system mode

2022-03-03 Thread Richard Henderson
We can thus remove an ifdef covering the entire file.

Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/nios2/mmu.c   | 3 ---
 target/nios2/meson.build | 3 +--
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/target/nios2/mmu.c b/target/nios2/mmu.c
index 306370f675..437fad09b7 100644
--- a/target/nios2/mmu.c
+++ b/target/nios2/mmu.c
@@ -25,7 +25,6 @@
 #include "mmu.h"
 #include "trace/trace-target_nios2.h"
 
-#if !defined(CONFIG_USER_ONLY)
 
 /* rw - 0 = read, 1 = write, 2 = fetch.  */
 unsigned int mmu_translate(CPUNios2State *env,
@@ -217,5 +216,3 @@ void dump_mmu(CPUNios2State *env)
 (entry->data & CR_TLBACC_X) ? 'X' : '-');
 }
 }
-
-#endif /* !CONFIG_USER_ONLY */
diff --git a/target/nios2/meson.build b/target/nios2/meson.build
index e643917db1..62b384702d 100644
--- a/target/nios2/meson.build
+++ b/target/nios2/meson.build
@@ -2,14 +2,13 @@ nios2_ss = ss.source_set()
 nios2_ss.add(files(
   'cpu.c',
   'helper.c',
-  'mmu.c',
   'nios2-semi.c',
   'op_helper.c',
   'translate.c',
 ))
 
 nios2_softmmu_ss = ss.source_set()
-nios2_softmmu_ss.add(files('monitor.c'))
+nios2_softmmu_ss.add(files('monitor.c', 'mmu.c'))
 
 target_arch += {'nios2': nios2_ss}
 target_softmmu_arch += {'nios2': nios2_softmmu_ss}
-- 
2.25.1




[PULL 6/7] target/nios2: Special case ipending in rdctl and wrctl

2022-03-03 Thread Richard Henderson
It was never correct to be able to write to ipending.
Until the rest of the irq code is tidied, the read of
ipending will generate an "unnecessary" mask.

Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/nios2/translate.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index 52965ba17e..a5f8d20729 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -452,6 +452,17 @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags)
 }
 
 switch (instr.imm5 + CR_BASE) {
+case CR_IPENDING:
+/*
+ * The value of the ipending register is synthetic.
+ * In hw, this is the AND of a set of hardware irq lines
+ * with the ienable register.  In qemu, we re-use the space
+ * of CR_IPENDING to store the set of irq lines, and so we
+ * must perform the AND here, and anywhere else we need the
+ * guest value of ipending.
+ */
+tcg_gen_and_tl(cpu_R[instr.c], cpu_R[CR_IPENDING], cpu_R[CR_IENABLE]);
+break;
 default:
 tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]);
 break;
@@ -477,6 +477,9 @@ static void wrctl(DisasContext *dc, uint32_t code, uint32_t flags)
 case CR_TLBMISC:
 gen_helper_mmu_write_tlbmisc(cpu_env, v);
 break;
+case CR_IPENDING:
+/* ipending is read only, writes ignored. */
+break;
 default:
 tcg_gen_mov_tl(cpu_R[instr.imm5 + CR_BASE], v);
 break;
-- 
2.25.1




[PULL 2/7] target/nios2: Replace MMU_LOG with tracepoints

2022-03-03 Thread Richard Henderson
Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 meson.build   |  1 +
 target/nios2/mmu.c| 96 ---
 target/nios2/trace-events | 10 
 3 files changed, 39 insertions(+), 68 deletions(-)
 create mode 100644 target/nios2/trace-events

diff --git a/meson.build b/meson.build
index a5b63e62cd..038502714a 100644
--- a/meson.build
+++ b/meson.build
@@ -2705,6 +2705,7 @@ if have_system or have_user
 'target/i386',
 'target/i386/kvm',
 'target/mips/tcg',
+'target/nios2',
 'target/ppc',
 'target/riscv',
 'target/s390x',
diff --git a/target/nios2/mmu.c b/target/nios2/mmu.c
index 5616c39d54..306370f675 100644
--- a/target/nios2/mmu.c
+++ b/target/nios2/mmu.c
@@ -23,18 +23,10 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "mmu.h"
+#include "trace/trace-target_nios2.h"
 
 #if !defined(CONFIG_USER_ONLY)
 
-/* Define this to enable MMU debug messages */
-/* #define DEBUG_MMU */
-
-#ifdef DEBUG_MMU
-#define MMU_LOG(x) x
-#else
-#define MMU_LOG(x)
-#endif
-
 /* rw - 0 = read, 1 = write, 2 = fetch.  */
 unsigned int mmu_translate(CPUNios2State *env,
Nios2MMULookup *lu,
@@ -43,37 +35,26 @@ unsigned int mmu_translate(CPUNios2State *env,
 Nios2CPU *cpu = env_archcpu(env);
 int pid = (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> 4;
 int vpn = vaddr >> 12;
+int way, n_ways = cpu->tlb_num_ways;
 
-MMU_LOG(qemu_log("mmu_translate vaddr %08X, pid %08X, vpn %08X\n",
- vaddr, pid, vpn));
-
-int way;
-for (way = 0; way < cpu->tlb_num_ways; way++) {
-
-Nios2TLBEntry *entry =
-&env->mmu.tlb[(way * cpu->tlb_num_ways) +
-  (vpn & env->mmu.tlb_entry_mask)];
-
-MMU_LOG(qemu_log("TLB[%d] TAG %08X, VPN %08X\n",
- (way * cpu->tlb_num_ways) +
- (vpn & env->mmu.tlb_entry_mask),
- entry->tag, (entry->tag >> 12)));
+for (way = 0; way < n_ways; way++) {
+uint32_t index = (way * n_ways) + (vpn & env->mmu.tlb_entry_mask);
+Nios2TLBEntry *entry = &env->mmu.tlb[index];
 
 if (((entry->tag >> 12) != vpn) ||
 (((entry->tag & (1 << 11)) == 0) &&
 ((entry->tag & ((1 << cpu->pid_num_bits) - 1)) != pid))) {
+trace_nios2_mmu_translate_miss(vaddr, pid, index, entry->tag);
 continue;
 }
+
 lu->vaddr = vaddr & TARGET_PAGE_MASK;
 lu->paddr = (entry->data & CR_TLBACC_PFN_MASK) << TARGET_PAGE_BITS;
 lu->prot = ((entry->data & CR_TLBACC_R) ? PAGE_READ : 0) |
((entry->data & CR_TLBACC_W) ? PAGE_WRITE : 0) |
((entry->data & CR_TLBACC_X) ? PAGE_EXEC : 0);
 
-MMU_LOG(qemu_log("HIT TLB[%d] %08X %08X %08X\n",
- (way * cpu->tlb_num_ways) +
- (vpn & env->mmu.tlb_entry_mask),
- lu->vaddr, lu->paddr, lu->prot));
+trace_nios2_mmu_translate_hit(vaddr, pid, index, lu->paddr, lu->prot);
 return 1;
 }
 return 0;
@@ -84,21 +65,18 @@ static void mmu_flush_pid(CPUNios2State *env, uint32_t pid)
 CPUState *cs = env_cpu(env);
 Nios2CPU *cpu = env_archcpu(env);
 int idx;
-MMU_LOG(qemu_log("TLB Flush PID %d\n", pid));
 
 for (idx = 0; idx < cpu->tlb_num_entries; idx++) {
 Nios2TLBEntry *entry = >mmu.tlb[idx];
 
-MMU_LOG(qemu_log("TLB[%d] => %08X %08X\n",
- idx, entry->tag, entry->data));
-
 if ((entry->tag & (1 << 10)) && (!(entry->tag & (1 << 11))) &&
 ((entry->tag & ((1 << cpu->pid_num_bits) - 1)) == pid)) {
 uint32_t vaddr = entry->tag & TARGET_PAGE_MASK;
 
-MMU_LOG(qemu_log("TLB Flush Page %08X\n", vaddr));
-
+trace_nios2_mmu_flush_pid_hit(pid, idx, vaddr);
 tlb_flush_page(cs, vaddr);
+} else {
+trace_nios2_mmu_flush_pid_miss(pid, idx, entry->tag);
 }
 }
 }
@@ -108,18 +86,15 @@ void mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v)
 CPUState *cs = env_cpu(env);
 Nios2CPU *cpu = env_archcpu(env);
 
-MMU_LOG(qemu_log("mmu_write %08X = %08X\n", rn, v));
-
 switch (rn) {
 case CR_TLBACC:
-MMU_LOG(qemu_log("TLBACC: IG %02X, FLAGS %c%c%c%c%c, PFN %05X\n",
- v >> CR_TLBACC_IGN_SHIFT,
- (v & CR_TLBACC_C) ? 'C' : '.',
- (v & CR_TLBACC_R) ? 'R' : '.',
- (v & CR_TLBACC_W) ? 'W' : '.',
- (v & CR_TLBACC_X) ? 'X' : '.',
- (v & CR_TLBACC_G) ? 'G' : '.',
- v & CR_TLBACC_PFN_MASK));
+trace_nios2_mmu_write_tlbacc(v >> CR_TLBACC_IGN_SHIFT,
+ (v & CR_TLBACC_C) ? 'C' : '.',
+ (v & 

[PULL 7/7] target/nios2: Rewrite interrupt handling

2022-03-03 Thread Richard Henderson
Previously, we would avoid setting CPU_INTERRUPT_HARD when interrupts
are disabled at a particular point in time, instead queuing the value
into cpu->irq_pending.  This is more complicated than required.

Instead, set CPU_INTERRUPT_HARD any time there is a pending interrupt,
and exclusively check for interrupts disabled in nios2_cpu_exec_interrupt.

Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/nios2/cpu.h   |  1 -
 target/nios2/helper.h|  1 -
 target/nios2/cpu.c   | 10 --
 target/nios2/op_helper.c | 19 ---
 target/nios2/translate.c | 14 +-
 5 files changed, 9 insertions(+), 36 deletions(-)

diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
index d2ba0c5bbd..a00e4229ce 100644
--- a/target/nios2/cpu.h
+++ b/target/nios2/cpu.h
@@ -160,7 +160,6 @@ struct CPUNios2State {
 
 #if !defined(CONFIG_USER_ONLY)
 Nios2MMU mmu;
-uint32_t irq_pending;
 #endif
 int error_code;
 };
diff --git a/target/nios2/helper.h b/target/nios2/helper.h
index 21ef7f0791..a44ecfdf7a 100644
--- a/target/nios2/helper.h
+++ b/target/nios2/helper.h
@@ -24,5 +24,4 @@ DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32)
 DEF_HELPER_2(mmu_write_tlbacc, void, env, i32)
 DEF_HELPER_2(mmu_write_tlbmisc, void, env, i32)
 DEF_HELPER_2(mmu_write_pteaddr, void, env, i32)
-DEF_HELPER_1(check_interrupts, void, env)
 #endif
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
index 4cade61e93..6975ae4bdb 100644
--- a/target/nios2/cpu.c
+++ b/target/nios2/cpu.c
@@ -73,12 +73,9 @@ static void nios2_cpu_set_irq(void *opaque, int irq, int level)
 
 env->regs[CR_IPENDING] = deposit32(env->regs[CR_IPENDING], irq, 1, !!level);
 
-env->irq_pending = env->regs[CR_IPENDING] & env->regs[CR_IENABLE];
-
-if (env->irq_pending && (env->regs[CR_STATUS] & CR_STATUS_PIE)) {
-env->irq_pending = 0;
+if (env->regs[CR_IPENDING]) {
 cpu_interrupt(cs, CPU_INTERRUPT_HARD);
-} else if (!env->irq_pending) {
+} else {
 cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
 }
 }
@@ -134,7 +131,8 @@ static bool nios2_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 CPUNios2State *env = >env;
 
 if ((interrupt_request & CPU_INTERRUPT_HARD) &&
-(env->regs[CR_STATUS] & CR_STATUS_PIE)) {
+(env->regs[CR_STATUS] & CR_STATUS_PIE) &&
+(env->regs[CR_IPENDING] & env->regs[CR_IENABLE])) {
 cs->exception_index = EXCP_IRQ;
 nios2_cpu_do_interrupt(cs);
 return true;
diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c
index d729379e4d..caa885f7b4 100644
--- a/target/nios2/op_helper.c
+++ b/target/nios2/op_helper.c
@@ -21,28 +21,9 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
 #include "qemu/main-loop.h"
 
-#if !defined(CONFIG_USER_ONLY)
-static void nios2_check_interrupts(CPUNios2State *env)
-{
-if (env->irq_pending &&
-(env->regs[CR_STATUS] & CR_STATUS_PIE)) {
-env->irq_pending = 0;
-cpu_interrupt(env_cpu(env), CPU_INTERRUPT_HARD);
-}
-}
-
-void helper_check_interrupts(CPUNios2State *env)
-{
-qemu_mutex_lock_iothread();
-nios2_check_interrupts(env);
-qemu_mutex_unlock_iothread();
-}
-#endif /* !CONFIG_USER_ONLY */
-
 void helper_raise_exception(CPUNios2State *env, uint32_t index)
 {
 CPUState *cs = env_cpu(env);
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index a5f8d20729..f89271dbed 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -491,19 +491,15 @@ static void wrctl(DisasContext *dc, uint32_t code, uint32_t flags)
 case CR_IPENDING:
 /* ipending is read only, writes ignored. */
 break;
+case CR_STATUS:
+case CR_IENABLE:
+/* If interrupts were enabled using WRCTL, trigger them. */
+dc->base.is_jmp = DISAS_UPDATE;
+/* fall through */
 default:
 tcg_gen_mov_tl(cpu_R[instr.imm5 + CR_BASE], v);
 break;
 }
-
-/* If interrupts were enabled using WRCTL, trigger them. */
-if ((instr.imm5 + CR_BASE) == CR_STATUS) {
-if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) {
-gen_io_start();
-}
-gen_helper_check_interrupts(cpu_env);
-dc->base.is_jmp = DISAS_UPDATE;
-}
 #endif
 }
 
-- 
2.25.1




[PULL 0/7] target/nios2: Rewrite interrupt handling

2022-03-03 Thread Richard Henderson
The following changes since commit 36eae3a732a1f2aa81391e871ac0e9bb3233e7d7:

  Merge remote-tracking branch 
'remotes/dgilbert-gitlab/tags/pull-migration-20220302b' into staging 
(2022-03-02 20:55:48 +)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-nios-20220303

for you to fetch changes up to b72c9d5951f1dfa047f545408dd9e35597e6b9d3:

  target/nios2: Rewrite interrupt handling (2022-03-03 09:51:59 -1000)


Rewrite nios2 interrupt handling


Richard Henderson (7):
  target/nios2: Remove mmu_read_debug
  target/nios2: Replace MMU_LOG with tracepoints
  target/nios2: Only build mmu.c for system mode
  target/nios2: Hoist R_ZERO check in rdctl
  target/nios2: Split mmu_write
  target/nios2: Special case ipending in rdctl and wrctl
  target/nios2: Rewrite interrupt handling

 meson.build   |   1 +
 target/nios2/cpu.h|   1 -
 target/nios2/helper.h |   6 +-
 target/nios2/mmu.h|   1 -
 target/nios2/cpu.c|  10 +-
 target/nios2/mmu.c| 265 +-
 target/nios2/op_helper.c  |  29 -
 target/nios2/translate.c  |  73 ++---
 target/nios2/meson.build  |   3 +-
 target/nios2/trace-events |  10 ++
 10 files changed, 152 insertions(+), 247 deletions(-)
 create mode 100644 target/nios2/trace-events



[PULL 1/7] target/nios2: Remove mmu_read_debug

2022-03-03 Thread Richard Henderson
This functionality can be had via plugins, if desired.
In the meantime, it is unused code.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/nios2/helper.h|  1 -
 target/nios2/mmu.h   |  1 -
 target/nios2/mmu.c   | 20 
 target/nios2/op_helper.c |  5 -
 target/nios2/translate.c | 17 -
 5 files changed, 44 deletions(-)

diff --git a/target/nios2/helper.h b/target/nios2/helper.h
index 6c8f0b5b35..6d8eec1814 100644
--- a/target/nios2/helper.h
+++ b/target/nios2/helper.h
@@ -21,7 +21,6 @@
 DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32)
 
 #if !defined(CONFIG_USER_ONLY)
-DEF_HELPER_2(mmu_read_debug, void, env, i32)
 DEF_HELPER_3(mmu_write, void, env, i32, i32)
 DEF_HELPER_1(check_interrupts, void, env)
 #endif
diff --git a/target/nios2/mmu.h b/target/nios2/mmu.h
index 4f46fbb82e..b7785b46c0 100644
--- a/target/nios2/mmu.h
+++ b/target/nios2/mmu.h
@@ -44,7 +44,6 @@ void mmu_flip_um(CPUNios2State *env, unsigned int um);
 unsigned int mmu_translate(CPUNios2State *env,
Nios2MMULookup *lu,
target_ulong vaddr, int rw, int mmu_idx);
-void mmu_read_debug(CPUNios2State *env, uint32_t rn);
 void mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v);
 void mmu_init(CPUNios2State *env);
 
diff --git a/target/nios2/mmu.c b/target/nios2/mmu.c
index 2545c06761..5616c39d54 100644
--- a/target/nios2/mmu.c
+++ b/target/nios2/mmu.c
@@ -35,26 +35,6 @@
 #define MMU_LOG(x)
 #endif
 
-void mmu_read_debug(CPUNios2State *env, uint32_t rn)
-{
-switch (rn) {
-case CR_TLBACC:
-MMU_LOG(qemu_log("TLBACC READ %08X\n", env->regs[rn]));
-break;
-
-case CR_TLBMISC:
-MMU_LOG(qemu_log("TLBMISC READ %08X\n", env->regs[rn]));
-break;
-
-case CR_PTEADDR:
-MMU_LOG(qemu_log("PTEADDR READ %08X\n", env->regs[rn]));
-break;
-
-default:
-break;
-}
-}
-
 /* rw - 0 = read, 1 = write, 2 = fetch.  */
 unsigned int mmu_translate(CPUNios2State *env,
Nios2MMULookup *lu,
diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c
index a59003855a..61fc4dc903 100644
--- a/target/nios2/op_helper.c
+++ b/target/nios2/op_helper.c
@@ -26,11 +26,6 @@
 #include "qemu/main-loop.h"
 
 #if !defined(CONFIG_USER_ONLY)
-void helper_mmu_read_debug(CPUNios2State *env, uint32_t rn)
-{
-mmu_read_debug(env, rn);
-}
-
 void helper_mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v)
 {
 mmu_write(env, rn, v);
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index f9abc2fdd2..194c8ebafd 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -448,23 +448,6 @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags)
 gen_check_supervisor(dc);
 
 switch (instr.imm5 + CR_BASE) {
-case CR_PTEADDR:
-case CR_TLBACC:
-case CR_TLBMISC:
-{
-#if !defined(CONFIG_USER_ONLY)
-if (likely(instr.c != R_ZERO)) {
-tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]);
-#ifdef DEBUG_MMU
-TCGv_i32 tmp = tcg_const_i32(instr.imm5 + CR_BASE);
-gen_helper_mmu_read_debug(cpu_R[instr.c], cpu_env, tmp);
-tcg_temp_free_i32(tmp);
-#endif
-}
-#endif
-break;
-}
-
 default:
 if (likely(instr.c != R_ZERO)) {
 tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]);
-- 
2.25.1



