Re: [RFC PATCH 03/21] i386/kvm: handle Xen HVM cpuid leaves
On 6/12/22 01:18, David Woodhouse wrote: On Mon, 2022-12-05 at 22:58 +0100, Philippe Mathieu-Daudé wrote: diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 22b681ca37..45aa9e40a5 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7069,6 +7069,8 @@ static Property x86_cpu_properties[] = { * own cache information (see x86_cpu_load_def()). */ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true), +DEFINE_PROP_BOOL("xen", X86CPU, xen, false), Maybe name it 'xen-hvm'? I think I'd prefer it to go away completely. If the *machine* has the Xen feature enabled (which I've made implicit in the 'xen-version' property), perhaps we should *always* disable 'expose_kvm' and enable the Xen CPUID leaves instead? It would be silly to run a non-Xen guest on the Xen machine, so it is not a bad idea :) +DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false), What happens if we use -cpu host,-kvm,+xen,-xen-vapic ? That's sane; it does the Xen CPUID thing but doesn't advertise the vAPIC feature in the Xen CPUID leaves. In which case we don't want to use the vAPIC? Thanks, Phil.
Re: [PATCH 22/22] tcg/riscv: Implement direct branch for goto_tb
On 6/12/22 05:17, Richard Henderson wrote: Now that tcg can handle direct and indirect goto_tb simultaneously, we can optimistically leave space for a direct branch and fall back to loading the pointer from the TB for an indirect branch. Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.h | 5 + tcg/riscv/tcg-target.c.inc | 19 +-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 56f7bc3346..a75c84f2a6 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -159,6 +159,11 @@ typedef enum { #define TCG_TARGET_HAS_mulsh_i641 #endif +<<< HEAD +=== +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +>>> 89ab294271 (tcg/riscv: Implement TCG_TARGET_HAS_direct_jump) HEAD is correct :)
Re: [PATCH 21/22] tcg/riscv: Introduce OPC_NOP
On 6/12/22 05:17, Richard Henderson wrote: Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 18/22] tcg/sparc64: Remove USE_REG_TB
On 6/12/22 05:17, Richard Henderson wrote: This is always true for sparc64, so this is dead since 3a5f6805c7ca. Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target.c.inc | 57 ++-- 1 file changed, 22 insertions(+), 35 deletions(-) @@ -1897,7 +1884,7 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, tcg_debug_assert(tb_disp == (int32_t)tb_disp); tcg_debug_assert(br_disp == (int32_t)br_disp); -if (!USE_REG_TB) { +if (0) { qatomic_set((uint32_t *)jmp_rw, deposit32(CALL, 0, 30, br_disp >> 2)); flush_idcache_range(jmp_rx, jmp_rw, 4); Why remove in the next patch and not here? Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 14/22] tcg: Always define tb_target_set_jmp_target
On 6/12/22 05:17, Richard Henderson wrote: Install empty versions for !TCG_TARGET_HAS_direct_jump hosts. Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 6 ++ tcg/mips/tcg-target.c.inc | 6 ++ tcg/riscv/tcg-target.c.inc | 6 ++ tcg/tci/tcg-target.c.inc | 6 ++ 4 files changed, 24 insertions(+) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 13/22] tcg: Move tb_target_set_jmp_target declaration to tcg.h
On 6/12/22 05:17, Richard Henderson wrote: Signed-off-by: Richard Henderson --- include/tcg/tcg.h| 3 +++ tcg/aarch64/tcg-target.h | 4 tcg/arm/tcg-target.h | 5 - tcg/i386/tcg-target.h| 3 --- tcg/loongarch64/tcg-target.h | 3 --- tcg/mips/tcg-target.h| 5 - tcg/ppc/tcg-target.h | 4 tcg/riscv/tcg-target.h | 4 tcg/s390x/tcg-target.h | 4 tcg/sparc64/tcg-target.h | 4 tcg/tci/tcg-target.h | 4 11 files changed, 3 insertions(+), 40 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 08/22] tcg: Split out tcg_out_goto_tb
On 6/12/22 05:17, Richard Henderson wrote: The INDEX_op_goto_tb opcode needs no register allocation. Split out a dedicated helper function for it. Signed-off-by: Richard Henderson --- tcg/tcg.c| 4 ++ tcg/aarch64/tcg-target.c.inc | 40 +- tcg/arm/tcg-target.c.inc | 49 +++--- tcg/i386/tcg-target.c.inc| 33 +++ tcg/loongarch64/tcg-target.c.inc | 38 + tcg/mips/tcg-target.c.inc| 21 ++ tcg/ppc/tcg-target.c.inc | 52 tcg/riscv/tcg-target.c.inc | 20 + tcg/s390x/tcg-target.c.inc | 70 tcg/sparc64/tcg-target.c.inc | 68 --- tcg/tci/tcg-target.c.inc | 16 11 files changed, 219 insertions(+), 192 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 09/22] tcg: Rename TB_JMP_RESET_OFFSET_INVALID to TB_JMP_OFFSET_INVALID
On 6/12/22 05:17, Richard Henderson wrote: This will shortly be used for more than reset. Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 2 +- accel/tcg/translate-all.c | 8 tcg/tcg.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 7/8] tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
On 6/12/22 05:40, Richard Henderson wrote: Take the w^x split into account when computing the pc-relative distance to an absolute pointer. Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 2044897e36..47465b8c20 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -701,7 +701,7 @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data, intptr_t imm12 = sextreg(offset, 0, 12); if (offset != imm12) { -intptr_t diff = offset - (uintptr_t)s->code_ptr; +intptr_t diff = tcg_pcrel_diff(s, (void *)offset); Nitpicking? const void *. if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { imm12 = sextreg(diff, 0, 12);
Re: [PATCH v2 3/4] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in vhost_vdpa_net_handle_ctrl_avail
On Mon, Dec 5, 2022 at 9:07 PM Eugenio Perez Martin wrote: > > On Mon, Dec 5, 2022 at 5:27 AM Jason Wang wrote: > > > > On Thu, Dec 1, 2022 at 5:29 PM Eugenio Perez Martin > > wrote: > > > > > > On Thu, Dec 1, 2022 at 9:39 AM Jason Wang wrote: > > > > > > > > On Wed, Nov 30, 2022 at 3:07 PM Eugenio Perez Martin > > > > wrote: > > > > > > > > > > On Wed, Nov 30, 2022 at 8:02 AM Jason Wang > > > > > wrote: > > > > > > > > > > > > On Fri, Nov 25, 2022 at 1:33 AM Eugenio Pérez > > > > > > wrote: > > > > > > > > > > > > > > Since this capability is emulated by qemu shadowed CVQ cannot > > > > > > > forward it > > > > > > > to the device. Process all that command within qemu. > > > > > > > > > > > > > > Signed-off-by: Eugenio Pérez > > > > > > > --- > > > > > > > net/vhost-vdpa.c | 15 --- > > > > > > > 1 file changed, 12 insertions(+), 3 deletions(-) > > > > > > > > > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c > > > > > > > index 2b4b85d8f8..8172aa8449 100644 > > > > > > > --- a/net/vhost-vdpa.c > > > > > > > +++ b/net/vhost-vdpa.c > > > > > > > @@ -489,9 +489,18 @@ static int > > > > > > > vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, > > > > > > > out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, > > > > > > > s->cvq_cmd_out_buffer, > > > > > > > vhost_vdpa_net_cvq_cmd_len()); > > > > > > > -dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, > > > > > > > sizeof(status)); > > > > > > > -if (unlikely(dev_written < 0)) { > > > > > > > -goto out; > > > > > > > +if (*(uint8_t *)s->cvq_cmd_out_buffer == > > > > > > > VIRTIO_NET_CTRL_ANNOUNCE) { > > > > > > > +/* > > > > > > > + * Guest announce capability is emulated by qemu, so > > > > > > > dont forward to > > > > > > > > > > > > s/dont/don't/ > > > > > > > > > > > > > > > > I'll correct it, thanks! > > > > > > > > > > > > + * the device. 
> > > > > > > + */ > > > > > > > +dev_written = sizeof(status); > > > > > > > +*s->status = VIRTIO_NET_OK; > > > > > > > > > > > > I wonder if we should avoid negotiating ANNOUNCE with vDPA parents > > > > > > if > > > > > > we do this? > > > > > > > > > > > > > > > > I can re-check, but the next patch should avoid it. > > > > > > > > Kind of, it makes sure guest can always see _F_ANNOUNCE. But does it > > > > prevent _F_ANNOUNCE from being negotiated? > > > > > > > > > > It should go like: > > > * vhost_net_ack_features calls vhost_ack_features with feature_bits = > > > vdpa_feature_bits and features = guest acked features. > > > vhost_ack_features stores in hdev->acked_features only the features > > > that met features & bit_mask, so it will not store _F_ANNOUNCE. > > > * vhost_vdpa_set_features is called from vhost_dev_set_features with > > > features = dev->acked_features. Both functions can add features by > > > themselves (VHOST_F_LOG_ALL, VIRTIO_F_IOMMU_PLATFORM), but no > > > _F_ANNOUNCE. > > > > > > Still untested. > > > > Ok. > > > > > > > > > > Even if > > > > > negotiated, the parent should never set the announce status bit, since > > > > > we never tell the device is a destination device. > > > > > > > > That's the point, do we have such a guarantee? Or I wonder if there's > > > > any parent that supports _F_ANNOUNCE if yes, how it is supposed to > > > > work? > > > > > > > > > > At the moment it is impossible to work since there is no support for > > > config interrupt from the device. Even with config interrupt, > > > something external from qemu should make the device enable the status > > > bit, since the current migration protocol makes no difference between > > > to be a migration destination and to start the device from scratch. > > > Unless it enables the bit maliciously or by mistake. 
> > > > > > Just for completion, the current method works with no need of vdpa > > > device config interrupt support thanks to being 100% emulated in qemu, > > > which has the support of injecting config interrupts. > > > > Ok, rethink this feature, I think I can find one use case for > > _F_ANNOUNCE, that is, the migration is totally done through the vDPA > > device (DPU) itself. > > > > To make sure we are on the same page, this migration would save some > things like transfer the status through qemu, but it is not possible > at the moment. A few things need to be developed for that to make it > possible. Somehow, it means the DPU is in charge of doing all the migration. > > The default behavior is to emulate the announce feature / status bit > at the moment, so no ack to the device is needed. If we want that > passthrough, a new parameter or similar needs to be developed, so the > feature is negotiated with the device and not emulated in get_config. > > Is that accurate? Yes. Thanks > > Thanks! > > > I think we can go forward and revisit this issue in the future. > > > > Thanks > > > > > > > > Thanks! > > > > > >
Re: [PATCH 07/22] tcg: Introduce get_jmp_target_addr
On 6/12/22 05:17, Richard Henderson wrote: Similar to the existing set_jmp_reset_offset. Include the rw->rx address space coversion done by arm and s390x, and Typo "conversion". forgotten by mips and riscv. Signed-off-by: Richard Henderson --- tcg/tcg.c | 9 + tcg/arm/tcg-target.c.inc | 2 +- tcg/mips/tcg-target.c.inc | 2 +- tcg/riscv/tcg-target.c.inc | 2 +- tcg/s390x/tcg-target.c.inc | 2 +- tcg/tci/tcg-target.c.inc | 2 +- 6 files changed, 14 insertions(+), 5 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 05/22] tcg: Replace asserts on tcg_jmp_insn_offset
On 6/12/22 05:16, Richard Henderson wrote: Test TCG_TARGET_HAS_direct_jump instead of testing an implementation pointer. Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 2 +- tcg/arm/tcg-target.c.inc | 2 +- tcg/loongarch64/tcg-target.c.inc | 2 +- tcg/mips/tcg-target.c.inc| 2 +- tcg/riscv/tcg-target.c.inc | 2 +- tcg/s390x/tcg-target.c.inc | 2 +- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 01/22] tcg: Split out tcg_out_exit_tb
On 6/12/22 05:16, Richard Henderson wrote: The INDEX_op_exit_tb opcode needs no register allocation. Split out a dedicated helper function for it. Signed-off-by: Richard Henderson --- tcg/tcg.c| 4 tcg/aarch64/tcg-target.c.inc | 22 ++ tcg/arm/tcg-target.c.inc | 11 + tcg/i386/tcg-target.c.inc| 21 + tcg/loongarch64/tcg-target.c.inc | 22 ++ tcg/mips/tcg-target.c.inc| 33 +-- tcg/ppc/tcg-target.c.inc | 11 + tcg/riscv/tcg-target.c.inc | 22 ++ tcg/s390x/tcg-target.c.inc | 23 ++- tcg/sparc64/tcg-target.c.inc | 39 +--- tcg/tci/tcg-target.c.inc | 10 11 files changed, 121 insertions(+), 97 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH 7/8] tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
On 6/12/22 05:40, Richard Henderson wrote: Take the w^x split into account when computing the pc-relative distance to an absolute pointer. Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [RFC PATCH for 8.0 00/13] vDPA-net inflight descriptors migration with SVQ
On Tue, Dec 6, 2022 at 1:04 AM Eugenio Pérez wrote: > > The state of the descriptors (avail or used) may not be recoverable just > looking at the guest memory. Out of order used descriptor may override > previous avail ones in the descriptor table or avail vring. > > Currently we're not migrating this status in net devices because virtio-net, > vhost-kernel etc use the descriptors in order, Note that this might not be the truth (when zerocopy is enabled). > so the information is always > recoverable from guest's memory. However, vDPA devices may use them out of > order, and other kind of devices like block need this support. > > Shadow virtqueue is able to track these and resend them at the destination. As discussed, there's a bootstrap issue here: When SVQ needs to be enabled on demand, do we still need another way to get inflight ones without the help of SVQ? Thanks > Add them to the virtio-net migration description so they are not lost in the > process. > > This is a very early RFC just to validate the first draft so expect leftovers. > To fetch and request the descriptors from a device without SVQ need to be > implemented on top. Some other notable pending items are: > * Do not send the descriptors actually recoverable from the guest memory. > * Properly version the migrate data. > * Properly abstract the descriptors access from virtio-net to SVQ. > * Do not use VirtQueueElementOld but migrate directly VirtQueueElement. > * Replace lots of assertions with runtime conditionals. > * Other TODOs in the patch message or code changes. > > Thanks. 
> > Eugenio Pérez (13): > vhost: add available descriptor list in SVQ > vhost: iterate only available descriptors at SVQ stop > vhost: merge avail list and next avail descriptors detach > vhost: add vhost_svq_save_inflight > virtio: Specify uint32_t as VirtQueueElementOld members type > virtio: refactor qemu_get_virtqueue_element > virtio: refactor qemu_put_virtqueue_element > virtio: expose VirtQueueElementOld > virtio: add vmstate_virtqueue_element_old > virtio-net: Migrate vhost inflight descriptors > virtio-net: save inflight descriptors at vhost shutdown > vhost: expose vhost_svq_add_element > vdpa: Recover inflight descriptors > > hw/virtio/vhost-shadow-virtqueue.h | 9 ++ > include/hw/virtio/virtio-net.h | 2 + > include/hw/virtio/virtio.h | 32 ++ > include/migration/vmstate.h| 22 > hw/net/vhost_net.c | 56 ++ > hw/net/virtio-net.c| 129 +++ > hw/virtio/vhost-shadow-virtqueue.c | 52 +++-- > hw/virtio/vhost-vdpa.c | 11 -- > hw/virtio/virtio.c | 162 ++--- > 9 files changed, 392 insertions(+), 83 deletions(-) > > -- > 2.31.1 > >
Re: [PATCH] target/riscv: Fix mret exception cause when no pmp rule is configured
On Mon, Dec 5, 2022 at 4:54 PM Bin Meng wrote: > > The priv spec v1.12 says: > > If no PMP entry matches an M-mode access, the access succeeds. If > no PMP entry matches an S-mode or U-mode access, but at least one > PMP entry is implemented, the access fails. Failed accesses generate > an instruction, load, or store access-fault exception. > > At present the exception cause is set to 'illegal instruction' but > should have been 'instruction access fault'. > > Fixes: d102f19a2085 ("target/riscv/pmp: Raise exception if no PMP entry is > configured") > Signed-off-by: Bin Meng Reviewed-by: Alistair Francis Alistair > --- > > target/riscv/op_helper.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c > index 09f1f5185d..d7af7f056b 100644 > --- a/target/riscv/op_helper.c > +++ b/target/riscv/op_helper.c > @@ -202,7 +202,7 @@ target_ulong helper_mret(CPURISCVState *env) > > if (riscv_feature(env, RISCV_FEATURE_PMP) && > !pmp_get_num_rules(env) && (prev_priv != PRV_M)) { > -riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > +riscv_raise_exception(env, RISCV_EXCP_INST_ACCESS_FAULT, GETPC()); > } > > target_ulong prev_virt = get_field(env->mstatus, MSTATUS_MPV); > -- > 2.34.1 > >
Re: [PATCH] intel-iommu: Document iova_tree
On Tue, Dec 6, 2022 at 7:28 AM Peter Xu wrote: > > On Mon, Dec 05, 2022 at 12:23:20PM +0800, Jason Wang wrote: > > On Fri, Dec 2, 2022 at 12:25 AM Peter Xu wrote: > > > > > > It seems not super clear on when iova_tree is used, and why. Add a rich > > > comment above iova_tree to track why we needed the iova_tree, and when we > > > need it. > > > > > > Suggested-by: Jason Wang > > > Signed-off-by: Peter Xu > > > --- > > > include/hw/i386/intel_iommu.h | 30 +- > > > 1 file changed, 29 insertions(+), 1 deletion(-) > > > > > > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h > > > index 46d973e629..8d130ab2e3 100644 > > > --- a/include/hw/i386/intel_iommu.h > > > +++ b/include/hw/i386/intel_iommu.h > > > @@ -109,7 +109,35 @@ struct VTDAddressSpace { > > > QLIST_ENTRY(VTDAddressSpace) next; > > > /* Superset of notifier flags that this address space has */ > > > IOMMUNotifierFlag notifier_flags; > > > -IOVATree *iova_tree; /* Traces mapped IOVA ranges */ > > > +/* > > > + * @iova_tree traces mapped IOVA ranges. > > > + * > > > + * The tree is not needed if no MAP notifiers is registered with > > > + * current VTD address space, because all UNMAP (including iotlb or > > > + * dev-iotlb) events can be transparently delivered to !MAP iommu > > > + * notifiers. > > > > So this means the UNMAP notifier doesn't need to be as accurate as > > MAP. (Should we document it in the notifier headers)? > > Yes. > > > > > For MAP[a, b] MAP[b, c] we can do a UNMAP[a. c]. > > IIUC a better way to say this is, for MAP[a, b] we can do an UNMAP[a-X, > b+Y] as long as the range covers [a, b]? Right. > > > > > > + * > > > + * The tree OTOH is required for MAP typed iommu notifiers for a few > > > + * reasons. > > > + * > > > + * Firstly, there's no way to identify whether an PSI event is MAP or > > > + * UNMAP within the PSI message itself. 
Without having prior > > > knowledge > > > + * of existing state vIOMMU doesn't know whether it should notify MAP > > > + * or UNMAP for a PSI message it received. > > > + * > > > + * Secondly, PSI received from guest driver (or even a large PSI can > > > + * grow into a DSI at least with Linux intel-iommu driver) can be > > > + * larger in range than the newly mapped ranges for either MAP or > > > UNMAP > > > + * events. > > > > Yes, so I think we need a document that the UNMAP handler should be > > prepared for this. > > How about I squash below into this same patch? Looks good to me. Thanks > > diff --git a/include/exec/memory.h b/include/exec/memory.h > index 91f8a2395a..c83bd11a68 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -129,6 +129,24 @@ struct IOMMUTLBEntry { > /* > * Bitmap for different IOMMUNotifier capabilities. Each notifier can > * register with one or multiple IOMMU Notifier capability bit(s). > + * > + * Normally there're two use cases for the notifiers: > + * > + * (1) When the device needs accurate synchronizations of the vIOMMU page > + * tables, it needs to register with both MAP|UNMAP notifies (which > + * is defined as IOMMU_NOTIFIER_IOTLB_EVENTS below). As long as MAP > + * events are registered, the notifications will be accurate but > + * there's overhead on synchronizing the guest vIOMMU page tables. > + * > + * (2) When the device doesn't need accurate synchronizations of the > + * vIOMMU page tables (when the device can both cache translations > + * and requesting to translate dynamically during DMA process), it > + * needs to register only with UNMAP or DEVIOTLB_UNMAP notifies. 
> + * Note that in such working mode shadow page table is not used for > + * vIOMMU unit on this address space, so the UNMAP messages can be > + * actually larger than the real invalidations (just like how the > + * Linux IOMMU driver normally works, where an invalidation can be > + * enlarged as long as it still covers the target range). > */ > typedef enum { > IOMMU_NOTIFIER_NONE = 0, > > Thanks, > > -- > Peter Xu >
Re: [PATCH v3 2/3] hw/riscv: sifive_e: Support the watchdog timer of HiFive 1 rev b.
On Wed, Nov 30, 2022 at 11:56 AM Tommy Wu wrote: > > Create the AON device when we realize the sifive_e machine. > This patch only implemented the functionality of the watchdog timer, > not all the functionality of the AON device. > > Signed-off-by: Tommy Wu Reviewed-by: Alistair Francis Alistair > --- > hw/riscv/Kconfig| 1 + > hw/riscv/sifive_e.c | 13 +++-- > include/hw/riscv/sifive_e.h | 8 +--- > 3 files changed, 17 insertions(+), 5 deletions(-) > > diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig > index 79ff61c464..50890b1b75 100644 > --- a/hw/riscv/Kconfig > +++ b/hw/riscv/Kconfig > @@ -59,6 +59,7 @@ config SIFIVE_E > select SIFIVE_PLIC > select SIFIVE_UART > select SIFIVE_E_PRCI > +select SIFIVE_E_AON > select UNIMP > > config SIFIVE_U > diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c > index d65d2fd869..c866ffe232 100644 > --- a/hw/riscv/sifive_e.c > +++ b/hw/riscv/sifive_e.c > @@ -45,6 +45,7 @@ > #include "hw/intc/riscv_aclint.h" > #include "hw/intc/sifive_plic.h" > #include "hw/misc/sifive_e_prci.h" > +#include "hw/misc/sifive_e_aon.h" > #include "chardev/char.h" > #include "sysemu/sysemu.h" > > @@ -222,8 +223,13 @@ static void sifive_e_soc_realize(DeviceState *dev, Error > **errp) > RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus, > RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, > RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, false); > -create_unimplemented_device("riscv.sifive.e.aon", > -memmap[SIFIVE_E_DEV_AON].base, memmap[SIFIVE_E_DEV_AON].size); > + > +s->aon = qdev_new(TYPE_SIFIVE_E_AON); > +if (!sysbus_realize(SYS_BUS_DEVICE(s->aon), errp)) { > +return; > +} > +sysbus_mmio_map(SYS_BUS_DEVICE(s->aon), 0, > memmap[SIFIVE_E_DEV_AON].base); > + > sifive_e_prci_create(memmap[SIFIVE_E_DEV_PRCI].base); > > /* GPIO */ > @@ -244,6 +250,9 @@ static void sifive_e_soc_realize(DeviceState *dev, Error > **errp) > qdev_get_gpio_in(DEVICE(s->plic), > SIFIVE_E_GPIO0_IRQ0 + i)); > } > +sysbus_connect_irq(SYS_BUS_DEVICE(s->aon), 0, > + 
qdev_get_gpio_in(DEVICE(s->plic), > +SIFIVE_E_AON_WDT_IRQ)); > > sifive_uart_create(sys_mem, memmap[SIFIVE_E_DEV_UART0].base, > serial_hd(0), qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_E_UART0_IRQ)); > diff --git a/include/hw/riscv/sifive_e.h b/include/hw/riscv/sifive_e.h > index d738745925..e2de1564a7 100644 > --- a/include/hw/riscv/sifive_e.h > +++ b/include/hw/riscv/sifive_e.h > @@ -35,6 +35,7 @@ typedef struct SiFiveESoCState { > /*< public >*/ > RISCVHartArrayState cpus; > DeviceState *plic; > +DeviceState *aon; > SIFIVEGPIOState gpio; > MemoryRegion xip_mem; > MemoryRegion mask_rom; > @@ -76,9 +77,10 @@ enum { > }; > > enum { > -SIFIVE_E_UART0_IRQ = 3, > -SIFIVE_E_UART1_IRQ = 4, > -SIFIVE_E_GPIO0_IRQ0 = 8 > +SIFIVE_E_AON_WDT_IRQ = 1, > +SIFIVE_E_UART0_IRQ= 3, > +SIFIVE_E_UART1_IRQ= 4, > +SIFIVE_E_GPIO0_IRQ0 = 8 > }; > > #define SIFIVE_E_PLIC_HART_CONFIG "M" > -- > 2.27.0 > >
Re: [PATCH v3 0/3] Add (more) missing PolarFire SoC io regions
On Fri, Nov 18, 2022 at 8:57 AM Conor Dooley wrote: > > From: Conor Dooley > > Hey all, > Apart from DDR (see [1]), these should be the last bits needed to get > recent Linux kernels booting again for Icicle/PolarFire SoC. Previously, > I had been disabling the hwrng and PCI but I keep forgetting that is > required and decided to fix that. > > I'm not entirely sure if I have done some sort of no-no thing by > registering the same interrupt with both the IOSCB and SYSREG regions. > The interrupt is raised after the system controller handles a service > via the mailbox. The mailbox's status, control and mailbox registers > are all part of the IOSCB region. It's cleared by a write to a register > in the SYSREG region. > Since my goal here is to add the regions/peripherals without actually > implementing them so that Linux etc. can boot, I'm just raising an interrupt > once a guest requests a service & reporting a status indicating that the > service request failed. > > Thanks, > Conor. > > 1 - https://lore.kernel.org/all/Y2+dUCpd8OP52%2FDJ@spud/ > > Changes since v2: > - fix the actual bits in the register used for the service return > status > - remove a duplicate irq_lower() in the sysreg bits of patch 3 > - move the irq raise to a write function, raising it in the read one was > causing the irq to get raised twice by the linux driver that works > properly with the actual hardware. oops. > > Conor Dooley (3): > hw/misc: pfsoc: add fabric clocks to ioscb > hw/riscv: pfsoc: add missing FICs as unimplemented > hw/{misc,riscv}: pfsoc: add system controller as unimplemented Thanks! Applied to riscv-to-apply.next Alistair > > hw/misc/mchp_pfsoc_ioscb.c | 78 +- > hw/misc/mchp_pfsoc_sysreg.c | 18 - > hw/riscv/microchip_pfsoc.c | 121 > include/hw/misc/mchp_pfsoc_ioscb.h | 4 + > include/hw/misc/mchp_pfsoc_sysreg.h | 1 + > include/hw/riscv/microchip_pfsoc.h | 3 + > 6 files changed, 167 insertions(+), 58 deletions(-) > > -- > 2.37.2 > >
Re: [PATCH v3] riscv: Allow user to set the satp mode
On Tue, Dec 06, 2022 at 06:57:39AM +0100, Alexandre Ghiti wrote: > > I can't find the sve* properties you're talking about, can you point them > to me? > target/arm/cpu64.c: cpu_arm_get/set_vq() and arm_cpu_sve_finalize() and aarch64_add_sve_properties(). Thanks, drew
Re: [PATCH v3] riscv: Allow user to set the satp mode
Hi Andrew, On Thu, Dec 1, 2022 at 3:47 PM Andrew Jones wrote: > On Thu, Dec 01, 2022 at 10:36:23AM +0100, Alexandre Ghiti wrote: > > RISC-V specifies multiple sizes for addressable memory and Linux probes > for > > the machine's support at startup via the satp CSR register (done in > > csr.c:validate_vm). > > > > As per the specification, sv64 must support sv57, which in turn must > > support sv48...etc. So we can restrict machine support by simply setting > the > > "highest" supported mode and the bare mode is always supported. > > > > You can set the satp mode using the new properties "mbare", "sv32", > > "sv39", "sv48", "sv57" and "sv64" as follows: > > -cpu rv64,sv57=on # Linux will boot using sv57 scheme > > -cpu rv64,sv39=on # Linux will boot using sv39 scheme > > > > We take the highest level set by the user: > > -cpu rv64,sv48=on,sv57=on # Linux will boot using sv57 scheme > > > > We make sure that invalid configurations are rejected: > > -cpu rv64,sv32=on # Can't enable 32-bit satp mode in 64-bit > > -cpu rv64,sv39=off,sv48=on # sv39 must be supported if higher modes are > > # enabled > > > > We accept "redundant" configurations: > > -cpu rv64,sv48=on,sv57=off # sv39 must be supported if higher modes are > > > > In addition, we now correctly set the device-tree entry 'mmu-type' using > > those new properties. 
> > > > Co-Developed-by: Ludovic Henry > > Signed-off-by: Ludovic Henry > > Signed-off-by: Alexandre Ghiti > > --- > > v3: > > - Free sv_name as pointed by Bin > > - Replace satp-mode with boolean properties as suggested by Andrew > > - Removed RB from Atish as the patch considerably changed > > > > v2: > > - Use error_setg + return as suggested by Alistair > > - Add RB from Atish > > - Fixed checkpatch issues missed in v1 > > - Replaced Ludovic email address with the rivos one > > > > hw/riscv/virt.c | 16 ++-- > > target/riscv/cpu.c | 164 > > target/riscv/cpu.h | 8 ++ > > target/riscv/cpu_bits.h | 1 + > > target/riscv/csr.c | 8 +- > > 5 files changed, 186 insertions(+), 11 deletions(-) > > > > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c > > index a5bc7353b4..bb7c739a74 100644 > > --- a/hw/riscv/virt.c > > +++ b/hw/riscv/virt.c > > @@ -228,7 +228,7 @@ static void create_fdt_socket_cpus(RISCVVirtState > *s, int socket, > > int cpu; > > uint32_t cpu_phandle; > > MachineState *mc = MACHINE(s); > > -char *name, *cpu_name, *core_name, *intc_name; > > +char *name, *cpu_name, *core_name, *intc_name, *sv_name; > > > > for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { > > cpu_phandle = (*phandle)++; > > @@ -236,14 +236,12 @@ static void create_fdt_socket_cpus(RISCVVirtState > *s, int socket, > > cpu_name = g_strdup_printf("/cpus/cpu@%d", > > s->soc[socket].hartid_base + cpu); > > qemu_fdt_add_subnode(mc->fdt, cpu_name); > > -if (riscv_feature(>soc[socket].harts[cpu].env, > > - RISCV_FEATURE_MMU)) { > > -qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type", > > -(is_32_bit) ? 
"riscv,sv32" : > "riscv,sv48"); > > -} else { > > -qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type", > > -"riscv,none"); > > -} > > + > > +sv_name = g_strdup_printf("riscv,%s", > > + > s->soc[socket].harts[cpu].cfg.satp_mode_str); > > +qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type", sv_name); > > +g_free(sv_name); > > + > > name = riscv_isa_string(>soc[socket].harts[cpu]); > > qemu_fdt_setprop_string(mc->fdt, cpu_name, "riscv,isa", name); > > g_free(name); > > diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c > > index d14e95c9dc..51c06ed057 100644 > > --- a/target/riscv/cpu.c > > +++ b/target/riscv/cpu.c > > @@ -907,6 +907,66 @@ static void riscv_cpu_realize(DeviceState *dev, > Error **errp) > > } > > #endif > > > > +/* > > + * Either a cpu sets its supported satp_mode in XXX_cpu_init > > + * or the user sets this value using satp_mode property. > > using the sv* and mbare properties. > > > + */ > > +bool rv32 = riscv_cpu_mxl(>env) == MXL_RV32; > > + > > +cpu->cfg.satp_mode = VM_1_10_UNDEF; > > Could probably just use -1 here instead of introducing VM_1_10_UNDEF. > > > + > > +if (rv32) { > > +if (cpu->cfg.sv32 == ON_OFF_AUTO_ON) { > > +cpu->cfg.satp_mode_str = g_strdup("sv32"); > > No need to allocate memory, satp_mode_str = "sv32". > > Also I'm not sure we need to keep mode_str in CPUConfig. Providing a > function with a switch on VM_1_10_SV* cases to get it should be enough > for its one usecase. > > > +cpu->cfg.satp_mode = VM_1_10_SV32; > > +} else if (cpu->cfg.mbare == ON_OFF_AUTO_ON) { > > +cpu->cfg.satp_mode_str = g_strdup("none"); > > +
Re: [RFC PATCH 0/1] QEMU: Dirty quota-based throttling of vcpus
On 21/11/22 4:24 am, Shivam Kumar wrote: This patchset is the QEMU-side implementation of a (new) dirty "quota" based throttling algorithm that selectively throttles vCPUs based on their individual contribution to overall memory dirtying and also dynamically adapts the throttle based on the available network bandwidth. Overview -- -- To throttle memory dirtying, we propose to set a limit on the number of pages a vCPU can dirty in given fixed microscopic size time intervals. This limit depends on the network throughput calculated over the last few intervals so as to throttle the vCPUs based on available network bandwidth. We are referring to this limit as the "dirty quota" of a vCPU and the fixed size intervals as the "dirty quota intervals". One possible approach to distributing the overall scope of dirtying for a dirty quota interval is to equally distribute it among all the vCPUs. This approach to the distribution doesn't make sense if the distribution of workloads among vCPUs is skewed. So, to counter such skewed cases, we propose that if any vCPU doesn't need its quota for any given dirty quota interval, we add this quota to a common pool. This common pool (or "common quota") can be consumed on a first come first serve basis by all vCPUs in the upcoming dirty quota intervals. Design -- -- Userspace KVM [At the start of dirty logging] Initialize dirty quota to some non-zero value for each vcpu.-> [When dirty logging starts] Start incrementing dirty count for every dirty by the vcpu. [Dirty count equals/exceeds dirty quota] If the vcpu has already claimed <- Exit to userspace. its quota for the current dirty quota interval: 1) If common quota is available, give the vcpu its quota from common pool. 2) Else sleep the vcpu until the next interval starts. Give the vcpu its share for the current(fresh) dirty quota -> Continue dirtying with the newly interval.received quota. [At the end of dirty logging] Set dirty quota back to zero for every vcpu. 
-> Throttling disabled. References -- -- KVM Forum Talk: https://www.youtube.com/watch?v=ZBkkJf78zFA Kernel Patchset: https://lore.kernel.org/all/20221113170507.208810-1-shivam.kum...@nutanix.com/ Note -- -- We understand that there is a good scope of improvement in the current implementation. Here is a list of things we are working on: 1) Adding dirty quota as a migration capability so that it can be toggled through QMP command. 2) Adding support for throttling guest DMAs. 3) Not enabling dirty quota for the first migration iteration. 4) Falling back to current auto-converge based throttling in cases where dirty quota throttling can overthrottle. Please stay tuned for the next patchset. Shivam Kumar (1): Dirty quota-based throttling of vcpus accel/kvm/kvm-all.c | 91 +++ include/exec/memory.h | 3 ++ include/hw/core/cpu.h | 5 +++ include/sysemu/kvm_int.h | 1 + linux-headers/linux/kvm.h | 9 migration/migration.c | 22 ++ migration/migration.h | 31 + softmmu/memory.c | 64 +++ 8 files changed, 226 insertions(+) It'd be great if I could get some more feedback before I send v2. Thanks. CC: Peter Xu, Juan Quintela
[PATCH 4/8] tcg/loongarch64: Introduce tcg_out_addi
Adjust the constraints to allow any int32_t for immediate addition. Split immediate adds into addu16i + addi, which covers quite a lot of the immediate space. For the hole in the middle, load the constant into TMP0 instead. Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target-con-set.h | 4 +- tcg/loongarch64/tcg-target-con-str.h | 2 +- tcg/loongarch64/tcg-target.c.inc | 57 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index 349c672687..7b5a7a3f5d 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -23,9 +23,11 @@ C_O1_I1(r, L) C_O1_I2(r, r, rC) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) +C_O1_I2(r, r, rJ) C_O1_I2(r, r, rU) C_O1_I2(r, r, rW) C_O1_I2(r, r, rZ) C_O1_I2(r, 0, rZ) -C_O1_I2(r, rZ, rN) +C_O1_I2(r, rZ, ri) +C_O1_I2(r, rZ, rJ) C_O1_I2(r, rZ, rZ) diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h index c3986a4fd4..541ff47fa9 100644 --- a/tcg/loongarch64/tcg-target-con-str.h +++ b/tcg/loongarch64/tcg-target-con-str.h @@ -21,7 +21,7 @@ REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) * CONST(letter, TCG_CT_CONST_* bit set) */ CONST('I', TCG_CT_CONST_S12) -CONST('N', TCG_CT_CONST_N12) +CONST('J', TCG_CT_CONST_S32) CONST('U', TCG_CT_CONST_U12) CONST('Z', TCG_CT_CONST_ZERO) CONST('C', TCG_CT_CONST_C12) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index b2350f2cc7..a477b1b96d 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -126,7 +126,7 @@ static const int tcg_target_call_oarg_regs[] = { #define TCG_CT_CONST_ZERO 0x100 #define TCG_CT_CONST_S12 0x200 -#define TCG_CT_CONST_N12 0x400 +#define TCG_CT_CONST_S32 0x400 #define TCG_CT_CONST_U12 0x800 #define TCG_CT_CONST_C12 0x1000 #define TCG_CT_CONST_WSZ 0x2000 @@ -161,7 +161,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) if ((ct & 
TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { return true; } -if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) { +if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { return true; } if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { @@ -378,6 +378,45 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, } } +static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rs, tcg_target_long imm) +{ +tcg_target_long lo12 = sextreg(imm, 0, 12); +tcg_target_long hi16 = sextreg(imm - lo12, 16, 16); + +/* + * Note that there's a hole in between hi16 and lo12: + * + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * ...+---+---+---+ + *| hi16 | | lo12 | + * ...+---+---+---+ + * + * For bits within that hole, it's more efficient to use LU12I and ADD. + */ +if (imm == (hi16 << 16) + lo12) { +if (hi16) { +tcg_out_opc_addu16i_d(s, rd, rs, hi16); +rs = rd; +} +if (type == TCG_TYPE_I32) { +tcg_out_opc_addi_w(s, rd, rs, lo12); +} else if (lo12) { +tcg_out_opc_addi_d(s, rd, rs, lo12); +} else { +tcg_out_mov(s, type, rd, rs); +} +} else { +tcg_out_movi(s, type, TCG_REG_TMP0, imm); +if (type == TCG_TYPE_I32) { +tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0); +} else { +tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0); +} +} +} + static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) { tcg_out_opc_andi(s, ret, arg, 0xff); @@ -1349,14 +1388,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_add_i32: if (c2) { -tcg_out_opc_addi_w(s, a0, a1, a2); +tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2); } else { tcg_out_opc_add_w(s, a0, a1, a2); } break; case INDEX_op_add_i64: if (c2) { -tcg_out_opc_addi_d(s, a0, a1, a2); +tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2); } else { tcg_out_opc_add_d(s, a0, a1, a2); } @@ -1364,14 +1403,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sub_i32: if (c2) { -tcg_out_opc_addi_w(s, a0, a1, -a2); +tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2); 
} else { tcg_out_opc_sub_w(s, a0, a1, a2); } break; case INDEX_op_sub_i64: if (c2) { -tcg_out_opc_addi_d(s, a0, a1, -a2); +
[PATCH 3/8] tcg/loongarch64: Update tcg-insn-defs.c.inc
Regenerate with ADDU16I included. Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-insn-defs.c.inc | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc b/tcg/loongarch64/tcg-insn-defs.c.inc index d162571856..c3c8669b4b 100644 --- a/tcg/loongarch64/tcg-insn-defs.c.inc +++ b/tcg/loongarch64/tcg-insn-defs.c.inc @@ -4,7 +4,7 @@ * * This file is auto-generated by genqemutcgdefs from * https://github.com/loongson-community/loongarch-opcodes, - * from commit 961f0c60f5b63e574d785995600c71ad5413fdc4. + * from commit 6ffbaddacacfcd5bdc893a49a165b8549d385eea. * DO NOT EDIT. */ @@ -74,6 +74,7 @@ typedef enum { OPC_ANDI = 0x0340, OPC_ORI = 0x0380, OPC_XORI = 0x03c0, +OPC_ADDU16I_D = 0x1000, OPC_LU12I_W = 0x1400, OPC_CU32I_D = 0x1600, OPC_PCADDU2I = 0x1800, @@ -710,6 +711,13 @@ tcg_out_opc_xori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12) tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12)); } +/* Emits the `addu16i.d d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ +tcg_out32(s, encode_djsk16_insn(OPC_ADDU16I_D, d, j, sk16)); +} + /* Emits the `lu12i.w d, sj20` instruction. */ static void __attribute__((unused)) tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20) -- 2.34.1
[PATCH 2/8] tcg/loongarch64: Optimize immediate loading
From: Rui Wang diff: Imm Before After addi.w rd, zero, 0 addi.w rd, zero, 0 lu52i.d rd, zero, 0 f800lu12i.w rd, -1 addi.w rd, zero, -2048 ori rd, rd, 2048lu32i.d rd, 0 lu32i.d rd, 0 ... Signed-off-by: Rui Wang Message-Id: <20221107144713.845550-1-wang...@loongson.cn> Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 35 +++- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index c05b19a084..b2350f2cc7 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -274,16 +274,6 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) return true; } -static bool imm_part_needs_loading(bool high_bits_are_ones, - tcg_target_long part) -{ -if (high_bits_are_ones) { -return part != -1; -} else { -return part != 0; -} -} - /* Loads a 32-bit immediate into rd, sign-extended. */ static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) { @@ -291,16 +281,16 @@ static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) tcg_target_long hi12 = sextreg(val, 12, 20); /* Single-instruction cases. */ -if (lo == val) { -/* val fits in simm12: addi.w rd, zero, val */ -tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); -return; -} -if (0x800 <= val && val <= 0xfff) { +if (hi12 == 0) { /* val fits in uimm12: ori rd, zero, val */ tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val); return; } +if (hi12 == sextreg(lo, 12, 20)) { +/* val fits in simm12: addi.w rd, zero, val */ +tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); +return; +} /* High bits must be set; load with lu12i.w + optional ori. */ tcg_out_opc_lu12i_w(s, rd, hi12); @@ -334,8 +324,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, intptr_t pc_offset; tcg_target_long val_lo, val_hi, pc_hi, offset_hi; -tcg_target_long hi32, hi52; -bool rd_high_bits_are_ones; +tcg_target_long hi12, hi32, hi52; /* Value fits in signed i32. 
*/ if (type == TCG_TYPE_I32 || val == (int32_t)val) { @@ -366,25 +355,25 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, return; } +hi12 = sextreg(val, 12, 20); hi32 = sextreg(val, 32, 20); hi52 = sextreg(val, 52, 12); /* Single cu52i.d case. */ -if (ctz64(val) >= 52) { +if ((hi52 != 0) && (ctz64(val) >= 52)) { tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52); return; } /* Slow path. Initialize the low 32 bits, then concat high bits. */ tcg_out_movi_i32(s, rd, val); -rd_high_bits_are_ones = (int32_t)val < 0; -if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) { +/* Load hi32 and hi52 explicitly when they are unexpected values. */ +if (hi32 != sextreg(hi12, 20, 20)) { tcg_out_opc_cu32i_d(s, rd, hi32); -rd_high_bits_are_ones = hi32 < 0; } -if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) { +if (hi52 != sextreg(hi32, 20, 12)) { tcg_out_opc_cu52i_d(s, rd, rd, hi52); } } -- 2.34.1
[PATCH 0/8] tcg/loongarch64: Reorg goto_tb and cleanups
Based-on: 20221206041715.314209-1-richard.hender...@linaro.org ("[PATCH 00/22] tcg: exit_tb tidy, goto_tb reorg") Includes: * Disassembler from target/loongarch/. * Improvements to movi by Rui Wang, with minor tweaks. * Improvements to setcond. * Implement movcond. * Fix the same goto_tb bug that affected some others. r~ Richard Henderson (7): target/loongarch: Enable the disassembler for host tcg tcg/loongarch64: Update tcg-insn-defs.c.inc tcg/loongarch64: Introduce tcg_out_addi tcg/loongarch64: Improve setcond expansion tcg/loongarch64: Implement movcond tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst tcg/loongarch64: Reorg goto_tb implementation Rui Wang (1): tcg/loongarch64: Optimize immediate loading tcg/loongarch64/tcg-target-con-set.h | 5 +- tcg/loongarch64/tcg-target-con-str.h | 2 +- tcg/loongarch64/tcg-target.h | 11 +- disas.c | 2 + target/loongarch/meson.build | 3 +- tcg/loongarch64/tcg-insn-defs.c.inc | 10 +- tcg/loongarch64/tcg-target.c.inc | 359 +-- 7 files changed, 256 insertions(+), 136 deletions(-) -- 2.34.1
[PATCH 1/8] target/loongarch: Enable the disassembler for host tcg
Reuse the decodetree based disassembler from target/loongarch/ for tcg/loongarch64/. The generation of decode-insns.c.inc into ./libcommon.fa.p/ could eventually result in conflict, if any other host requires the same trick, but this is good enough for now. Signed-off-by: Richard Henderson --- disas.c | 2 ++ target/loongarch/meson.build | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/disas.c b/disas.c index 94d3b45042..758824d749 100644 --- a/disas.c +++ b/disas.c @@ -198,6 +198,8 @@ static void initialize_debug_host(CPUDebug *s) s->info.cap_insn_split = 6; #elif defined(__hppa__) s->info.print_insn = print_insn_hppa; +#elif defined(__loongarch64) +s->info.print_insn = print_insn_loongarch; #endif } diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build index 6376f9e84b..690633969f 100644 --- a/target/loongarch/meson.build +++ b/target/loongarch/meson.build @@ -3,7 +3,6 @@ gen = decodetree.process('insns.decode') loongarch_ss = ss.source_set() loongarch_ss.add(files( 'cpu.c', - 'disas.c', )) loongarch_tcg_ss = ss.source_set() loongarch_tcg_ss.add(gen) @@ -24,6 +23,8 @@ loongarch_softmmu_ss.add(files( 'iocsr_helper.c', )) +common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen]) + loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss]) target_arch += {'loongarch': loongarch_ss} -- 2.34.1
[PATCH 8/8] tcg/loongarch64: Reorg goto_tb implementation
The old implementation replaces two insns, swapping between b nop and pcaddu18i tmp, jirl zero, tmp, & 0x There is a race condition in which a thread could be stopped at the jirl, i.e. with the top of the address loaded, and when restarted we have re-linked to a different TB, so that the top half no longer matches the bottom half. Note that while we never directly re-link to a different TB, we can link, unlink, and link again all while the stopped thread remains stopped. The new implementation replaces only one insn, swapping between b and nop falling through to a general-case indirect branch. Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.h | 7 +--- tcg/loongarch64/tcg-target.c.inc | 67 2 files changed, 26 insertions(+), 48 deletions(-) diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 624fbe87ff..81548fbb09 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -42,11 +42,8 @@ #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_NB_REGS 32 -/* - * PCADDU18I + JIRL sequence can give 20 + 16 + 2 = 38 bits - * signed offset, which is +/- 128 GiB. - */ -#define MAX_CODE_GEN_BUFFER_SIZE (128 * GiB) + +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) typedef enum { TCG_REG_ZERO, diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 47465b8c20..f8964699eb 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1150,37 +1150,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) #endif } -/* LoongArch uses `andi zero, zero, 0` as NOP. 
*/ -#define NOP OPC_ANDI -static void tcg_out_nop(TCGContext *s) -{ -tcg_out32(s, NOP); -} - -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t jmp_rx, uintptr_t jmp_rw) -{ -tcg_insn_unit i1, i2; -ptrdiff_t upper, lower; -uintptr_t addr = tb->jmp_target_addr[n]; -ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2; - -if (offset == sextreg(offset, 0, 26)) { -i1 = encode_sd10k16_insn(OPC_B, offset); -i2 = NOP; -} else { -tcg_debug_assert(offset == sextreg(offset, 0, 36)); -lower = (int16_t)offset; -upper = (offset - lower) >> 16; - -i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_TMP0, upper); -i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_TMP0, lower); -} -uint64_t pair = ((uint64_t)i2 << 32) | i1; -qatomic_set((uint64_t *)jmp_rw, pair); -flush_idcache_range(jmp_rx, jmp_rw, 8); -} - /* * Entry-points */ @@ -1200,23 +1169,35 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) static void tcg_out_goto_tb(TCGContext *s, int which) { -/* - * Ensure that patch area is 8-byte aligned so that an - * atomic write can be used to patch the target address. - */ -if ((uintptr_t)s->code_ptr & 7) { -tcg_out_nop(s); -} +/* Direct branch will be patched by tb_target_set_jmp_target. */ set_jmp_insn_offset(s, which); -/* - * actual branch destination will be patched by - * tb_target_set_jmp_target later - */ -tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0); +tcg_out_opc_b(s, 0); + +/* When branch is out of range, fall through to indirect. */ +tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, + get_jmp_target_addr(s, which)); tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ +uintptr_t addr = tb->jmp_target_addr[n]; +ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2; +tcg_insn_unit insn; + +/* Either directly branch, or fall through to indirect branch. 
*/ +if (offset == sextreg(offset, 0, 26)) { +insn = encode_sd10k16_insn(OPC_B, offset); +} else { +/* LoongArch uses `andi zero, zero, 0` as NOP. */ +insn = OPC_ANDI; +} +qatomic_set((tcg_insn_unit *)jmp_rw, insn); +flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) -- 2.34.1
[PATCH 5/8] tcg/loongarch64: Improve setcond expansion
Split out a helper function, tcg_out_setcond_int, which does not always produce the complete boolean result, but returns a set of flags to do so. Accept all int32_t as constant input, so that LE/GT can adjust the constant to LT. Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 165 +-- 1 file changed, 115 insertions(+), 50 deletions(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index a477b1b96d..325ae3b5c9 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -469,64 +469,131 @@ static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); } -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, -TCGReg arg1, TCGReg arg2, bool c2) -{ -TCGReg tmp; +#define SETCOND_INVTCG_TARGET_NB_REGS +#define SETCOND_NEZ(SETCOND_INV << 1) +#define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) -if (c2) { -tcg_debug_assert(arg2 == 0); +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) +{ +int flags = 0; + +switch (cond) { +case TCG_COND_EQ:/* -> NE */ +case TCG_COND_GE:/* -> LT */ +case TCG_COND_GEU: /* -> LTU */ +case TCG_COND_GT:/* -> LE */ +case TCG_COND_GTU: /* -> LEU */ +cond = tcg_invert_cond(cond); +flags ^= SETCOND_INV; +break; +default: +break; } switch (cond) { -case TCG_COND_EQ: -if (c2) { -tmp = arg1; -} else { -tcg_out_opc_sub_d(s, ret, arg1, arg2); -tmp = ret; -} -tcg_out_opc_sltui(s, ret, tmp, 1); -break; -case TCG_COND_NE: -if (c2) { -tmp = arg1; -} else { -tcg_out_opc_sub_d(s, ret, arg1, arg2); -tmp = ret; -} -tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); -break; -case TCG_COND_LT: -tcg_out_opc_slt(s, ret, arg1, arg2); -break; -case TCG_COND_GE: -tcg_out_opc_slt(s, ret, arg1, arg2); -tcg_out_opc_xori(s, ret, ret, 1); -break; case TCG_COND_LE: -tcg_out_setcond(s, TCG_COND_GE, ret, arg2, arg1, false); -break; -case TCG_COND_GT: -tcg_out_setcond(s, TCG_COND_LT, 
ret, arg2, arg1, false); -break; -case TCG_COND_LTU: -tcg_out_opc_sltu(s, ret, arg1, arg2); -break; -case TCG_COND_GEU: -tcg_out_opc_sltu(s, ret, arg1, arg2); -tcg_out_opc_xori(s, ret, ret, 1); -break; case TCG_COND_LEU: -tcg_out_setcond(s, TCG_COND_GEU, ret, arg2, arg1, false); +/* + * If we have a constant input, the most efficient way to implement + * LE is by adding 1 and using LT. Watch out for wrap around for LEU. + * We don't need to care for this for LE because the constant input + * is still constrained to int32_t, and INT32_MAX+1 is representable + * in the 64-bit temporary register. + */ +if (c2) { +if (cond == TCG_COND_LEU) { +/* unsigned <= -1 is true */ +if (arg2 == -1) { +tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV)); +return ret; +} +cond = TCG_COND_LTU; +} else { +cond = TCG_COND_LT; +} +arg2 += 1; +} else { +TCGReg tmp = arg2; +arg2 = arg1; +arg1 = tmp; +cond = tcg_swap_cond(cond);/* LE -> GE */ +cond = tcg_invert_cond(cond); /* GE -> LT */ +flags ^= SETCOND_INV; +} break; -case TCG_COND_GTU: -tcg_out_setcond(s, TCG_COND_LTU, ret, arg2, arg1, false); +default: break; +} + +switch (cond) { +case TCG_COND_NE: +flags |= SETCOND_NEZ; +if (!c2) { +tcg_out_opc_xor(s, ret, arg1, arg2); +} else if (arg2 == 0) { +ret = arg1; +} else if (arg2 >= 0 && arg2 <= 0xfff) { +tcg_out_opc_xori(s, ret, arg1, arg2); +} else { +tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2); +} +break; + +case TCG_COND_LT: +case TCG_COND_LTU: +if (c2) { +if (arg2 >= -0x800 && arg2 <= 0x7ff) { +if (cond == TCG_COND_LT) { +tcg_out_opc_slti(s, ret, arg1, arg2); +} else { +tcg_out_opc_sltui(s, ret, arg1, arg2); +} +break; +} +tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2); +arg2 = TCG_REG_TMP0; +} +if (cond == TCG_COND_LT) { +tcg_out_opc_slt(s, ret, arg1, arg2); +
[PATCH 7/8] tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
Take the w^x split into account when computing the pc-relative distance to an absolute pointer. Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 2044897e36..47465b8c20 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -701,7 +701,7 @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data, intptr_t imm12 = sextreg(offset, 0, 12); if (offset != imm12) { -intptr_t diff = offset - (uintptr_t)s->code_ptr; +intptr_t diff = tcg_pcrel_diff(s, (void *)offset); if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { imm12 = sextreg(diff, 0, 12); -- 2.34.1
[PATCH 6/8] tcg/loongarch64: Implement movcond
Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target-con-set.h | 1 + tcg/loongarch64/tcg-target.h | 4 ++-- tcg/loongarch64/tcg-target.c.inc | 33 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index 7b5a7a3f5d..172c107289 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -31,3 +31,4 @@ C_O1_I2(r, 0, rZ) C_O1_I2(r, rZ, ri) C_O1_I2(r, rZ, rJ) C_O1_I2(r, rZ, rZ) +C_O1_I4(r, rZ, rJ, rZ, rZ) diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 086c90bda3..624fbe87ff 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -96,7 +96,7 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET0 /* optional instructions */ -#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_div2_i32 0 @@ -132,7 +132,7 @@ typedef enum { #define TCG_TARGET_HAS_qemu_st8_i32 0 /* 64-bit operations */ -#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_div2_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 325ae3b5c9..2044897e36 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -596,6 +596,30 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, +TCGReg c1, tcg_target_long c2, bool const2, +TCGReg v1, TCGReg v2) +{ +int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2); +TCGReg t; + +/* Standardize the test below to t != 0. 
*/ +if (tmpflags & SETCOND_INV) { +t = v1, v1 = v2, v2 = t; +} + +t = tmpflags & ~SETCOND_FLAGS; +if (v1 == TCG_REG_ZERO) { +tcg_out_opc_masknez(s, ret, v2, t); +} else if (v2 == TCG_REG_ZERO) { +tcg_out_opc_maskeqz(s, ret, v1, t); +} else { +tcg_out_opc_masknez(s, TCG_REG_TMP2, v2, t); /* t ? 0 : v2 */ +tcg_out_opc_maskeqz(s, TCG_REG_TMP1, v1, t); /* t ? v1 : 0 */ +tcg_out_opc_or(s, ret, TCG_REG_TMP1, TCG_REG_TMP2); +} +} + /* * Branch helpers */ @@ -1537,6 +1561,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_setcond(s, args[3], a0, a1, a2, c2); break; +case INDEX_op_movcond_i32: +case INDEX_op_movcond_i64: +tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]); +break; + case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); @@ -1740,6 +1769,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_remu_i64: return C_O1_I2(r, rZ, rZ); +case INDEX_op_movcond_i32: +case INDEX_op_movcond_i64: +return C_O1_I4(r, rZ, rJ, rZ, rZ); + default: g_assert_not_reached(); } -- 2.34.1
[PATCH 17/22] tcg/ppc: Reorg goto_tb implementation
The old ppc64 implementation replaces 2 or 4 insns, which leaves a race condition in which a thread could be stopped at a PC in the middle of the sequence, and when restarted does not see the complete address computation and branches to nowhere. The new implementation replaces only one insn, swapping between b and mtctr r31 falling through to a general-case indirect branch. Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.h | 3 +- tcg/ppc/tcg-target.c.inc | 158 +++ 2 files changed, 44 insertions(+), 117 deletions(-) diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index f253184915..af81c5a57f 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -27,11 +27,10 @@ #ifdef _ARCH_PPC64 # define TCG_TARGET_REG_BITS 64 -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) #else # define TCG_TARGET_REG_BITS 32 -# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB) #endif +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) #define TCG_TARGET_NB_REGS 64 #define TCG_TARGET_INSN_UNIT_SIZE 4 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 592b8d6498..755b954447 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1847,104 +1847,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) tcg_out32(s, insn); } -static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2) -{ -if (HOST_BIG_ENDIAN) { -return (uint64_t)i1 << 32 | i2; -} -return (uint64_t)i2 << 32 | i1; -} - -static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw, - tcg_insn_unit i0, tcg_insn_unit i1) -{ -#if TCG_TARGET_REG_BITS == 64 -qatomic_set((uint64_t *)rw, make_pair(i0, i1)); -flush_idcache_range(rx, rw, 8); -#else -qemu_build_not_reached(); -#endif -} - -static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw, - tcg_insn_unit i0, tcg_insn_unit i1, - tcg_insn_unit i2, tcg_insn_unit i3) -{ -uint64_t p[2]; - -p[!HOST_BIG_ENDIAN] = make_pair(i0, i1); -p[HOST_BIG_ENDIAN] = make_pair(i2, i3); - -/* - * There's no convenient way to get the compiler to allocate 
a pair - * of registers at an even index, so copy into r6/r7 and clobber. - */ -asm("mr %%r6, %1\n\t" -"mr %%r7, %2\n\t" -"stq %%r6, %0" -: "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7"); -flush_idcache_range(rx, rw, 16); -} - -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t jmp_rx, uintptr_t jmp_rw) -{ -tcg_insn_unit i0, i1, i2, i3; -uintptr_t addr = tb->jmp_target_addr[n]; -intptr_t tb_diff = addr - (uintptr_t)tb->tc.ptr; -intptr_t br_diff = addr - (jmp_rx + 4); -intptr_t lo, hi; - -if (TCG_TARGET_REG_BITS == 32) { -intptr_t diff = addr - jmp_rx; -tcg_debug_assert(in_range_b(diff)); -qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fc)); -flush_idcache_range(jmp_rx, jmp_rw, 4); -return; -} - -/* - * For 16-bit displacements, we can use a single add + branch. - * This happens quite often. - */ -if (tb_diff == (int16_t)tb_diff) { -i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff); -i1 = B | (br_diff & 0x3fc); -ppc64_replace2(jmp_rx, jmp_rw, i0, i1); -return; -} - -lo = (int16_t)tb_diff; -hi = (int32_t)(tb_diff - lo); -assert(tb_diff == hi + lo); -i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16); -i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo); - -/* - * Without stq from 2.07, we can only update two insns, - * and those must be the ones that load the target address. - */ -if (!have_isa_2_07) { -ppc64_replace2(jmp_rx, jmp_rw, i0, i1); -return; -} - -/* - * For 26-bit displacements, we can use a direct branch. - * Otherwise we still need the indirect branch, which we - * must restore after a potential direct branch write. 
- */ -br_diff -= 4; -if (in_range_b(br_diff)) { -i2 = B | (br_diff & 0x3fc); -i3 = NOP; -} else { -i2 = MTSPR | RS(TCG_REG_TB) | CTR; -i3 = BCCTR | BO_ALWAYS; -} -ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3); -} - static void tcg_out_call_int(TCGContext *s, int lk, const tcg_insn_unit *target) { @@ -2625,30 +2527,56 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) static void tcg_out_goto_tb(TCGContext *s, int which) { -/* Direct jump. */ -if (TCG_TARGET_REG_BITS == 64) { -/* Ensure the next insns are 8 or 16-byte aligned. */ -while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { -tcg_out32(s, NOP); -} +uintptr_t ptr = get_jmp_target_addr(s, which); + +if (USE_REG_TB) { +ptrdiff_t offset =
[PATCH 19/22] tcg/sparc64: Reorg goto_tb implementation
The old sparc64 implementation may replace two insns, which leaves a race condition in which a thread could be stopped at a PC in the middle of the sequence, and when restarted does not see the complete address computation and branches to nowhere. The new implementation replaces only one insn, swapping between a direct branch and a direct call. The TCG_REG_TB register is loaded from tb->jmp_target_addr[] in the delay slot. Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target.c.inc | 93 ++-- 1 file changed, 37 insertions(+), 56 deletions(-) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 1ae9615ef0..07ebea7a6e 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1435,33 +1435,56 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) static void tcg_out_goto_tb(TCGContext *s, int which) { -int c; +ptrdiff_t off = tcg_tbrel_diff(s, (void *)get_jmp_target_addr(s, which)); -/* Direct jump. */ -/* make sure the patch is 8-byte aligned. */ -if ((intptr_t)s->code_ptr & 4) { -tcg_out_nop(s); -} +/* Direct branch will be patched by tb_target_set_jmp_target. */ set_jmp_insn_offset(s, which); -tcg_out_sethi(s, TCG_REG_T1, 0); -tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); -tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); -tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); +tcg_out32(s, CALL); +/* delay slot */ +tcg_debug_assert(check_fit_ptr(off, 13)); +tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, TCG_REG_TB, off); set_jmp_reset_offset(s, which); /* * For the unlinked path of goto_tb, we need to reset TCG_REG_TB * to the beginning of this TB. 
*/ -c = -tcg_current_code_size(s); -if (check_fit_i32(c, 13)) { -tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD); +off = -tcg_current_code_size(s); +if (check_fit_i32(off, 13)) { +tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, off, ARITH_ADD); } else { -tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c); +tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, off); tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); } } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ +uintptr_t addr = tb->jmp_target_addr[n]; +intptr_t br_disp = (intptr_t)(addr - jmp_rx) >> 2; +tcg_insn_unit insn; + +br_disp >>= 2; +if (check_fit_ptr(br_disp, 19)) { +/* ba,pt %icc, addr */ +insn = deposit32(INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A) + | BPCC_ICC | BPCC_PT, 0, 19, br_disp); +} else if (check_fit_ptr(br_disp, 22)) { +/* ba addr */ +insn = deposit32(INSN_OP(0) | INSN_OP2(2) | INSN_COND(COND_A), + 0, 22, br_disp); +} else { +/* The code_gen_buffer can't be larger than 2GB. */ +tcg_debug_assert(check_fit_ptr(br_disp, 30)); +/* call addr */ +insn = deposit32(CALL, 0, 30, br_disp); +} + +qatomic_set((uint32_t *)jmp_rw, insn); +flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1871,45 +1894,3 @@ void tcg_register_jit(const void *buf, size_t buf_size) tcg_register_jit_int(buf, buf_size, _frame, sizeof(debug_frame)); } -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t jmp_rx, uintptr_t jmp_rw) -{ -uintptr_t addr = tb->jmp_target_addr[n]; -intptr_t tb_disp = addr - (uintptr_t)tb->tc.ptr; -intptr_t br_disp = addr - jmp_rx; -tcg_insn_unit i1, i2; - -/* We can reach the entire address space for ILP32. - For LP64, the code_gen_buffer can't be larger than 2GB. 
*/ -tcg_debug_assert(tb_disp == (int32_t)tb_disp); -tcg_debug_assert(br_disp == (int32_t)br_disp); - -if (0) { -qatomic_set((uint32_t *)jmp_rw, - deposit32(CALL, 0, 30, br_disp >> 2)); -flush_idcache_range(jmp_rx, jmp_rw, 4); -return; -} - -/* This does not exercise the range of the branch, but we do - still need to be able to load the new value of TCG_REG_TB. - But this does still happen quite often. */ -if (check_fit_ptr(tb_disp, 13)) { -/* ba,pt %icc, addr */ -i1 = (INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A) - | BPCC_ICC | BPCC_PT | INSN_OFF19(br_disp)); -i2 = (ARITH_ADD | INSN_RD(TCG_REG_TB) | INSN_RS1(TCG_REG_TB) - | INSN_IMM13(tb_disp)); -} else if (tb_disp >= 0) { -i1 = SETHI | INSN_RD(TCG_REG_T1) | ((tb_disp & 0xfc00) >> 10); -i2 = (ARITH_OR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
[PATCH 13/22] tcg: Move tb_target_set_jmp_target declaration to tcg.h
Signed-off-by: Richard Henderson --- include/tcg/tcg.h| 3 +++ tcg/aarch64/tcg-target.h | 4 tcg/arm/tcg-target.h | 5 - tcg/i386/tcg-target.h| 3 --- tcg/loongarch64/tcg-target.h | 3 --- tcg/mips/tcg-target.h| 5 - tcg/ppc/tcg-target.h | 4 tcg/riscv/tcg-target.h | 4 tcg/s390x/tcg-target.h | 4 tcg/sparc64/tcg-target.h | 4 tcg/tci/tcg-target.h | 4 11 files changed, 3 insertions(+), 40 deletions(-) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 993aafa1a2..6f3b602564 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -837,6 +837,9 @@ void tcg_func_start(TCGContext *s); int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start); +void tb_target_set_jmp_target(const TranslationBlock *, int, + uintptr_t, uintptr_t); + void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size); TCGTemp *tcg_global_mem_new_internal(TCGType, TCGv_ptr, diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 9b0927012c..dc16fd0da6 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -151,10 +151,6 @@ typedef enum { #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 0 - -void tb_target_set_jmp_target(const TranslationBlock *, int, - uintptr_t, uintptr_t); - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 743a725aa7..13ad721438 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -150,11 +150,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 0 - -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t); - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index b64317bf40..4b4ceacfa5 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -218,9 +218,6 @@ 
extern bool have_movbe; #define TCG_TARGET_extract_i64_valid(ofs, len) \ (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32) -void tb_target_set_jmp_target(const TranslationBlock *, int, - uintptr_t, uintptr_t); - /* This defines the natural memory order supported by this * architecture before guarantees made by various barrier * instructions. diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 6a6c8d6941..87f40d935c 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -170,9 +170,6 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i641 #define TCG_TARGET_HAS_mulsh_i641 -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t); - #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 57154ec808..31236d8e81 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -203,11 +203,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t) -QEMU_ERROR("code path is reachable"); - #define TCG_TARGET_NEED_LDST_LABELS #endif diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 02764c3331..5ffb41fb57 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -180,12 +180,8 @@ extern bool have_vsx; #define TCG_TARGET_HAS_bitsel_vec have_vsx #define TCG_TARGET_HAS_cmpsel_vec 0 -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t); - #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 21d455a081..ff2f861e82 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -160,10 +160,6 @@ 
typedef enum { #define TCG_TARGET_HAS_mulsh_i641 #endif -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t); - #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index d9a45e20a8..274cb3cc5d 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -171,10 +171,6 @@ extern uint64_t s390_facilities[3]; #define
[PATCH 18/22] tcg/sparc64: Remove USE_REG_TB
This is always true for sparc64, so this is dead since 3a5f6805c7ca. Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target.c.inc | 57 ++-- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 96d58f30b1..1ae9615ef0 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -92,7 +92,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #endif #define TCG_REG_TB TCG_REG_I1 -#define USE_REG_TB (sizeof(void *) > 4) static const int tcg_target_reg_alloc_order[] = { TCG_REG_L0, @@ -439,7 +438,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } /* A 13-bit constant relative to the TB. */ -if (!in_prologue && USE_REG_TB) { +if (!in_prologue) { test = tcg_tbrel_diff(s, (void *)arg); if (check_fit_ptr(test, 13)) { tcg_out_arithi(s, ret, TCG_REG_TB, test, ARITH_ADD); @@ -468,7 +467,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } /* Use the constant pool, if possible. */ -if (!in_prologue && USE_REG_TB) { +if (!in_prologue) { new_pool_label(s, arg, R_SPARC_13, s->code_ptr, tcg_tbrel_diff(s, NULL)); tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB)); @@ -1014,10 +1013,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) #endif /* We choose TCG_REG_TB such that no move is required. 
*/ -if (USE_REG_TB) { -QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1); -tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); -} +QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1); +tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL); /* delay slot */ @@ -1422,7 +1419,7 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); tcg_out_movi_imm13(s, TCG_REG_O0, a0); return; -} else if (USE_REG_TB) { +} else { intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0); if (check_fit_ptr(tb_diff, 13)) { tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); @@ -1438,36 +1435,30 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) static void tcg_out_goto_tb(TCGContext *s, int which) { +int c; + /* Direct jump. */ -if (USE_REG_TB) { -/* make sure the patch is 8-byte aligned. */ -if ((intptr_t)s->code_ptr & 4) { -tcg_out_nop(s); -} -set_jmp_insn_offset(s, which); -tcg_out_sethi(s, TCG_REG_T1, 0); -tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); -tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); -tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); -} else { -set_jmp_insn_offset(s, which); -tcg_out32(s, CALL); +/* make sure the patch is 8-byte aligned. */ +if ((intptr_t)s->code_ptr & 4) { tcg_out_nop(s); } +set_jmp_insn_offset(s, which); +tcg_out_sethi(s, TCG_REG_T1, 0); +tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); +tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); +tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); set_jmp_reset_offset(s, which); /* * For the unlinked path of goto_tb, we need to reset TCG_REG_TB * to the beginning of this TB. 
*/ -if (USE_REG_TB) { -int c = -tcg_current_code_size(s); -if (check_fit_i32(c, 13)) { -tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD); -} else { -tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c); -tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); -} +c = -tcg_current_code_size(s); +if (check_fit_i32(c, 13)) { +tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD); +} else { +tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c); +tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); } } @@ -1487,11 +1478,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_ptr: tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); -if (USE_REG_TB) { -tcg_out_mov_delay(s, TCG_REG_TB, a0); -} else { -tcg_out_nop(s); -} +tcg_out_mov_delay(s, TCG_REG_TB, a0); break; case INDEX_op_br: tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); @@ -1897,7 +1884,7 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, tcg_debug_assert(tb_disp == (int32_t)tb_disp); tcg_debug_assert(br_disp == (int32_t)br_disp); -if (!USE_REG_TB) { +if (0) { qatomic_set((uint32_t *)jmp_rw, deposit32(CALL, 0, 30, br_disp >> 2));
[PATCH 04/22] tcg/sparc64: Remove unused goto_tb code for indirect jump
Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target.c.inc | 41 +++- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 1e3351a4e8..f035bf7dd2 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -537,17 +537,6 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } -static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, const void *arg) -{ -intptr_t diff = tcg_tbrel_diff(s, arg); -if (USE_REG_TB && check_fit_ptr(diff, 13)) { -tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff); -return; -} -tcg_out_movi(s, TCG_TYPE_PTR, ret, (uintptr_t)arg & ~0x3ff); -tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, (uintptr_t)arg & 0x3ff); -} - static void tcg_out_sety(TCGContext *s, TCGReg rs) { tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); @@ -1462,27 +1451,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -if (s->tb_jmp_insn_offset) { -/* direct jump method */ -if (USE_REG_TB) { -/* make sure the patch is 8-byte aligned. */ -if ((intptr_t)s->code_ptr & 4) { -tcg_out_nop(s); -} -s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); -tcg_out_sethi(s, TCG_REG_T1, 0); -tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); -tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); -tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); -} else { -s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); -tcg_out32(s, CALL); +qemu_build_assert(TCG_TARGET_HAS_direct_jump); +/* Direct jump. */ +if (USE_REG_TB) { +/* make sure the patch is 8-byte aligned. 
*/ +if ((intptr_t)s->code_ptr & 4) { tcg_out_nop(s); } +s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); +tcg_out_sethi(s, TCG_REG_T1, 0); +tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); +tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); +tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); } else { -/* indirect jump method */ -tcg_out_ld_ptr(s, TCG_REG_TB, s->tb_jmp_target_addr + a0); -tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL); +s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); +tcg_out32(s, CALL); tcg_out_nop(s); } set_jmp_reset_offset(s, a0); -- 2.34.1
[PATCH 10/22] tcg: Add gen_tb to TCGContext
This can replace four other variables that are references into the TranslationBlock structure. Signed-off-by: Richard Henderson --- include/tcg/tcg.h | 11 +++ accel/tcg/translate-all.c | 2 +- tcg/tcg-op.c | 14 +++--- tcg/tcg.c | 14 +++--- 4 files changed, 14 insertions(+), 27 deletions(-) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index d84bae6e3f..993aafa1a2 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -544,20 +544,15 @@ struct TCGContext { int nb_indirects; int nb_ops; -/* goto_tb support */ -tcg_insn_unit *code_buf; -uint16_t *tb_jmp_reset_offset; /* tb->jmp_reset_offset */ -uintptr_t *tb_jmp_insn_offset; /* tb->jmp_target_arg if direct_jump */ -uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_arg if !direct_jump */ - TCGRegSet reserved_regs; -uint32_t tb_cflags; /* cflags of the current TB */ intptr_t current_frame_offset; intptr_t frame_start; intptr_t frame_end; TCGTemp *frame_temp; -tcg_insn_unit *code_ptr; +TranslationBlock *gen_tb; /* tb for which code is being generated */ +tcg_insn_unit *code_buf; /* pointer for start of tb */ +tcg_insn_unit *code_ptr; /* pointer for running end of tb */ #ifdef CONFIG_PROFILER TCGProfile prof; diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 9cf88da6cb..94238a1926 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -827,7 +827,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->trace_vcpu_dstate = *cpu->trace_dstate; tb_set_page_addr0(tb, phys_pc); tb_set_page_addr1(tb, -1); -tcg_ctx->tb_cflags = cflags; +tcg_ctx->gen_tb = tb; tb_overflow: #ifdef CONFIG_PROFILER diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 019fab00cc..585f33ffaf 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -94,7 +94,7 @@ void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, void tcg_gen_mb(TCGBar mb_type) { -if (tcg_ctx->tb_cflags & CF_PARALLEL) { +if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { tcg_gen_op1(INDEX_op_mb, mb_type); } } @@ -2763,7 +2763,7 @@ void 
tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx) void tcg_gen_goto_tb(unsigned idx) { /* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */ -tcg_debug_assert(!(tcg_ctx->tb_cflags & CF_NO_GOTO_TB)); +tcg_debug_assert(!(tcg_ctx->gen_tb->cflags & CF_NO_GOTO_TB)); /* We only support two chained exits. */ tcg_debug_assert(idx <= TB_EXIT_IDXMAX); #ifdef CONFIG_DEBUG_TCG @@ -2779,7 +2779,7 @@ void tcg_gen_lookup_and_goto_ptr(void) { TCGv_ptr ptr; -if (tcg_ctx->tb_cflags & CF_NO_GOTO_PTR) { +if (tcg_ctx->gen_tb->cflags & CF_NO_GOTO_PTR) { tcg_gen_exit_tb(NULL, 0); return; } @@ -3146,7 +3146,7 @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv, { memop = tcg_canonicalize_memop(memop, 0, 0); -if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) { +if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { TCGv_i32 t1 = tcg_temp_new_i32(); TCGv_i32 t2 = tcg_temp_new_i32(); @@ -3184,7 +3184,7 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv, { memop = tcg_canonicalize_memop(memop, 1, 0); -if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) { +if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); @@ -3345,7 +3345,7 @@ static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \ void tcg_gen_atomic_##NAME##_i32\ (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)\ { \ -if (tcg_ctx->tb_cflags & CF_PARALLEL) { \ +if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {\ do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \ } else {\ do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,\ @@ -3355,7 +3355,7 @@ void tcg_gen_atomic_##NAME##_i32 \ void tcg_gen_atomic_##NAME##_i64\ (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)\ { \ -if (tcg_ctx->tb_cflags & CF_PARALLEL) { \ +if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {\ do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \ } else {\ do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,\ diff --git a/tcg/tcg.c 
b/tcg/tcg.c index
[PATCH 15/22] tcg: Remove TCG_TARGET_HAS_direct_jump
We now have the option to generate direct or indirect goto_tb depending on the dynamic displacement, thus the define is no longer necessary or completely accurate. Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.h | 1 - tcg/arm/tcg-target.h | 1 - tcg/i386/tcg-target.h| 1 - tcg/loongarch64/tcg-target.h | 1 - tcg/mips/tcg-target.h| 1 - tcg/ppc/tcg-target.h | 1 - tcg/riscv/tcg-target.h | 1 - tcg/s390x/tcg-target.h | 1 - tcg/sparc64/tcg-target.h | 1 - tcg/tci/tcg-target.h | 1 - accel/tcg/cpu-exec.c | 13 ++--- tcg/tcg.c| 1 - tcg/arm/tcg-target.c.inc | 1 - tcg/mips/tcg-target.c.inc| 1 - tcg/riscv/tcg-target.c.inc | 1 - tcg/s390x/tcg-target.c.inc | 5 - tcg/tci/tcg-target.c.inc | 1 - 17 files changed, 10 insertions(+), 23 deletions(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index dc16fd0da6..c8202e4bc5 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -123,7 +123,6 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i640 #define TCG_TARGET_HAS_muluh_i641 #define TCG_TARGET_HAS_mulsh_i641 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_v64 1 #define TCG_TARGET_HAS_v128 1 diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 13ad721438..879eecd93d 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -121,7 +121,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_mulsh_i320 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 -#define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_v64 use_neon_instructions diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 4b4ceacfa5..b1ffd47493 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -139,7 +139,6 @@ extern bool have_movbe; #define TCG_TARGET_HAS_muls2_i321 #define TCG_TARGET_HAS_muluh_i320 #define TCG_TARGET_HAS_mulsh_i320 -#define TCG_TARGET_HAS_direct_jump 1 #if TCG_TARGET_REG_BITS == 64 /* Keep target addresses zero-extended in 
a register. */ diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 87f40d935c..086c90bda3 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -127,7 +127,6 @@ typedef enum { #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i320 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_brcond2 0 #define TCG_TARGET_HAS_setcond2 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 31236d8e81..cc0a6f301a 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -132,7 +132,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_muluh_i321 #define TCG_TARGET_HAS_mulsh_i321 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_direct_jump 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 5ffb41fb57..f253184915 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -108,7 +108,6 @@ extern bool have_vsx; #define TCG_TARGET_HAS_muls2_i320 #define TCG_TARGET_HAS_muluh_i321 #define TCG_TARGET_HAS_mulsh_i321 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index ff2f861e82..56f7bc3346 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -116,7 +116,6 @@ typedef enum { #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i320 -#define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_brcond2 1 #define TCG_TARGET_HAS_setcond2 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 274cb3cc5d..2c7c14055b 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -103,7 +103,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_mulsh_i32 0 #define 
TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 -#define TCG_TARGET_HAS_direct_jumpHAVE_FACILITY(GEN_INST_EXT) #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_div2_i64 1 diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index ca7e4da6d3..21d3c59bd7 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -110,7 +110,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muls2_i321 #define
[PATCH 14/22] tcg: Always define tb_target_set_jmp_target
Install empty versions for !TCG_TARGET_HAS_direct_jump hosts. Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 6 ++ tcg/mips/tcg-target.c.inc | 6 ++ tcg/riscv/tcg-target.c.inc | 6 ++ tcg/tci/tcg-target.c.inc | 6 ++ 4 files changed, 24 insertions(+) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 0afc286c8e..f5103dcc6d 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1958,6 +1958,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ +/* Always indirect, nothing to do */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 605aa7714a..dfb31a17bb 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1976,6 +1976,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ +/* Always indirect, nothing to do */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index e73dfb4cbb..06308c5243 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1285,6 +1285,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ +/* Always indirect, nothing to do */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc 
index cf84ff3133..31094067b7 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -606,6 +606,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ +/* Always indirect, nothing to do */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) -- 2.34.1
[PATCH 20/22] tcg/arm: Implement direct branch for goto_tb
Now that tcg can handle direct and indirect goto_tb simultaneously, we can optimistically leave space for a direct branch and fall back to loading the pointer from the TB for an indirect branch. Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 52 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index a44d1d969f..fad2be700e 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -135,6 +135,8 @@ typedef enum { ARITH_BIC = 0xe << 21, ARITH_MVN = 0xf << 21, +INSN_B = 0x0a00, + INSN_CLZ = 0x016f0f10, INSN_RBIT = 0x06ff0f30, @@ -546,7 +548,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) static void tcg_out_b_imm(TCGContext *s, ARMCond cond, int32_t offset) { -tcg_out32(s, (cond << 28) | 0x0a00 | +tcg_out32(s, (cond << 28) | INSN_B | (((offset - 8) >> 2) & 0x00ff)); } @@ -1935,32 +1937,52 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) static void tcg_out_goto_tb(TCGContext *s, int which) { -/* Indirect jump method */ -intptr_t ptr, dif, dil; -TCGReg base = TCG_REG_PC; +uintptr_t i_addr; +intptr_t i_disp; -ptr = get_jmp_target_addr(s, which); -dif = tcg_pcrel_diff(s, (void *)ptr) - 8; -dil = sextract32(dif, 0, 12); -if (dif != dil) { +/* Direct branch will be patched by tb_target_set_jmp_target. */ +set_jmp_insn_offset(s, which); +tcg_out32(s, INSN_NOP); + +/* When branch is out of range, fall through to indirect. */ +i_addr = get_jmp_target_addr(s, which); +i_disp = tcg_pcrel_diff(s, (void *)i_addr) - 8; +tcg_debug_assert(i_disp < 0); +if (i_disp >= -0xfff) { +tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, i_disp); +} else { /* * The TB is close, but outside the 12 bits addressable by * the load. We can extend this to 20 bits with a sub of a - * shifted immediate from pc. In the vastly unlikely event - * the code requires more than 1MB, we'll use 2 insns and - * be no worse off. + * shifted immediate from pc. 
*/ -base = TCG_REG_R0; -tcg_out_movi32(s, COND_AL, base, ptr - dil); +int h = -i_disp; +int l = h & 0xfff; + +h = encode_imm_nofail(h - l); +tcg_out_dat_imm(s, COND_AL, ARITH_SUB, TCG_REG_R0, TCG_REG_PC, h); +tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, l); } -tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil); set_jmp_reset_offset(s, which); } void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { -/* Always indirect, nothing to do */ +uintptr_t addr = tb->jmp_target_addr[n]; +ptrdiff_t offset = addr - (jmp_rx + 8); +tcg_insn_unit insn; + +/* Either directly branch, or fall through to indirect branch. */ +if (offset == sextract64(offset, 0, 26)) { +/* B */ +insn = (COND_AL << 28) | INSN_B | ((offset >> 2) & 0x00ff); +} else { +insn = INSN_NOP; +} + +qatomic_set((uint32_t *)jmp_rw, insn); +flush_idcache_range(jmp_rx, jmp_rw, 4); } static void tcg_out_op(TCGContext *s, TCGOpcode opc, -- 2.34.1
[PATCH 11/22] tcg: Add TranslationBlock.jmp_insn_offset
Stop overloading jmp_target_arg for both offset and address, depending on TCG_TARGET_HAS_direct_jump. Instead, add a new field to hold the jump insn offset and always set the target address in jmp_target_addr[]. This will allow a tcg backend to use either direct or indirect depending on displacement. Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 3 ++- accel/tcg/cpu-exec.c| 5 ++--- tcg/tcg.c | 6 -- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 7566ad9e7f..bb01508b4b 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -577,7 +577,8 @@ struct TranslationBlock { */ #define TB_JMP_OFFSET_INVALID 0x /* indicates no jump generated */ uint16_t jmp_reset_offset[2]; /* offset of original jump target */ -uintptr_t jmp_target_arg[2]; /* target address or offset */ +uint16_t jmp_insn_offset[2]; /* offset of direct jump insn */ +uintptr_t jmp_target_addr[2]; /* target address */ /* * Each TB has a NULL-terminated list (jmp_list_head) of incoming jumps. diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 356fe348de..a87fbf74f4 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -572,14 +572,13 @@ void cpu_exec_step_atomic(CPUState *cpu) void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) { +tb->jmp_target_addr[n] = addr; if (TCG_TARGET_HAS_direct_jump) { -uintptr_t offset = tb->jmp_target_arg[n]; +uintptr_t offset = tb->jmp_insn_offset[n]; uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr; uintptr_t jmp_rx = tc_ptr + offset; uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff; tb_target_set_jmp_target(tc_ptr, jmp_rx, jmp_rw, addr); -} else { -tb->jmp_target_arg[n] = addr; } } diff --git a/tcg/tcg.c b/tcg/tcg.c index c7ad46ff45..c103dd0037 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -328,7 +328,7 @@ static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 
*/ tcg_debug_assert(TCG_TARGET_HAS_direct_jump); -s->gen_tb->jmp_target_arg[which] = tcg_current_code_size(s); +s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s); } static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) @@ -337,7 +337,7 @@ static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) * Return the read-execute version of the pointer, for the benefit * of any pc-relative addressing mode. */ -return (uintptr_t)tcg_splitwx_to_rx(s->gen_tb->jmp_target_arg + which); +return (uintptr_t)tcg_splitwx_to_rx(>gen_tb->jmp_target_addr[which]); } /* Signal overflow, starting over with fewer guest insns. */ @@ -4252,6 +4252,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) /* Initialize goto_tb jump offsets. */ tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; +tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; +tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; tcg_reg_alloc_start(s); -- 2.34.1
[PATCH 01/22] tcg: Split out tcg_out_exit_tb
The INDEX_op_exit_tb opcode needs no register allocation. Split out a dedicated helper function for it. Signed-off-by: Richard Henderson --- tcg/tcg.c| 4 tcg/aarch64/tcg-target.c.inc | 22 ++ tcg/arm/tcg-target.c.inc | 11 + tcg/i386/tcg-target.c.inc| 21 + tcg/loongarch64/tcg-target.c.inc | 22 ++ tcg/mips/tcg-target.c.inc| 33 +-- tcg/ppc/tcg-target.c.inc | 11 + tcg/riscv/tcg-target.c.inc | 22 ++ tcg/s390x/tcg-target.c.inc | 23 ++- tcg/sparc64/tcg-target.c.inc | 39 +--- tcg/tci/tcg-target.c.inc | 10 11 files changed, 121 insertions(+), 97 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 436fcf6ebd..b53961baf7 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -107,6 +107,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg); +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]); @@ -4302,6 +4303,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) case INDEX_op_call: tcg_reg_alloc_call(s, op); break; +case INDEX_op_exit_tb: +tcg_out_exit_tb(s, op->args[0]); +break; case INDEX_op_dup2_vec: if (tcg_reg_alloc_dup2(s, op)) { break; diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 344b63e20f..16c5e33b69 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1886,6 +1886,17 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, static const tcg_insn_unit *tb_ret_addr; +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ +/* Reuse the zeroing that exists for goto_ptr. 
*/ +if (a0 == 0) { +tcg_out_goto_long(s, tcg_code_gen_epilogue); +} else { +tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); +tcg_out_goto_long(s, tb_ret_addr); +} +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1905,16 +1916,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) switch (opc) { -case INDEX_op_exit_tb: -/* Reuse the zeroing that exists for goto_ptr. */ -if (a0 == 0) { -tcg_out_goto_long(s, tcg_code_gen_epilogue); -} else { -tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); -tcg_out_goto_long(s, tb_ret_addr); -} -break; - case INDEX_op_goto_tb: tcg_debug_assert(s->tb_jmp_insn_offset != NULL); /* @@ -2304,6 +2305,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ +case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. 
*/ default: g_assert_not_reached(); } diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 2c6c353eea..e5c2eae5a5 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1927,6 +1927,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) static void tcg_out_epilogue(TCGContext *s); +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) +{ +tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, arg); +tcg_out_epilogue(s); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1935,10 +1941,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, int c; switch (opc) { -case INDEX_op_exit_tb: -tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]); -tcg_out_epilogue(s); -break; case INDEX_op_goto_tb: { /* Indirect jump method */ @@ -2250,6 +2252,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_call: /* Always emitted via tcg_out_call. */ +case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ default: tcg_abort(); } diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index cb04e4b3ad..5c20bedd20
[PATCH 16/22] tcg/aarch64: Reorg goto_tb implementation
The old implementation replaces two insns, swapping between b nop br x30 and adrpx30, addix30, x30, lo12: br x30 There is a race condition in which a thread could be stopped at the PC of the second insn, and when restarted does not see the complete address computation and branches to nowhere. The new implementation replaces only one insn, swapping between b br tmp and ldr tmp, br tmp Reported-by: hev Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.h | 2 +- tcg/aarch64/tcg-target.c.inc | 64 +++- 2 files changed, 27 insertions(+), 39 deletions(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index c8202e4bc5..9b8835eaa3 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -15,7 +15,7 @@ #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 -#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) #undef TCG_TARGET_STACK_GROWSUP typedef enum { diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index af30f9db69..fe8e8636aa 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1352,33 +1352,6 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) } } -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t jmp_rx, uintptr_t jmp_rw) -{ -uintptr_t addr = tb->jmp_target_addr[n]; -tcg_insn_unit i1, i2; -TCGType rt = TCG_TYPE_I64; -TCGReg rd = TCG_REG_TMP; -uint64_t pair; - -ptrdiff_t offset = addr - jmp_rx; - -if (offset == sextract64(offset, 0, 26)) { -i1 = I3206_B | ((offset >> 2) & 0x3ff); -i2 = NOP; -} else { -offset = (addr >> 12) - (jmp_rx >> 12); - -/* patch ADRP */ -i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1c) << (5 - 2) | rd; -/* patch ADDI */ -i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; -} -pair = (uint64_t)i2 << 32 | i1; -qatomic_set((uint64_t *)jmp_rw, pair); -flush_idcache_range(jmp_rx, jmp_rw, 8); -} - static inline void 
tcg_out_goto_label(TCGContext *s, TCGLabel *l) { if (!l->has_value) { @@ -1901,23 +1874,38 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) static void tcg_out_goto_tb(TCGContext *s, int which) { /* - * Ensure that ADRP+ADD are 8-byte aligned so that an atomic - * write can be used to patch the target address. + * Direct branch, or indirect address load, will be patched + * by tb_target_set_jmp_target. Assert indirect load offset + * in range early, regardless of direct branch distance. */ -if ((uintptr_t)s->code_ptr & 7) { -tcg_out32(s, NOP); -} +intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which)); +tcg_debug_assert(i_off == sextract64(i_off, 0, 21)); + set_jmp_insn_offset(s, which); -/* - * actual branch destination will be patched by - * tb_target_set_jmp_target later - */ -tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); -tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); +tcg_out32(s, I3206_B); tcg_out_insn(s, 3207, BR, TCG_REG_TMP); set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ +uintptr_t d_addr = tb->jmp_target_addr[n]; +uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; +ptrdiff_t d_offset = d_addr - jmp_rx; +ptrdiff_t i_offset = i_addr - jmp_rx; +tcg_insn_unit insn; + +/* Either directly branch, or indirect branch load. */ +if (d_offset == sextract64(d_offset, 0, 26)) { +insn = I3206_B | ((d_offset >> 2) & 0x3ff); +} else { +insn = I3305_LDR | TCG_REG_TMP | (((i_offset >> 2) & 0x7) << 5); +} +qatomic_set((uint32_t *)jmp_rw, insn); +flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) -- 2.34.1
[PATCH 05/22] tcg: Replace asserts on tcg_jmp_insn_offset
Test TCG_TARGET_HAS_direct_jump instead of testing an implementation pointer. Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 2 +- tcg/arm/tcg-target.c.inc | 2 +- tcg/loongarch64/tcg-target.c.inc | 2 +- tcg/mips/tcg-target.c.inc| 2 +- tcg/riscv/tcg-target.c.inc | 2 +- tcg/s390x/tcg-target.c.inc | 2 +- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 16c5e33b69..8e97da3a39 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1917,7 +1917,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -tcg_debug_assert(s->tb_jmp_insn_offset != NULL); +qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* * Ensure that ADRP+ADD are 8-byte aligned so that an atomic * write can be used to patch the target address. diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index e5c2eae5a5..31f8c5b7a7 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1947,7 +1947,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, intptr_t ptr, dif, dil; TCGReg base = TCG_REG_PC; -tcg_debug_assert(s->tb_jmp_insn_offset == 0); +qemu_build_assert(!TCG_TARGET_HAS_direct_jump); ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]); dif = tcg_pcrel_diff(s, (void *)ptr) - 8; dil = sextract32(dif, 0, 12); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 41fc5ffa91..78398684cd 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1089,7 +1089,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -tcg_debug_assert(s->tb_jmp_insn_offset != NULL); +qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* * Ensure that patch area is 8-byte aligned so that an * atomic write can be used to patch the target address. 
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 819648b100..5fc96e4406 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1986,7 +1986,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: /* indirect jump method */ -tcg_debug_assert(s->tb_jmp_insn_offset == 0); +qemu_build_assert(!TCG_TARGET_HAS_direct_jump); tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, (uintptr_t)(s->tb_jmp_target_addr + a0)); tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 1381c835af..1e8406b8c1 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1286,7 +1286,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -assert(s->tb_jmp_insn_offset == 0); +qemu_build_assert(!TCG_TARGET_HAS_direct_jump); /* indirect jump method */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, (uintptr_t)(s->tb_jmp_target_addr + a0)); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 9568452773..9498694564 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2095,7 +2095,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: a0 = args[0]; -if (s->tb_jmp_insn_offset) { +if (TCG_TARGET_HAS_direct_jump) { /* * branch displacement must be aligned for atomic patching; * see if we need to add extra nop before branch diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 34583a3499..d1cc41261a 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -606,7 +606,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -tcg_debug_assert(s->tb_jmp_insn_offset == 0); +qemu_build_assert(!TCG_TARGET_HAS_direct_jump); /* indirect jump method. 
*/ tcg_out_op_p(s, opc, s->tb_jmp_target_addr + args[0]); set_jmp_reset_offset(s, args[0]); -- 2.34.1
[PATCH 02/22] tcg/i386: Remove unused goto_tb code for indirect jump
Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 14 +- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 5c20bedd20..f3a40fc428 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2382,23 +2382,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -if (s->tb_jmp_insn_offset) { -/* direct jump method */ -int gap; -/* jump displacement must be aligned for atomic patching; +qemu_build_assert(TCG_TARGET_HAS_direct_jump); +{ +/* + * Jump displacement must be aligned for atomic patching; * see if we need to add extra nops before jump */ -gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; +int gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; if (gap != 1) { tcg_out_nopn(s, gap - 1); } tcg_out8(s, OPC_JMP_long); /* jmp im */ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); tcg_out32(s, 0); -} else { -/* indirect jump method */ -tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, - (intptr_t)(s->tb_jmp_target_addr + a0)); } set_jmp_reset_offset(s, a0); break; -- 2.34.1
[PATCH 09/22] tcg: Rename TB_JMP_RESET_OFFSET_INVALID to TB_JMP_OFFSET_INVALID
This will shortly be used for more than reset. Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 2 +- accel/tcg/translate-all.c | 8 tcg/tcg.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 9b7bfbf09a..7566ad9e7f 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -575,8 +575,8 @@ struct TranslationBlock { * setting one of the jump targets (or patching the jump instruction). Only * two of such jumps are supported. */ +#define TB_JMP_OFFSET_INVALID 0x /* indicates no jump generated */ uint16_t jmp_reset_offset[2]; /* offset of original jump target */ -#define TB_JMP_RESET_OFFSET_INVALID 0x /* indicates no jump generated */ uintptr_t jmp_target_arg[2]; /* target address or offset */ /* diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index ac3ee3740c..9cf88da6cb 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -979,10 +979,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->jmp_dest[1] = (uintptr_t)NULL; /* init original jump addresses which have been set during tcg_gen_code() */ -if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { +if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { tb_reset_jump(tb, 0); } -if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { +if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { tb_reset_jump(tb, 1); } @@ -1164,9 +1164,9 @@ static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) if (tb_page_addr1(tb) != -1) { tst->cross_page++; } -if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { +if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { tst->direct_jmp_count++; -if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { +if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { tst->direct_jmp2_count++; } } diff --git a/tcg/tcg.c b/tcg/tcg.c index d334f95375..37957208c7 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -4250,8 
+4250,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) #endif /* Initialize goto_tb jump offsets. */ -tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID; -tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID; +tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; +tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset; if (TCG_TARGET_HAS_direct_jump) { tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg; -- 2.34.1
[PATCH 12/22] tcg: Change tb_target_set_jmp_target arguments
Replace 'tc_ptr' and 'addr' with 'tb' and 'n'. Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.h | 3 ++- tcg/arm/tcg-target.h | 3 ++- tcg/i386/tcg-target.h| 9 ++--- tcg/loongarch64/tcg-target.h | 3 ++- tcg/mips/tcg-target.h| 3 ++- tcg/ppc/tcg-target.h | 3 ++- tcg/riscv/tcg-target.h | 3 ++- tcg/s390x/tcg-target.h | 10 ++ tcg/sparc64/tcg-target.h | 3 ++- tcg/tci/tcg-target.h | 3 ++- accel/tcg/cpu-exec.c | 6 +++--- tcg/aarch64/tcg-target.c.inc | 5 +++-- tcg/i386/tcg-target.c.inc| 9 + tcg/loongarch64/tcg-target.c.inc | 5 +++-- tcg/ppc/tcg-target.c.inc | 7 --- tcg/s390x/tcg-target.c.inc | 10 ++ tcg/sparc64/tcg-target.c.inc | 7 --- 17 files changed, 56 insertions(+), 36 deletions(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 485f685bd2..9b0927012c 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -152,7 +152,8 @@ typedef enum { #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 0 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *, int, + uintptr_t, uintptr_t); #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 7e96495392..743a725aa7 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -152,7 +152,8 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_MEMORY_BSWAP 0 /* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t); #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 00fcbe297d..b64317bf40 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -218,13 +218,8 @@ extern bool have_movbe; #define TCG_TARGET_extract_i64_valid(ofs, len) \ (((ofs) 
== 8 && (len) == 8) || ((ofs) + (len)) == 32) -static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, -uintptr_t jmp_rw, uintptr_t addr) -{ -/* patch the branch destination */ -qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4)); -/* no need to flush icache explicitly */ -} +void tb_target_set_jmp_target(const TranslationBlock *, int, + uintptr_t, uintptr_t); /* This defines the natural memory order supported by this * architecture before guarantees made by various barrier diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index a659c8d6fd..6a6c8d6941 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -170,7 +170,8 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i641 #define TCG_TARGET_HAS_mulsh_i641 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 7669213175..57154ec808 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -204,7 +204,8 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_MEMORY_BSWAP 1 /* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t) +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t) QEMU_ERROR("code path is reachable"); #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index b5cd225cfa..02764c3331 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -180,7 +180,8 @@ extern bool have_vsx; #define TCG_TARGET_HAS_bitsel_vec have_vsx #define TCG_TARGET_HAS_cmpsel_vec 0 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t); #define 
TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 11c9b3e4f4..21d455a081 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -161,7 +161,8 @@ typedef enum { #endif /* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, +
[PATCH 22/22] tcg/riscv: Implement direct branch for goto_tb
Now that tcg can handle direct and indirect goto_tb simultaneously, we can optimistically leave space for a direct branch and fall back to loading the pointer from the TB for an indirect branch. Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.h | 5 + tcg/riscv/tcg-target.c.inc | 19 +-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 56f7bc3346..a75c84f2a6 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -159,6 +159,11 @@ typedef enum { #define TCG_TARGET_HAS_mulsh_i641 #endif +<<< HEAD +=== +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +>>> 89ab294271 (tcg/riscv: Implement TCG_TARGET_HAS_direct_jump) #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index de029d62b4..ee269ea157 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1278,7 +1278,11 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) static void tcg_out_goto_tb(TCGContext *s, int which) { -/* indirect jump method */ +/* Direct branch will be patched by tb_target_set_jmp_target. */ +set_jmp_insn_offset(s, which); +tcg_out32(s, OPC_NOP); + +/* When branch is out of range, fall through to indirect. */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, get_jmp_target_addr(s, which)); tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0); @@ -1288,7 +1292,18 @@ static void tcg_out_goto_tb(TCGContext *s, int which) void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { -/* Always indirect, nothing to do */ +uintptr_t addr = tb->jmp_target_addr[n]; +ptrdiff_t offset = addr - jmp_rx; +tcg_insn_unit insn; + +/* Either directly branch, or fall through to indirect branch. 
*/ +if (offset == sextreg(offset, 0, 20)) { +insn = encode_uj(OPC_JAL, TCG_REG_ZERO, offset); +} else { +insn = OPC_NOP; +} +qatomic_set((uint32_t *)jmp_rw, insn); +flush_idcache_range(jmp_rx, jmp_rw, 4); } static void tcg_out_op(TCGContext *s, TCGOpcode opc, -- 2.34.1
[PATCH 07/22] tcg: Introduce get_jmp_target_addr
Similar to the existing set_jmp_reset_offset. Include the rw->rx address space conversion done by arm and s390x, and forgotten by mips and riscv. Signed-off-by: Richard Henderson --- tcg/tcg.c | 9 + tcg/arm/tcg-target.c.inc | 2 +- tcg/mips/tcg-target.c.inc | 2 +- tcg/riscv/tcg-target.c.inc | 2 +- tcg/s390x/tcg-target.c.inc | 2 +- tcg/tci/tcg-target.c.inc | 2 +- 6 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index df5a6cedf0..8bde8eb0d4 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -330,6 +330,15 @@ static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) s->tb_jmp_insn_offset[which] = tcg_current_code_size(s); } +static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) +{ +/* + * Return the read-execute version of the pointer, for the benefit + * of any pc-relative addressing mode. + */ +return (uintptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + which); +} + /* Signal overflow, starting over with fewer guest insns. */ static G_NORETURN void tcg_raise_tb_overflow(TCGContext *s) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 31f8c5b7a7..3a545f3707 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1948,7 +1948,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGReg base = TCG_REG_PC; qemu_build_assert(!TCG_TARGET_HAS_direct_jump); -ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]); +ptr = get_jmp_target_addr(s, args[0]); dif = tcg_pcrel_diff(s, (void *)ptr) - 8; dil = sextract32(dif, 0, 12); if (dif != dil) { diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 5fc96e4406..6b9c070f6b 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1988,7 +1988,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, /* indirect jump method */ qemu_build_assert(!TCG_TARGET_HAS_direct_jump); tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_jmp_target_addr + a0)); + 
get_jmp_target_addr(s, a0)); tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); tcg_out_nop(s); set_jmp_reset_offset(s, a0); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 1e8406b8c1..091c44d071 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1289,7 +1289,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, qemu_build_assert(!TCG_TARGET_HAS_direct_jump); /* indirect jump method */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_jmp_target_addr + a0)); + get_jmp_target_addr(s, a0)); tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0); set_jmp_reset_offset(s, a0); break; diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index ad6433fcf2..dba02db6ee 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2110,7 +2110,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } else { /* load address stored at s->tb_jmp_target_addr + a0 */ tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB, - tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0)); + (const void *)get_jmp_target_addr(s, a0)); /* and go there */ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB); } diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index d1cc41261a..baf3480c67 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -608,7 +608,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_tb: qemu_build_assert(!TCG_TARGET_HAS_direct_jump); /* indirect jump method. */ -tcg_out_op_p(s, opc, s->tb_jmp_target_addr + args[0]); +tcg_out_op_p(s, opc, (void *)get_jmp_target_addr(s, args[0])); set_jmp_reset_offset(s, args[0]); break; -- 2.34.1
[PATCH 00/22] tcg: exit_tb tidy, goto_tb reorg
Small patch for exit_tb. Large reorg for goto_tb, primarily aimed at fixing a race condition in which a host thread gets suspended in the middle of executing a two insn sequence, and the sequence is updated. The updated second insn does not match the previous first insn, so when the thread restarts the entire sequence will not branch to either the old or the new destination. The middle-end is adjusted to support both direct and indirect jumps simultaneously, instead of depending on TCG_TARGET_HAS_direct_jump. This allows the backend to decide whether to use direct or indirect based on the branch displacement. Which allows us to only update a single instruction, which means there's no multi-insn sequence that can be interrupted. This patch set fixes all except loongarch64, which will follow separately along with other cleanup. r~ Richard Henderson (22): tcg: Split out tcg_out_exit_tb tcg/i386: Remove unused goto_tb code for indirect jump tcg/ppc: Remove unused goto_tb code for indirect jump tcg/sparc64: Remove unused goto_tb code for indirect jump tcg: Replace asserts on tcg_jmp_insn_offset tcg: Introduce set_jmp_insn_offset tcg: Introduce get_jmp_target_addr tcg: Split out tcg_out_goto_tb tcg: Rename TB_JMP_RESET_OFFSET_INVALID to TB_JMP_OFFSET_INVALID tcg: Add gen_tb to TCGContext tcg: Add TranslationBlock.jmp_insn_offset tcg: Change tb_target_set_jmp_target arguments tcg: Move tb_target_set_jmp_target declaration to tcg.h tcg: Always define tb_target_set_jmp_target tcg: Remove TCG_TARGET_HAS_direct_jump tcg/aarch64: Reorg goto_tb implementation tcg/ppc: Reorg goto_tb implementation tcg/sparc64: Remove USE_REG_TB tcg/sparc64: Reorg goto_tb implementation tcg/arm: Implement direct branch for goto_tb tcg/riscv: Introduce OPC_NOP tcg/riscv: Implement direct branch for goto_tb include/exec/exec-all.h | 5 +- include/tcg/tcg.h| 14 +-- tcg/aarch64/tcg-target.h | 6 +- tcg/arm/tcg-target.h | 5 - tcg/i386/tcg-target.h| 9 -- tcg/loongarch64/tcg-target.h | 3 - 
tcg/mips/tcg-target.h| 5 - tcg/ppc/tcg-target.h | 7 +- tcg/riscv/tcg-target.h | 5 +- tcg/s390x/tcg-target.h | 11 -- tcg/sparc64/tcg-target.h | 4 - tcg/tci/tcg-target.h | 4 - accel/tcg/cpu-exec.c | 16 ++- accel/tcg/translate-all.c| 10 +- tcg/tcg-op.c | 14 +-- tcg/tcg.c| 42 +-- tcg/aarch64/tcg-target.c.inc | 104 tcg/arm/tcg-target.c.inc | 89 +- tcg/i386/tcg-target.c.inc| 68 ++- tcg/loongarch64/tcg-target.c.inc | 66 +- tcg/mips/tcg-target.c.inc| 59 + tcg/ppc/tcg-target.c.inc | 193 ++--- tcg/riscv/tcg-target.c.inc | 65 ++ tcg/s390x/tcg-target.c.inc | 106 +--- tcg/sparc64/tcg-target.c.inc | 200 --- tcg/tci/tcg-target.c.inc | 31 +++-- 26 files changed, 544 insertions(+), 597 deletions(-) -- 2.34.1
[PATCH 21/22] tcg/riscv: Introduce OPC_NOP
Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 37baae9cda..de029d62b4 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -254,6 +254,7 @@ typedef enum { #endif OPC_FENCE = 0x000f, +OPC_NOP = OPC_ADDI, /* nop = addi r0,r0,0 */ } RISCVInsn; /* @@ -390,7 +391,7 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { int i; for (i = 0; i < count; ++i) { -p[i] = encode_i(OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0); +p[i] = OPC_NOP; } } -- 2.34.1
[PATCH 06/22] tcg: Introduce set_jmp_insn_offset
Similar to the existing set_jmp_reset_offset. Move any assert for TCG_TARGET_HAS_direct_jump into the new function (which now cannot be build-time). Will be unused if TCG_TARGET_HAS_direct_jump is constant 0, but we can't test for constant in the preprocessor, so just mark it G_GNUC_UNUSED. Signed-off-by: Richard Henderson --- tcg/tcg.c| 10 ++ tcg/aarch64/tcg-target.c.inc | 3 +-- tcg/i386/tcg-target.c.inc| 3 +-- tcg/loongarch64/tcg-target.c.inc | 3 +-- tcg/ppc/tcg-target.c.inc | 7 +++ tcg/s390x/tcg-target.c.inc | 2 +- tcg/sparc64/tcg-target.c.inc | 5 ++--- 7 files changed, 19 insertions(+), 14 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index b53961baf7..df5a6cedf0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -320,6 +320,16 @@ static void set_jmp_reset_offset(TCGContext *s, int which) s->tb_jmp_reset_offset[which] = tcg_current_code_size(s); } +static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) +{ +/* + * We will check for overflow at the end of the opcode loop in + * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. + */ +tcg_debug_assert(TCG_TARGET_HAS_direct_jump); +s->tb_jmp_insn_offset[which] = tcg_current_code_size(s); +} + /* Signal overflow, starting over with fewer guest insns. */ static G_NORETURN void tcg_raise_tb_overflow(TCGContext *s) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 8e97da3a39..3aa89d6060 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1917,7 +1917,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* * Ensure that ADRP+ADD are 8-byte aligned so that an atomic * write can be used to patch the target address. 
@@ -1925,7 +1924,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, if ((uintptr_t)s->code_ptr & 7) { tcg_out32(s, NOP); } -s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); +set_jmp_insn_offset(s, a0); /* * actual branch destination will be patched by * tb_target_set_jmp_target later diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index f3a40fc428..3d4cf71552 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2382,7 +2382,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -qemu_build_assert(TCG_TARGET_HAS_direct_jump); { /* * Jump displacement must be aligned for atomic patching; @@ -2393,7 +2392,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_nopn(s, gap - 1); } tcg_out8(s, OPC_JMP_long); /* jmp im */ -s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); +set_jmp_insn_offset(s, a0); tcg_out32(s, 0); } set_jmp_reset_offset(s, a0); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 78398684cd..dd4934d6d5 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1089,7 +1089,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* * Ensure that patch area is 8-byte aligned so that an * atomic write can be used to patch the target address. 
@@ -1097,7 +1096,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, if ((uintptr_t)s->code_ptr & 7) { tcg_out_nop(s); } -s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); +set_jmp_insn_offset(s, a0); /* * actual branch destination will be patched by * tb_target_set_jmp_target later diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 3f9ee4b39a..86684e1c84 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2630,20 +2630,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* Direct jump. */ if (TCG_TARGET_REG_BITS == 64) { /* Ensure the next insns are 8 or 16-byte aligned. */ while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { tcg_out32(s, NOP); } -s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); +set_jmp_insn_offset(s, args[0]); tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); } else { -s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); +
[PATCH 03/22] tcg/ppc: Remove unused goto_tb code for indirect jump
Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c.inc | 32 +--- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 186599dae0..3f9ee4b39a 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2630,27 +2630,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: -if (s->tb_jmp_insn_offset) { -/* Direct jump. */ -if (TCG_TARGET_REG_BITS == 64) { -/* Ensure the next insns are 8 or 16-byte aligned. */ -while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { -tcg_out32(s, NOP); -} -s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); -tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); -tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); -} else { -s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); -tcg_out32(s, B); -s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); -break; +qemu_build_assert(TCG_TARGET_HAS_direct_jump); +/* Direct jump. */ +if (TCG_TARGET_REG_BITS == 64) { +/* Ensure the next insns are 8 or 16-byte aligned. */ +while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { +tcg_out32(s, NOP); } +s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); +tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); +tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); } else { -/* Indirect jump. */ -tcg_debug_assert(s->tb_jmp_insn_offset == NULL); -tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0, - (intptr_t)(s->tb_jmp_insn_offset + args[0])); +s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); +tcg_out32(s, B); +s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); +break; } tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); tcg_out32(s, BCCTR | BO_ALWAYS); -- 2.34.1
[PATCH 08/22] tcg: Split out tcg_out_goto_tb
The INDEX_op_goto_tb opcode needs no register allocation. Split out a dedicated helper function for it. Signed-off-by: Richard Henderson --- tcg/tcg.c| 4 ++ tcg/aarch64/tcg-target.c.inc | 40 +- tcg/arm/tcg-target.c.inc | 49 +++--- tcg/i386/tcg-target.c.inc| 33 +++ tcg/loongarch64/tcg-target.c.inc | 38 + tcg/mips/tcg-target.c.inc| 21 ++ tcg/ppc/tcg-target.c.inc | 52 tcg/riscv/tcg-target.c.inc | 20 + tcg/s390x/tcg-target.c.inc | 70 tcg/sparc64/tcg-target.c.inc | 68 --- tcg/tci/tcg-target.c.inc | 16 11 files changed, 219 insertions(+), 192 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 8bde8eb0d4..d334f95375 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -108,6 +108,7 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); +static void tcg_out_goto_tb(TCGContext *s, int which); static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]); @@ -4325,6 +4326,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) case INDEX_op_exit_tb: tcg_out_exit_tb(s, op->args[0]); break; +case INDEX_op_goto_tb: +tcg_out_goto_tb(s, op->args[0]); +break; case INDEX_op_dup2_vec: if (tcg_reg_alloc_dup2(s, op)) { break; diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 3aa89d6060..072d32f355 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1897,6 +1897,26 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) } } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ +/* + * Ensure that ADRP+ADD are 8-byte aligned so that an atomic + * write can be used to patch the target address. 
+ */ +if ((uintptr_t)s->code_ptr & 7) { +tcg_out32(s, NOP); +} +set_jmp_insn_offset(s, which); +/* + * actual branch destination will be patched by + * tb_target_set_jmp_target later + */ +tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); +tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); +tcg_out_insn(s, 3207, BR, TCG_REG_TMP); +set_jmp_reset_offset(s, which); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1916,25 +1936,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) switch (opc) { -case INDEX_op_goto_tb: -/* - * Ensure that ADRP+ADD are 8-byte aligned so that an atomic - * write can be used to patch the target address. - */ -if ((uintptr_t)s->code_ptr & 7) { -tcg_out32(s, NOP); -} -set_jmp_insn_offset(s, a0); -/* - * actual branch destination will be patched by - * tb_target_set_jmp_target later - */ -tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); -tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); -tcg_out_insn(s, 3207, BR, TCG_REG_TMP); -set_jmp_reset_offset(s, a0); -break; - case INDEX_op_goto_ptr: tcg_out_insn(s, 3207, BR, a0); break; @@ -2305,6 +2306,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ +case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. 
*/ default: g_assert_not_reached(); } diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 3a545f3707..0afc286c8e 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1933,6 +1933,31 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) tcg_out_epilogue(s); } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ +/* Indirect jump method */ +intptr_t ptr, dif, dil; +TCGReg base = TCG_REG_PC; + +qemu_build_assert(!TCG_TARGET_HAS_direct_jump); +ptr = get_jmp_target_addr(s, which); +dif = tcg_pcrel_diff(s, (void *)ptr) - 8; +dil = sextract32(dif, 0, 12); +if (dif != dil) { +/* + * The TB is close, but outside the 12 bits addressable by + * the load. We can extend this to 20 bits with a sub of a + * shifted
Re: [RFC PATCH for 8.0 10/13] virtio-net: Migrate vhost inflight descriptors
On Tue, Dec 6, 2022 at 1:05 AM Eugenio Pérez wrote: > > There is currently no data to be migrated, since nothing populates or > read the fields on virtio-net. > > The migration of in-flight descriptors is modelled after the migration > of requests in virtio-blk. With some differences: > * virtio-blk migrates queue number on each request. Here we only add a > vq if it has descriptors to migrate, and then we make all descriptors > in an array. > * Use of QTAILQ since it works similar to signal the end of the inflight > descriptors: 1 for more data, 0 if end. But do it for each vq instead > of for each descriptor. > * Usage of VMState macros. > > The fields of descriptors would be way more complicated if we use the > VirtQueueElements directly, since there would be a few levels of > indirections. Using VirtQueueElementOld for the moment, and migrate to > VirtQueueElement for the final patch. > > TODO: Proper migration versioning > TODO: Do not embed vhost-vdpa structs > TODO: Migrate the VirtQueueElement, not VirtQueueElementOld. 
> > Signed-off-by: Eugenio Pérez > --- > include/hw/virtio/virtio-net.h | 2 + > include/migration/vmstate.h| 11 +++ > hw/net/virtio-net.c| 129 + > 3 files changed, 142 insertions(+) > > diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h > index ef234ffe7e..ae7c017ef0 100644 > --- a/include/hw/virtio/virtio-net.h > +++ b/include/hw/virtio/virtio-net.h > @@ -151,9 +151,11 @@ typedef struct VirtIONetQueue { > QEMUTimer *tx_timer; > QEMUBH *tx_bh; > uint32_t tx_waiting; > +uint32_t tx_inflight_num, rx_inflight_num; > struct { > VirtQueueElement *elem; > } async_tx; > +VirtQueueElement **tx_inflight, **rx_inflight; > struct VirtIONet *n; > } VirtIONetQueue; > > diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h > index 9726d2d09e..9e0dfef9ee 100644 > --- a/include/migration/vmstate.h > +++ b/include/migration/vmstate.h > @@ -626,6 +626,17 @@ extern const VMStateInfo vmstate_info_qlist; > .offset = vmstate_offset_varray(_state, _field, _type), \ > } > > +#define VMSTATE_STRUCT_VARRAY_ALLOC_UINT16(_field, _state, _field_num, > \ > + _version, _vmsd, _type) { > \ > +.name = (stringify(_field)), > \ > +.version_id = (_version), > \ > +.vmsd = &(_vmsd), > \ > +.num_offset = vmstate_offset_value(_state, _field_num, uint16_t), > \ > +.size = sizeof(_type), > \ > +.flags = VMS_STRUCT | VMS_VARRAY_UINT16 | VMS_ALLOC | VMS_POINTER, > \ > +.offset = vmstate_offset_pointer(_state, _field, _type), > \ > +} > + > #define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num, _version, > _vmsd, _type) {\ > .name = (stringify(_field)), \ > .version_id = (_version),\ > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index aba12759d5..ffd7bf1fc7 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -3077,6 +3077,13 @@ static bool mac_table_doesnt_fit(void *opaque, int > version_id) > return !mac_table_fits(opaque, version_id); > } > > +typedef struct VirtIONetInflightQueue { > +uint16_t idx; > +uint16_t num; > 
+QTAILQ_ENTRY(VirtIONetInflightQueue) entry; > +VirtQueueElementOld *elems; > +} VirtIONetInflightQueue; > + > /* This temporary type is shared by all the WITH_TMP methods > * although only some fields are used by each. > */ > @@ -3086,6 +3093,7 @@ struct VirtIONetMigTmp { > uint16_tcurr_queue_pairs_1; > uint8_t has_ufo; > uint32_thas_vnet_hdr; > +QTAILQ_HEAD(, VirtIONetInflightQueue) queues_inflight; > }; > > /* The 2nd and subsequent tx_waiting flags are loaded later than > @@ -3231,6 +3239,124 @@ static const VMStateDescription > vmstate_virtio_net_rss = { > }, > }; > > +static const VMStateDescription vmstate_virtio_net_inflight_queue = { > +.name = "virtio-net-device/inflight/queue", > +.fields = (VMStateField[]) { > +VMSTATE_UINT16(idx, VirtIONetInflightQueue), > +VMSTATE_UINT16(num, VirtIONetInflightQueue), > + > +VMSTATE_STRUCT_VARRAY_ALLOC_UINT16(elems, VirtIONetInflightQueue, > num, > + 0, vmstate_virtqueue_element_old, > + VirtQueueElementOld), > +VMSTATE_END_OF_LIST() > +}, > +}; A dumb question, any reason we need bother with virtio-net? It looks to me it's not a must and would complicate migration compatibility. I guess virtio-blk is the better place. Thanks > + >
Re: [PATCH 3/3] intel-iommu: build iova tree during IOMMU translation
On Tue, Dec 6, 2022 at 7:19 AM Peter Xu wrote: > > Jason, > > On Mon, Dec 05, 2022 at 12:12:04PM +0800, Jason Wang wrote: > > I'm fine to go without iova-tree. Would you mind to post patches for > > fix? I can test and include it in this series then. > > One sample patch attached, only compile tested. I don't see any direct connection between the attached patch and the intel-iommu? > > I can also work on this but I'll be slow in making progress, so I'll add it > into my todo. If you can help to fix this issue it'll be more than great. Ok, let me try but it might take some time :) > No worry on the ownership or authorship of the patch if you agree on the > change and moving forward with this when modifying - just take it over! Ok. Thanks > > Thanks! > > -- > Peter Xu
Re: [RFC PATCH 12/21] i386/xen: set shared_info page
On Mon, 2022-12-05 at 23:17 +0100, Philippe Mathieu-Daudé wrote: > On 5/12/22 18:31, David Woodhouse wrote: > > From: Joao Martins > > > > This is done by implementing HYPERVISOR_memory_op specifically > > XENMEM_add_to_physmap with space XENMAPSPACE_shared_info. While > > Xen removes the page with its own, we instead use the gfn passed > > by the guest. > > > > Signed-off-by: Joao Martins > > Signed-off-by: David Woodhouse > > --- > > accel/kvm/kvm-all.c | 6 > > include/hw/core/cpu.h| 2 ++ > > include/sysemu/kvm.h | 2 ++ > > include/sysemu/kvm_int.h | 3 ++ > > target/i386/cpu.h| 8 ++ > > target/i386/trace-events | 1 + > > target/i386/xen-proto.h | 19 + > > target/i386/xen.c| 61 > > 8 files changed, 102 insertions(+) > > create mode 100644 target/i386/xen-proto.h > > > > diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h > > index 8830546121..e57b693528 100644 > > --- a/include/hw/core/cpu.h > > +++ b/include/hw/core/cpu.h > > @@ -443,6 +443,8 @@ struct CPUState { > > > > /* track IOMMUs whose translations we've cached in the TCG TLB */ > > GArray *iommu_notifiers; > > + > > +struct XenState *xen_state; > > Since you define a type definition below, use it. Ack. More importantly though, some of that state needs to be persisted across live migration / live update. There is per-vCPU state (the GPAs for vcpu_info etc., upcall vector, timer info). I think I see how I could add that to the vmstate_x86_cpu defined in target/i386/machine.c. For the machine-wide state, where do I add that? Should I just instantiate a dummy device (a bit like TYPE_KVM_CLOCK, AFAICT) to hang that state off? smime.p7s Description: S/MIME cryptographic signature
Re: [RFC PATCH 18/21] kvm/ioapic: mark gsi-2 used in ioapic routing init
On Mon, 2022-12-05 at 23:25 +0100, Philippe Mathieu-Daudé wrote: > On 5/12/22 18:31, David Woodhouse wrote: > > From: Ankur Arora < > > ankur.a.ar...@oracle.com > > > > > > > GSI-2/IOAPIC pin-2 is treated specially while initing > > IRQ routing: PIC does not use it at all while the IOAPIC > > maps virq=0 to pin-2 and does not use GSI-2. > > (all other GSIs are identity mapped to pins.) > > > > This results in any later code which allocates a virq > > to be assigned GSI-2. This virq is in-turn used to > > remap interrupts to HYPERVISOR_CALLBACK_VECTOR (0xf3) > > to deliver to the guest. > > > > Ordinarily this would be okay, but if the event delivery is > > via direct injection via KVM_REQ_EVENT (without going > > through the LAPIC) we see vmentry failure. > > > > This works fine for any other values of GSI. > > > > As a workaround, mark GSI-2 used. > > > > Signed-off-by: Ankur Arora > > Signed-off-by: David Woodhouse > > --- > > accel/kvm/kvm-all.c | 5 + > > hw/i386/kvm/ioapic.c | 1 + > > include/sysemu/kvm.h | 1 + > > 3 files changed, 7 insertions(+) > > > > diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c > > index ee7c8ef68b..5fab0d35c9 100644 > > --- a/hw/i386/kvm/ioapic.c > > +++ b/hw/i386/kvm/ioapic.c > > @@ -43,6 +43,7 @@ void kvm_pc_setup_irq_routing(bool pci_enabled) > > } > > } > > } > > Workarounds usually deserve some comment in the code. Yes, good point. Although I actually think I can kill this off completely since we no longer attempt the deliver the vector directly with KVM_REQ_EVENT anyway; the kernel injects it *for* us when it sees that vcpu_info->evtchn_upcall_pending is set on entry to the guest vCPU. > > +kvm_irqchip_set_gsi(s, 2); > > kvm_irqchip_commit_routes(s); > > } smime.p7s Description: S/MIME cryptographic signature
Re: [RFC PATCH 13/21] i386/xen: implement HYPERVISOR_hvm_op
On Mon, 2022-12-05 at 23:13 +0100, Philippe Mathieu-Daudé wrote: > > +static int kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, > > +int cmd, uint64_t arg) > > +{ > > +switch (cmd) { > > +case HVMOP_pagetable_dying: { > > +exit->u.hcall.result = -ENOSYS; > > +return 0; > > +} > > +} > > Could it be helpful to have a trace event here, or log a GUEST_ERROR? > > > +exit->u.hcall.result = -ENOSYS; > > +return HCALL_ERR; We already have a trace event for hypercalls. So the unimplemented ones look something like this (e.g. failing to set FIFO event channels, then failing to get the CONSOLE_EVTCHN HVM param)... [0.151084] NR_IRQS: 524544, nr_irqs: 256, preallocated irqs: 16 kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 32 a0 0xb a1 0xbda03da0 a2 0x93cc3ec2e9a0 ret 0xffda [0.152018] xen:events: Using 2-level ABI kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 34 a0 0x0 a1 0xbda03dd8 a2 0x7ff0 ret 0x0 [0.152731] xen:events: Xen HVM callback vector for event delivery is enabled [0.154158] rcu: srcu_init: Setting srcu_struct sizes based on contention. [0.170239] Console: colour VGA+ 80x25 kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 34 a0 0x1 a1 0xbda03e60 a2 0x7ff0 ret 0xffda [0.170966] Cannot get hvm parameter CONSOLE_EVTCHN (18): -38! (I just fixed a PRIu64 to PRIx64 in the trace event definition) smime.p7s Description: S/MIME cryptographic signature
Re: [RFC PATCH 10/21] i386/xen: handle guest hypercalls
On Mon, 2022-12-05 at 23:11 +0100, Philippe Mathieu-Daudé wrote: > On 5/12/22 18:31, David Woodhouse wrote: > > From: Joao Martins > > > > This means handling the new exit reason for Xen but still > > crashing on purpose. As we implement each of the hypercalls > > we will then return the right return code. > > > > Signed-off-by: Joao Martins > > [dwmw2: Add CPL to hypercall tracing, disallow hypercalls from CPL > 0] > > Signed-off-by: David Woodhouse > > --- > > target/i386/kvm/kvm.c| 5 + > > target/i386/trace-events | 3 +++ > > target/i386/xen.c| 45 > > target/i386/xen.h| 1 + > > 4 files changed, 54 insertions(+) > > > > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c > > index 4b21d03250..6396d11f1e 100644 > > --- a/target/i386/kvm/kvm.c > > +++ b/target/i386/kvm/kvm.c > > @@ -5468,6 +5468,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct > > kvm_run *run) > > assert(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER); > > ret = kvm_handle_wrmsr(cpu, run); > > break; > > +#ifdef CONFIG_XEN > > CONFIG_XEN is set when the _host_ has Xen development files available. > > IIUC here you want to check if Xen HVM guest support is enabled. > > You might want to use a different CONFIG_XEN_xxx key, which itself > depends on CONFIG_XEN. Yeah, I'd be interested in opinions on that one. Strictly, the only one that *needs* to be a configure option is CONFIG_XEN for the Xen libraries, which is support for actually running on Xen. Any time KVM is present, we *could* pull in the rest of the xenfv machine support unconditionally, since that's no longer dependent on true Xen. But because there's a non-trivial amount of code in the event channel and grant table stuff, *perhaps* we want to make it optional? I don't really want to call that CONFIG_KVM_XEN since as noted, it's theoretically possible to do it with TCG or other accelerators too. So we could call it CONFIG_XEN_EMULATION. 
I don't think we'd make that depend on CONFIG_XEN though, since none of the actual Xen libraries would be needed once everything's implemented and cleaned up. So things like the xenfv machine code would then depend on (CONFIG_XEN || CONFIG_XEN_EMULATION)... or we could make a new automatic config symbol CONFIG_XEN_MACHINE which has the same effect? Happy to do it however seems best. smime.p7s Description: S/MIME cryptographic signature
Re: [RFC PATCH 09/21] pc_piix: allow xenfv machine with XEN_EMULATE
On Mon, 2022-12-05 at 23:06 +0100, Philippe Mathieu-Daudé wrote: > On 5/12/22 18:31, David Woodhouse wrote: > > From: Joao Martins > > > > This allows -machine xenfv to work with Xen emulated guests. > > > > Signed-off-by: Joao Martins > > Signed-off-by: David Woodhouse > > --- > > hw/i386/pc_piix.c | 4 ++-- > > 1 file changed, 2 insertions(+), 2 deletions(-) > > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > > index 3dcac2f4b6..d1127adde0 100644 > > --- a/hw/i386/pc_piix.c > > +++ b/hw/i386/pc_piix.c > > @@ -404,8 +404,8 @@ static void pc_xen_hvm_init(MachineState *machine) > > { > > PCMachineState *pcms = PC_MACHINE(machine); > > > > -if (!xen_enabled()) { > > -error_report("xenfv machine requires the xen accelerator"); > > +if (!xen_enabled() && (xen_mode != XEN_EMULATE)) { > > +error_report("xenfv machine requires the xen or kvm accelerator"); > > exit(1); > > } > > What about the XEN_EMULATE case? Shouldn't this be: > >if (!xen_enabled()) { > if (xen_mode == XEN_EMULATE) { > error_report("xenfv machine requires the xen accelerator"); > } else { > error_report("xenfv machine requires the xen or kvm accelerator"); > } > exit(1); >} > > ? Erm... that one I cherry-picked directly from the original and I confess I haven't yet done much thinking about it. There are two sane cases. If xen_mode == XEN_ATTACH, then xen_enabled() should be true. If xen_mode == XEN_EMULATED, then we'd better be using KVM with the Xen support (which could *theoretically* be added to TCG if anyone really wanted to). So this check is working because it's allowing *either* xen_enabled() *or* xen_mode==XEN_ATTACH to satisfy it. But it's too lax. I think it should *require* KVM in the case of XEN_EMULATE. That ought to be sufficient since it's going to set the xen-version machine property, and that would cause KVM itself to bail out if the required support isn't present in the kernel. (I'm assuming the existing XEN_EMULATE mode is long dead along with Xenner? 
Even on true Xen we run PV guests in a shim these days, and that shim works without modification under KVM and will eventually be one of my test cases as I get this all working under qemu) smime.p7s Description: S/MIME cryptographic signature
Re: [RFC PATCH 03/21] i386/kvm: handle Xen HVM cpuid leaves
On Mon, 2022-12-05 at 22:58 +0100, Philippe Mathieu-Daudé wrote: > > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > > index 22b681ca37..45aa9e40a5 100644 > > --- a/target/i386/cpu.c > > +++ b/target/i386/cpu.c > > @@ -7069,6 +7069,8 @@ static Property x86_cpu_properties[] = { > >* own cache information (see x86_cpu_load_def()). > >*/ > > DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true), > > +DEFINE_PROP_BOOL("xen", X86CPU, xen, false), > > Maybe name it 'xen-hvm'? I think I'd prefer it to go away completely. If the *machine* has the Xen feature enabled (which I've made implicit in the 'xen-version' property), perhaps we should *always* disable 'expose_kvm' and enable the Xen CPUID leaves instead? > > +DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false), > > What happens if we use -cpu host,-kvm,+xen,-xen-vapic ? That's sane; it does the Xen CPUID thing but doesn't advertise the vAPIC feature in the Xen CPUID leaves. > Is -cpu host,-kvm,-xen,+xen-vapic meaningful? Otherwise we need to error > out (eventually displaying some hint). Indeed it isn't meaningful, and should cause an error. smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] intel-iommu: Document iova_tree
On Mon, Dec 05, 2022 at 12:23:20PM +0800, Jason Wang wrote: > On Fri, Dec 2, 2022 at 12:25 AM Peter Xu wrote: > > > > It seems not super clear on when iova_tree is used, and why. Add a rich > > comment above iova_tree to track why we needed the iova_tree, and when we > > need it. > > > > Suggested-by: Jason Wang > > Signed-off-by: Peter Xu > > --- > > include/hw/i386/intel_iommu.h | 30 +- > > 1 file changed, 29 insertions(+), 1 deletion(-) > > > > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h > > index 46d973e629..8d130ab2e3 100644 > > --- a/include/hw/i386/intel_iommu.h > > +++ b/include/hw/i386/intel_iommu.h > > @@ -109,7 +109,35 @@ struct VTDAddressSpace { > > QLIST_ENTRY(VTDAddressSpace) next; > > /* Superset of notifier flags that this address space has */ > > IOMMUNotifierFlag notifier_flags; > > -IOVATree *iova_tree; /* Traces mapped IOVA ranges */ > > +/* > > + * @iova_tree traces mapped IOVA ranges. > > + * > > + * The tree is not needed if no MAP notifiers is registered with > > + * current VTD address space, because all UNMAP (including iotlb or > > + * dev-iotlb) events can be transparently delivered to !MAP iommu > > + * notifiers. > > So this means the UNMAP notifier doesn't need to be as accurate as > MAP. (Should we document it in the notifier headers)? Yes. > > For MAP[a, b] MAP[b, c] we can do a UNMAP[a. c]. IIUC a better way to say this is, for MAP[a, b] we can do an UNMAP[a-X, b+Y] as long as the range covers [a, b]? > > > + * > > + * The tree OTOH is required for MAP typed iommu notifiers for a few > > + * reasons. > > + * > > + * Firstly, there's no way to identify whether an PSI event is MAP or > > + * UNMAP within the PSI message itself. Without having prior knowledge > > + * of existing state vIOMMU doesn't know whether it should notify MAP > > + * or UNMAP for a PSI message it received. 
> > + * > > + * Secondly, PSI received from guest driver (or even a large PSI can > > + * grow into a DSI at least with Linux intel-iommu driver) can be > > + * larger in range than the newly mapped ranges for either MAP or UNMAP > > + * events. > > Yes, so I think we need a document that the UNMAP handler should be > prepared for this. How about I squash below into this same patch? diff --git a/include/exec/memory.h b/include/exec/memory.h index 91f8a2395a..c83bd11a68 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -129,6 +129,24 @@ struct IOMMUTLBEntry { /* * Bitmap for different IOMMUNotifier capabilities. Each notifier can * register with one or multiple IOMMU Notifier capability bit(s). + * + * Normally there're two use cases for the notifiers: + * + * (1) When the device needs accurate synchronizations of the vIOMMU page + * tables, it needs to register with both MAP|UNMAP notifies (which + * is defined as IOMMU_NOTIFIER_IOTLB_EVENTS below). As long as MAP + * events are registered, the notifications will be accurate but + * there's overhead on synchronizing the guest vIOMMU page tables. + * + * (2) When the device doesn't need accurate synchronizations of the + * vIOMMU page tables (when the device can both cache translations + * and requesting to translate dynamically during DMA process), it + * needs to register only with UNMAP or DEVIOTLB_UNMAP notifies. + * Note that in such working mode shadow page table is not used for + * vIOMMU unit on this address space, so the UNMAP messages can be + * actually larger than the real invalidations (just like how the + * Linux IOMMU driver normally works, where an invalidation can be + * enlarged as long as it still covers the target range). */ typedef enum { IOMMU_NOTIFIER_NONE = 0, Thanks, -- Peter Xu
Re: [PATCH 3/3] intel-iommu: build iova tree during IOMMU translation
Jason, On Mon, Dec 05, 2022 at 12:12:04PM +0800, Jason Wang wrote: > I'm fine to go without iova-tree. Would you mind to post patches for > fix? I can test and include it in this series then. One sample patch attached, only compile tested. I can also work on this but I'll be slow in making progress, so I'll add it into my todo. If you can help to fix this issue it'll be more than great. No worry on the ownership or authorship of the patch if you agree on the change and moving forward with this when modifying - just take it over! Thanks! -- Peter Xu >From 57e5cab805c94d56f801a7e21098389a77584e34 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 5 Dec 2022 11:14:02 -0500 Subject: [PATCH] memory: sanity check flatview deref on mr transactions Content-type: text/plain Signed-off-by: Peter Xu --- include/exec/memory.h | 9 + softmmu/memory.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 91f8a2395a..e136ab9558 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1069,8 +1069,17 @@ struct FlatView { MemoryRegion *root; }; +extern unsigned memory_region_transaction_depth; + static inline FlatView *address_space_to_flatview(AddressSpace *as) { +/* + * Before using any flatview, sanity check we're not during a memory + * region transaction or the map can be invalid. Note that this can + * also be called during commit phase of memory transaction, but that + * should also only happen when the depth decreases to 0 first. 
+ */ +assert(memory_region_transaction_depth == 0); return qatomic_rcu_read(&as->current_map); } diff --git a/softmmu/memory.c b/softmmu/memory.c index bc0be3f62c..7cfcf5dffe 100644 --- a/softmmu/memory.c +++ b/softmmu/memory.c @@ -37,7 +37,7 @@ //#define DEBUG_UNASSIGNED -static unsigned memory_region_transaction_depth; +unsigned memory_region_transaction_depth; static bool memory_region_update_pending; static bool ioeventfd_update_pending; unsigned int global_dirty_tracking; -- 2.37.3
Re: [PATCH] target/riscv: Fix mret exception cause when no pmp rule is configured
On Mon, 2022-12-05 at 14:53 +0800, Bin Meng wrote: > The priv spec v1.12 says: > > If no PMP entry matches an M-mode access, the access succeeds. If > no PMP entry matches an S-mode or U-mode access, but at least one > PMP entry is implemented, the access fails. Failed accesses > generate > an instruction, load, or store access-fault exception. > > At present the exception cause is set to 'illegal instruction' but > should have been 'instruction access fault'. > > Fixes: d102f19a2085 ("target/riscv/pmp: Raise exception if no PMP > entry is configured") > Signed-off-by: Bin Meng > --- > > target/riscv/op_helper.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) Reviewed-by: Wilfred Mallawa > > diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c > index 09f1f5185d..d7af7f056b 100644 > --- a/target/riscv/op_helper.c > +++ b/target/riscv/op_helper.c > @@ -202,7 +202,7 @@ target_ulong helper_mret(CPURISCVState *env) > > if (riscv_feature(env, RISCV_FEATURE_PMP) && > !pmp_get_num_rules(env) && (prev_priv != PRV_M)) { > - riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + riscv_raise_exception(env, RISCV_EXCP_INST_ACCESS_FAULT, > GETPC()); > } > > target_ulong prev_virt = get_field(env->mstatus, MSTATUS_MPV);
Re: [PATCH v3 3/3] hw/{misc, riscv}: pfsoc: add system controller as unimplemented
On Fri, Nov 18, 2022 at 8:57 AM Conor Dooley wrote: > > From: Conor Dooley > > The system controller on PolarFire SoC is access via a mailbox. The > control registers for this mailbox lie in the "IOSCB" region & the > interrupt is cleared via write to the "SYSREG" region. It also has a > QSPI controller, usually connected to a flash chip, that is used for > storing FPGA bitstreams and used for In-Application Programming (IAP). > > Linux has an implementation of the system controller, through which the > hwrng is accessed, leading to load/store access faults. > > Add the QSPI as unimplemented and a very basic (effectively > unimplemented) version of the system controller's mailbox. Rather than > purely marking the regions as unimplemented, service the mailbox > requests by reporting failures and raising the interrupt so a guest can > better handle the lack of support. > > Signed-off-by: Conor Dooley Acked-by: Alistair Francis Alistair > --- > hw/misc/mchp_pfsoc_ioscb.c | 72 - > hw/misc/mchp_pfsoc_sysreg.c | 18 ++-- > hw/riscv/microchip_pfsoc.c | 6 +++ > include/hw/misc/mchp_pfsoc_ioscb.h | 3 ++ > include/hw/misc/mchp_pfsoc_sysreg.h | 1 + > include/hw/riscv/microchip_pfsoc.h | 1 + > 6 files changed, 95 insertions(+), 6 deletions(-) > > diff --git a/hw/misc/mchp_pfsoc_ioscb.c b/hw/misc/mchp_pfsoc_ioscb.c > index f976e42f72..a71d134295 100644 > --- a/hw/misc/mchp_pfsoc_ioscb.c > +++ b/hw/misc/mchp_pfsoc_ioscb.c > @@ -24,6 +24,7 @@ > #include "qemu/bitops.h" > #include "qemu/log.h" > #include "qapi/error.h" > +#include "hw/irq.h" > #include "hw/sysbus.h" > #include "hw/misc/mchp_pfsoc_ioscb.h" > > @@ -34,6 +35,9 @@ > #define IOSCB_WHOLE_REG_SIZE0x1000 > #define IOSCB_SUBMOD_REG_SIZE 0x1000 > #define IOSCB_CCC_REG_SIZE 0x200 > +#define IOSCB_CTRL_REG_SIZE 0x800 > +#define IOSCB_QSPIXIP_REG_SIZE 0x200 > + > > /* > * There are many sub-modules in the IOSCB module. 
> @@ -45,6 +49,8 @@ > #define IOSCB_LANE01_BASE 0x0650 > #define IOSCB_LANE23_BASE 0x0651 > #define IOSCB_CTRL_BASE 0x0702 > +#define IOSCB_QSPIXIP_BASE 0x07020100 > +#define IOSCB_MAILBOX_BASE 0x07020800 > #define IOSCB_CFG_BASE 0x0708 > #define IOSCB_CCC_BASE 0x0800 > #define IOSCB_PLL_MSS_BASE 0x0E001000 > @@ -143,6 +149,58 @@ static const MemoryRegionOps mchp_pfsoc_io_calib_ddr_ops > = { > .endianness = DEVICE_LITTLE_ENDIAN, > }; > > +#define SERVICES_CR 0x50 > +#define SERVICES_SR 0x54 > +#define SERVICES_STATUS_SHIFT 16 > + > +static uint64_t mchp_pfsoc_ctrl_read(void *opaque, hwaddr offset, > + unsigned size) > +{ > +uint32_t val = 0; > + > +switch (offset) { > +case SERVICES_SR: > +/* > + * Although some services have no error codes, most do. All services > + * that do implement errors, begin their error codes at 1. Treat all > + * service requests as failures & return 1. > + * See the "PolarFire® FPGA and PolarFire SoC FPGA System Services" > + * user guide for more information on service error codes. 
> + */ > +val = 1u << SERVICES_STATUS_SHIFT; > +break; > +default: > +qemu_log_mask(LOG_UNIMP, "%s: unimplemented device read " > + "(size %d, offset 0x%" HWADDR_PRIx ")\n", > + __func__, size, offset); > +} > + > +return val; > +} > + > +static void mchp_pfsoc_ctrl_write(void *opaque, hwaddr offset, > + uint64_t value, unsigned size) > +{ > +MchpPfSoCIoscbState *s = opaque; > + > +switch (offset) { > +case SERVICES_CR: > +qemu_irq_raise(s->irq); > +break; > +default: > +qemu_log_mask(LOG_UNIMP, "%s: unimplemented device write " > + "(size %d, value 0x%" PRIx64 > + ", offset 0x%" HWADDR_PRIx ")\n", > + __func__, size, value, offset); > +} > +} > + > +static const MemoryRegionOps mchp_pfsoc_ctrl_ops = { > +.read = mchp_pfsoc_ctrl_read, > +.write = mchp_pfsoc_ctrl_write, > +.endianness = DEVICE_LITTLE_ENDIAN, > +}; > + > static void mchp_pfsoc_ioscb_realize(DeviceState *dev, Error **errp) > { > MchpPfSoCIoscbState *s = MCHP_PFSOC_IOSCB(dev); > @@ -162,10 +220,18 @@ static void mchp_pfsoc_ioscb_realize(DeviceState *dev, > Error **errp) >"mchp.pfsoc.ioscb.lane23", IOSCB_SUBMOD_REG_SIZE); > memory_region_add_subregion(&s->container, IOSCB_LANE23_BASE, > &s->lane23); > > -memory_region_init_io(&s->ctrl, OBJECT(s), &mchp_pfsoc_dummy_ops, s, > - "mchp.pfsoc.ioscb.ctrl", IOSCB_SUBMOD_REG_SIZE); > +memory_region_init_io(&s->ctrl, OBJECT(s), &mchp_pfsoc_ctrl_ops, s, > +
Re: [PATCH v10 7/9] KVM: Update lpage info when private/shared memory are mixed
On Fri, Dec 02, 2022 at 02:13:45PM +0800, Chao Peng wrote: > A large page with mixed private/shared subpages can't be mapped as large > page since its sub private/shared pages are from different memory > backends and may also treated by architecture differently. When > private/shared memory are mixed in a large page, the current lpage_info > is not sufficient to decide whether the page can be mapped as large page > or not and additional private/shared mixed information is needed. > > Tracking this 'mixed' information with the current 'count' like > disallow_lpage is a bit challenge so reserve a bit in 'disallow_lpage' > to indicate a large page has mixed private/share subpages and update > this 'mixed' bit whenever the memory attribute is changed between > private and shared. > > Signed-off-by: Chao Peng > --- > arch/x86/include/asm/kvm_host.h | 8 ++ > arch/x86/kvm/mmu/mmu.c | 134 +++- > arch/x86/kvm/x86.c | 2 + > include/linux/kvm_host.h| 19 + > virt/kvm/kvm_main.c | 9 ++- > 5 files changed, 169 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 283cbb83d6ae..7772ab37ac89 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -38,6 +38,7 @@ > #include > > #define __KVM_HAVE_ARCH_VCPU_DEBUGFS > +#define __KVM_HAVE_ARCH_SET_MEMORY_ATTRIBUTES > > #define KVM_MAX_VCPUS 1024 > > @@ -1011,6 +1012,13 @@ struct kvm_vcpu_arch { > #endif > }; > > +/* > + * Use a bit in disallow_lpage to indicate private/shared pages mixed at the > + * level. The remaining bits are used as a reference count. 
> + */ > +#define KVM_LPAGE_PRIVATE_SHARED_MIXED (1U << 31) > +#define KVM_LPAGE_COUNT_MAX ((1U << 31) - 1) > + > struct kvm_lpage_info { > int disallow_lpage; > }; > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index e2c70b5afa3e..2190fd8c95c0 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -763,11 +763,16 @@ static void update_gfn_disallow_lpage_count(const > struct kvm_memory_slot *slot, > { > struct kvm_lpage_info *linfo; > int i; > + int disallow_count; > > for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { > linfo = lpage_info_slot(gfn, slot, i); > + > + disallow_count = linfo->disallow_lpage & KVM_LPAGE_COUNT_MAX; > + WARN_ON(disallow_count + count < 0 || > + disallow_count > KVM_LPAGE_COUNT_MAX - count); > + > linfo->disallow_lpage += count; > - WARN_ON(linfo->disallow_lpage < 0); > } > } > > @@ -6986,3 +6991,130 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm) > if (kvm->arch.nx_huge_page_recovery_thread) > kthread_stop(kvm->arch.nx_huge_page_recovery_thread); > } > + > +static bool linfo_is_mixed(struct kvm_lpage_info *linfo) > +{ > + return linfo->disallow_lpage & KVM_LPAGE_PRIVATE_SHARED_MIXED; > +} > + > +static void linfo_set_mixed(gfn_t gfn, struct kvm_memory_slot *slot, > + int level, bool mixed) > +{ > + struct kvm_lpage_info *linfo = lpage_info_slot(gfn, slot, level); > + > + if (mixed) > + linfo->disallow_lpage |= KVM_LPAGE_PRIVATE_SHARED_MIXED; > + else > + linfo->disallow_lpage &= ~KVM_LPAGE_PRIVATE_SHARED_MIXED; > +} > + > +static bool is_expected_attr_entry(void *entry, unsigned long expected_attrs) > +{ > + bool expect_private = expected_attrs & KVM_MEMORY_ATTRIBUTE_PRIVATE; > + > + if (xa_to_value(entry) & KVM_MEMORY_ATTRIBUTE_PRIVATE) { > + if (!expect_private) > + return false; > + } else if (expect_private) > + return false; > + > + return true; > +} > + > +static bool mem_attrs_mixed_2m(struct kvm *kvm, unsigned long attrs, > +gfn_t start, gfn_t end) > +{ > + XA_STATE(xas, 
&kvm->mem_attr_array, start); > + gfn_t gfn = start; > + void *entry; > + bool mixed = false; > + > + rcu_read_lock(); > + entry = xas_load(&xas); > + while (gfn < end) { > + if (xas_retry(&xas, entry)) > + continue; > + > + KVM_BUG_ON(gfn != xas.xa_index, kvm); > + > + if (!is_expected_attr_entry(entry, attrs)) { > + mixed = true; > + break; > + } > + > + entry = xas_next(&xas); > + gfn++; > + } > + > + rcu_read_unlock(); > + return mixed; > +} > + > +static bool mem_attrs_mixed(struct kvm *kvm, struct kvm_memory_slot *slot, > + int level, unsigned long attrs, > + gfn_t start, gfn_t end) > + { > + unsigned long gfn; > + > + if (level == PG_LEVEL_2M) > + return mem_attrs_mixed_2m(kvm, attrs, start, end); > + > + for (gfn =
Re: [RFC PATCH 18/21] kvm/ioapic: mark gsi-2 used in ioapic routing init
On 5/12/22 18:31, David Woodhouse wrote: From: Ankur Arora GSI-2/IOAPIC pin-2 is treated specially while initing IRQ routing: PIC does not use it at all while the IOAPIC maps virq=0 to pin-2 and does not use GSI-2. (all other GSIs are identity mapped to pins.) This results in any later code which allocates a virq to be assigned GSI-2. This virq is in-turn used to remap interrupts to HYPERVISOR_CALLBACK_VECTOR (0xf3) to deliver to the guest. Ordinarily this would be okay, but if the event delivery is via direct injection via KVM_REQ_EVENT (without going through the LAPIC) we see vmentry failure. This works fine for any other values of GSI. As a workaround, mark GSI-2 used. Signed-off-by: Ankur Arora Signed-off-by: David Woodhouse --- accel/kvm/kvm-all.c | 5 + hw/i386/kvm/ioapic.c | 1 + include/sysemu/kvm.h | 1 + 3 files changed, 7 insertions(+) diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c index ee7c8ef68b..5fab0d35c9 100644 --- a/hw/i386/kvm/ioapic.c +++ b/hw/i386/kvm/ioapic.c @@ -43,6 +43,7 @@ void kvm_pc_setup_irq_routing(bool pci_enabled) } } } Workarounds usually deserve some comment in the code. +kvm_irqchip_set_gsi(s, 2); kvm_irqchip_commit_routes(s); }
Re: [RFC PATCH 14/21] i386/xen: implement HYPERVISOR_vcpu_op
On 5/12/22 18:31, David Woodhouse wrote: From: Joao Martins This is simply when guest tries to register a vcpu_info and since vcpu_info placement is optional in the minimum ABI therefore we can just fail with -ENOSYS Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- target/i386/xen.c | 19 +++ 1 file changed, 19 insertions(+) +static int kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, + int cmd, uint64_t arg) +{ +switch (cmd) { +case VCPUOP_register_vcpu_info: { +/* no vcpu info placement for now */ +exit->u.hcall.result = -ENOSYS; +return 0; +} +} Can we log some trace-event or GUEST_ERROR? +exit->u.hcall.result = -ENOSYS; +return HCALL_ERR; +}
Re: [RFC PATCH 12/21] i386/xen: set shared_info page
On 5/12/22 18:31, David Woodhouse wrote: From: Joao Martins This is done by implementing HYPERVISOR_memory_op specifically XENMEM_add_to_physmap with space XENMAPSPACE_shared_info. While Xen removes the page with its own, we instead use the gfn passed by the guest. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- accel/kvm/kvm-all.c | 6 include/hw/core/cpu.h| 2 ++ include/sysemu/kvm.h | 2 ++ include/sysemu/kvm_int.h | 3 ++ target/i386/cpu.h| 8 ++ target/i386/trace-events | 1 + target/i386/xen-proto.h | 19 + target/i386/xen.c| 61 8 files changed, 102 insertions(+) create mode 100644 target/i386/xen-proto.h diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 8830546121..e57b693528 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -443,6 +443,8 @@ struct CPUState { /* track IOMMUs whose translations we've cached in the TCG TLB */ GArray *iommu_notifiers; + +struct XenState *xen_state; Since you define a type definition below, use it. }; typedef QTAILQ_HEAD(CPUTailQ, CPUState) CPUTailQ; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index e9a97eda8c..8e882fbe96 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -582,4 +582,6 @@ bool kvm_arch_cpu_check_are_resettable(void); bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); + +struct XenState *kvm_get_xen_state(KVMState *s); Ditto. #endif diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 3b4adcdc10..0d89cfe273 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -110,6 +110,9 @@ struct KVMState struct KVMDirtyRingReaper reaper; NotifyVmexitOption notify_vmexit; uint32_t notify_window; + +/* xen guest state */ +struct XenState xen; Ditto. 
}; void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 5ddd14467e..09c0281b8b 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -23,6 +23,14 @@ #include "sysemu/tcg.h" #include "cpu-qom.h" #include "kvm/hyperv-proto.h" +#include "xen-proto.h" + +#ifdef TARGET_X86_64 +#define TARGET_LONG_BITS 64 +#else +#define TARGET_LONG_BITS 32 +#endif How come you don't have access to the definitions from "cpu-param.h" here? Regards, Phil.
Re: [RFC PATCH 13/21] i386/xen: implement HYPERVISOR_hvm_op
On 5/12/22 18:31, David Woodhouse wrote: From: Joao Martins This is when guest queries for support for HVMOP_pagetable_dying. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- target/i386/xen.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/target/i386/xen.c b/target/i386/xen.c index 5d2d8a7e00..38d4cae3d0 100644 --- a/target/i386/xen.c +++ b/target/i386/xen.c @@ -17,6 +17,7 @@ #include "standard-headers/xen/version.h" #include "standard-headers/xen/memory.h" +#include "standard-headers/xen/hvm/hvm_op.h" #define PAGE_OFFSET0x8000UL #define PAGE_SHIFT 12 @@ -181,6 +182,20 @@ static int kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, return err ? HCALL_ERR : 0; } +static int kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, +int cmd, uint64_t arg) +{ +switch (cmd) { +case HVMOP_pagetable_dying: { +exit->u.hcall.result = -ENOSYS; +return 0; +} +} Could it be helpful to have a trace event here, or log a GUEST_ERROR? +exit->u.hcall.result = -ENOSYS; +return HCALL_ERR; +}
Re: [RFC PATCH 10/21] i386/xen: handle guest hypercalls
On 5/12/22 18:31, David Woodhouse wrote: From: Joao Martins This means handling the new exit reason for Xen but still crashing on purpose. As we implement each of the hypercalls we will then return the right return code. Signed-off-by: Joao Martins [dwmw2: Add CPL to hypercall tracing, disallow hypercalls from CPL > 0] Signed-off-by: David Woodhouse --- target/i386/kvm/kvm.c| 5 + target/i386/trace-events | 3 +++ target/i386/xen.c| 45 target/i386/xen.h| 1 + 4 files changed, 54 insertions(+) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 4b21d03250..6396d11f1e 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -5468,6 +5468,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) assert(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER); ret = kvm_handle_wrmsr(cpu, run); break; +#ifdef CONFIG_XEN CONFIG_XEN is set when the _host_ has Xen development files available. IIUC here you want to check if Xen HVM guest support is enabled. You might want to use a different CONFIG_XEN_xxx key, which itself depends on CONFIG_XEN. +case KVM_EXIT_XEN: +ret = kvm_xen_handle_exit(cpu, >xen); +break; +#endif default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; diff --git a/target/i386/xen.h b/target/i386/xen.h index d4903ecfa1..3537415d31 100644 --- a/target/i386/xen.h +++ b/target/i386/xen.h @@ -23,5 +23,6 @@ #define XEN_VERSION(maj, min) ((maj) << 16 | (min)) int kvm_xen_init(KVMState *s, uint32_t xen_version); +int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit); #endif /* QEMU_I386_XEN_H */
Re: [RFC PATCH 09/21] pc_piix: allow xenfv machine with XEN_EMULATE
On 5/12/22 18:31, David Woodhouse wrote: From: Joao Martins This allows -machine xenfv to work with Xen emulated guests. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- hw/i386/pc_piix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 3dcac2f4b6..d1127adde0 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -404,8 +404,8 @@ static void pc_xen_hvm_init(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); -if (!xen_enabled()) { -error_report("xenfv machine requires the xen accelerator"); +if (!xen_enabled() && (xen_mode != XEN_EMULATE)) { +error_report("xenfv machine requires the xen or kvm accelerator"); exit(1); } What about the XEN_EMULATE case? Shouldn't this be: if (!xen_enabled()) { if (xen_mode == XEN_EMULATE) { error_report("xenfv machine requires the xen accelerator"); } else { error_report("xenfv machine requires the xen or kvm accelerator"); } exit(1); } ?
Re: [PATCH 15/15] hw/intc: sifive_plic: Fix the pending register range check
On Mon, 2022-12-05 at 16:21 +0800, Bin Meng wrote: > On Fri, Dec 2, 2022 at 8:28 AM Wilfred Mallawa > wrote: > > > > On Thu, 2022-12-01 at 22:08 +0800, Bin Meng wrote: > > > The pending register upper limit is currently set to > > > plic->num_sources >> 3, which is wrong, e.g.: considering > > > plic->num_sources is 7, the upper limit becomes 0 which fails > > > the range check if reading the pending register at pending_base. > > > > > > Fixes: 1e24429e40df ("SiFive RISC-V PLIC Block") > > > Signed-off-by: Bin Meng > > > > > > --- > > > > > > hw/intc/sifive_plic.c | 5 +++-- > > > 1 file changed, 3 insertions(+), 2 deletions(-) > > > > > > diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c > > > index 7a6a358c57..a3fc8222c7 100644 > > > --- a/hw/intc/sifive_plic.c > > > +++ b/hw/intc/sifive_plic.c > > > @@ -143,7 +143,8 @@ static uint64_t sifive_plic_read(void > > > *opaque, > > > hwaddr addr, unsigned size) > > > uint32_t irq = (addr - plic->priority_base) >> 2; > > > > > > return plic->source_priority[irq]; > > > - } else if (addr_between(addr, plic->pending_base, plic- > > > > num_sources >> 3)) { > > > + } else if (addr_between(addr, plic->pending_base, > > > + (plic->num_sources + 31) >> 3)) { > > why does adding specifically 31 work here? > > > > Each pending register is 32-bit for 32 interrupt sources. Adding 31 > is > to round up to next pending register offset. > Ah I see, thanks for that. Regards, Wilfred > Regards, > Bin
Re: [RFC PATCH 08/21] xen_platform: exclude vfio-pci from the PCI platform unplug
On 5/12/22 18:31, David Woodhouse wrote: From: Joao Martins Such that PCI passthrough devices work for Xen emulated guests. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- hw/i386/xen/xen_platform.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index 710039851a..ae0a21f5bf 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -109,12 +109,25 @@ static void log_writeb(PCIXenPlatformState *s, char val) #define _UNPLUG_NVME_DISKS 3 #define UNPLUG_NVME_DISKS (1u << _UNPLUG_NVME_DISKS) +static bool pci_device_is_passthrough(PCIDevice *d) +{ +if (!strcmp(d->name, "xen-pci-passthrough")) { The 'QOM way' to do this check is: if (object_dynamic_cast(OBJECT(d), TYPE_XEN_PT_DEVICE)) { +return true; +} + +if (xen_mode == XEN_EMULATE && !strcmp(d->name, "vfio-pci")) { if (xen_mode == XEN_EMULATE && object_dynamic_cast(OBJECT(d), TYPE_VFIO_PCI)) { +return true; +} + +return false; +}
Re: [RFC PATCH 03/21] i386/kvm: handle Xen HVM cpuid leaves
Hi David, On 5/12/22 18:31, David Woodhouse wrote: From: Joao Martins Introduce support for emulating CPUID for Xen HVM guests via xen, xen_vapic as changeable params. Signed-off-by: Joao Martins [dwmw2: Obtain xen_version from machine property] Signed-off-by: David Woodhouse --- target/i386/cpu.c | 2 ++ target/i386/cpu.h | 3 ++ target/i386/kvm/kvm.c | 72 +++ target/i386/xen.h | 8 + 4 files changed, 85 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 22b681ca37..45aa9e40a5 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7069,6 +7069,8 @@ static Property x86_cpu_properties[] = { * own cache information (see x86_cpu_load_def()). */ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true), +DEFINE_PROP_BOOL("xen", X86CPU, xen, false), Maybe name it 'xen-hvm'? +DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false), What happens if we use -cpu host,-kvm,+xen,-xen-vapic ? Is -cpu host,-kvm,-xen,+xen-vapic meaningful? Otherwise we need to error out (eventually displaying some hint). /* * From "Requirements for Implementing the Microsoft diff --git a/target/i386/cpu.h b/target/i386/cpu.h index d4bc19577a..5ddd14467e 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1964,6 +1964,9 @@ struct ArchCPU { int32_t thread_id; int32_t hv_max_vps; + +bool xen; +bool xen_vapic; };
Re: [PATCH 4/6] hw/misc: Allwinner AXP-209 Emulation
Hi Philippe, On Sun, Dec 4, 2022 at 10:39 PM Philippe Mathieu-Daudé wrote: > > Hi Strahinja, > > On 4/12/22 00:19, Strahinja Jankovic wrote: > > This patch adds minimal support for AXP-209 PMU. > > Most important is chip ID since U-Boot SPL expects version 0x1. Besides > > the chip ID register, reset values for two more registers used by A10 > > U-Boot SPL are covered. > > > > Signed-off-by: Strahinja Jankovic > > --- > > hw/arm/Kconfig | 1 + > > hw/misc/Kconfig | 4 + > > hw/misc/allwinner-axp-209.c | 263 > > hw/misc/meson.build | 1 + > > 4 files changed, 269 insertions(+) > > create mode 100644 hw/misc/allwinner-axp-209.c > > > > diff --git a/hw/misc/allwinner-axp-209.c b/hw/misc/allwinner-axp-209.c > > new file mode 100644 > > index 00..229e3961b6 > > --- /dev/null > > +++ b/hw/misc/allwinner-axp-209.c > > @@ -0,0 +1,263 @@ > > +/* > > + * AXP-209 Emulation > > + * > > + * Written by Strahinja Jankovic > > + * > > You missed the "Copyright (c) " line. Ok, I will add it. > > > + * Permission is hereby granted, free of charge, to any person obtaining a > > + * copy of this software and associated documentation files (the > > "Software"), > > + * to deal in the Software without restriction, including without > > limitation > > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > > + * and/or sell copies of the Software, and to permit persons to whom the > > + * Software is furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be included > > in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS > > OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > > THE > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > > + * DEALINGS IN THE SOFTWARE. > > If you mind, please also include: > > * SPDX-License-Identifier: MIT Ok, I will add it. > > > + */ > > + > > +#include "qemu/osdep.h" > > +#include "qemu/log.h" > > +#include "hw/i2c/i2c.h" > > +#include "migration/vmstate.h" > > + > > +#ifndef AXP_209_ERR_DEBUG > > +#define AXP_209_ERR_DEBUG 0 > > +#endif > > + > > +#define TYPE_AXP_209 "allwinner.axp209" > > + > > +#define AXP_209(obj) \ > > +OBJECT_CHECK(AXP209I2CState, (obj), TYPE_AXP_209) > > + > > +#define DB_PRINT(fmt, args...) do { \ > > +if (AXP_209_ERR_DEBUG) { \ > > +fprintf(stderr, "%s: " fmt, __func__, ## args); \ > > Please replace the DB_PRINT() calls by trace events which are more > powerful: when a tracing backend is present, the events are built > in and you can individually enable them at runtime. I will do my best to update this to trace events. Have not used them before, but I will look at other places in code and docs. 
> > > +} \ > > +} while (0) > > > > +#define AXP_209_CHIP_VERSION_ID (0x01) > > +#define AXP_209_DC_DC2_OUT_V_CTRL_RESET (0x16) > > +#define AXP_209_IRQ_BANK_1_CTRL_RESET (0xd8) > > > > +/* Reset all counters and load ID register */ > > +static void axp_209_reset_enter(Object *obj, ResetType type) > > +{ > > +AXP209I2CState *s = AXP_209(obj); > > + > > +memset(s->regs, 0, NR_REGS); > > +s->ptr = 0; > > +s->count = 0; > > +s->regs[REG_CHIP_VERSION] = AXP_209_CHIP_VERSION_ID; > > +s->regs[REG_DC_DC2_OUT_V_CTRL] = AXP_209_DC_DC2_OUT_V_CTRL_RESET; > > +s->regs[REG_IRQ_BANK_1_CTRL] = AXP_209_IRQ_BANK_1_CTRL_RESET; > > +} > > > > +/* Initialization */ > > +static void axp_209_init(Object *obj) > > +{ > > +AXP209I2CState *s = AXP_209(obj); > > + > > +s->count = 0; > > +s->ptr = 0; > > +memset(s->regs, 0, NR_REGS); > > +s->regs[REG_CHIP_VERSION] = AXP_209_CHIP_VERSION_ID; > > +s->regs[REG_DC_DC2_OUT_V_CTRL] = 0x16; > > +s->regs[REG_IRQ_BANK_1_CTRL] = 0xd8; > > The device initialization flow is: > > - init() > - realize() > - reset() > > So these values are already set in axp_209_reset_enter(). Thanks, that makes perfect sense. I will update .init and .reset functions accordingly in v2 of the patch. > > Besides, you should use the definition you added instead of > magic values (AXP_209_DC_DC2_OUT_V_CTRL_RESET and > AXP_209_IRQ_BANK_1_CTRL_RESET). Yes, that was an oversight. I used the macros in .reset, but I forgot to update them in .init. > > > + > > +DB_PRINT("INIT AXP209\n"); > > + > > +return; > > +} > > Otherwise LGTM! Thanks! Best regards, Strahinja > > Thanks, > > Phil.
RE: [RFC PATCH for 8.0 10/13] virtio-net: Migrate vhost inflight descriptors
> From: Eugenio Pérez > Sent: Monday, December 5, 2022 12:05 PM > > There is currently no data to be migrated, since nothing populates or read > the fields on virtio-net. > > The migration of in-flight descriptors is modelled after the migration of > requests in virtio-blk. With some differences: > * virtio-blk migrates queue number on each request. Here we only add a > vq if it has descriptors to migrate, and then we make all descriptors > in an array. > * Use of QTAILQ since it works similar to signal the end of the inflight > descriptors: 1 for more data, 0 if end. But do it for each vq instead > of for each descriptor. > * Usage of VMState macros. > > The fields of descriptors would be way more complicated if we use the > VirtQueueElements directly, since there would be a few levels of > indirections. Using VirtQueueElementOld for the moment, and migrate to > VirtQueueElement for the final patch. > > TODO: Proper migration versioning > TODO: Do not embed vhost-vdpa structs > TODO: Migrate the VirtQueueElement, not VirtQueueElementOld. 
> > Signed-off-by: Eugenio Pérez > --- > include/hw/virtio/virtio-net.h | 2 + > include/migration/vmstate.h| 11 +++ > hw/net/virtio-net.c| 129 + > 3 files changed, 142 insertions(+) > > diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h > index ef234ffe7e..ae7c017ef0 100644 > --- a/include/hw/virtio/virtio-net.h > +++ b/include/hw/virtio/virtio-net.h > @@ -151,9 +151,11 @@ typedef struct VirtIONetQueue { > QEMUTimer *tx_timer; > QEMUBH *tx_bh; > uint32_t tx_waiting; > +uint32_t tx_inflight_num, rx_inflight_num; > struct { > VirtQueueElement *elem; > } async_tx; > +VirtQueueElement **tx_inflight, **rx_inflight; > struct VirtIONet *n; > } VirtIONetQueue; > > diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h > index 9726d2d09e..9e0dfef9ee 100644 > --- a/include/migration/vmstate.h > +++ b/include/migration/vmstate.h > @@ -626,6 +626,17 @@ extern const VMStateInfo vmstate_info_qlist; > .offset = vmstate_offset_varray(_state, _field, _type), \ > } > > +#define VMSTATE_STRUCT_VARRAY_ALLOC_UINT16(_field, _state, > _field_num,\ > + _version, _vmsd, _type) { > \ > +.name = (stringify(_field)), > \ > +.version_id = (_version), > \ > +.vmsd = &(_vmsd), > \ > +.num_offset = vmstate_offset_value(_state, _field_num, uint16_t), > \ > +.size = sizeof(_type), > \ > +.flags = VMS_STRUCT | VMS_VARRAY_UINT16 | VMS_ALLOC | > VMS_POINTER, \ > +.offset = vmstate_offset_pointer(_state, _field, _type), > \ > +} > + > #define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num, > _version, _vmsd, _type) {\ > .name = (stringify(_field)), \ > .version_id = (_version),\ > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index > aba12759d5..ffd7bf1fc7 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -3077,6 +3077,13 @@ static bool mac_table_doesnt_fit(void *opaque, > int version_id) > return !mac_table_fits(opaque, version_id); } > > +typedef struct VirtIONetInflightQueue { > +uint16_t idx; > +uint16_t num; > 
+QTAILQ_ENTRY(VirtIONetInflightQueue) entry; > +VirtQueueElementOld *elems; > +} VirtIONetInflightQueue; > + > /* This temporary type is shared by all the WITH_TMP methods > * although only some fields are used by each. > */ > @@ -3086,6 +3093,7 @@ struct VirtIONetMigTmp { > uint16_tcurr_queue_pairs_1; > uint8_t has_ufo; > uint32_thas_vnet_hdr; > +QTAILQ_HEAD(, VirtIONetInflightQueue) queues_inflight; > }; > > /* The 2nd and subsequent tx_waiting flags are loaded later than @@ - > 3231,6 +3239,124 @@ static const VMStateDescription > vmstate_virtio_net_rss = { > }, > }; > > +static const VMStateDescription vmstate_virtio_net_inflight_queue = { > +.name = "virtio-net-device/inflight/queue", > +.fields = (VMStateField[]) { > +VMSTATE_UINT16(idx, VirtIONetInflightQueue), > +VMSTATE_UINT16(num, VirtIONetInflightQueue), > + > +VMSTATE_STRUCT_VARRAY_ALLOC_UINT16(elems, > VirtIONetInflightQueue, num, > + 0, vmstate_virtqueue_element_old, > + VirtQueueElementOld), > +VMSTATE_END_OF_LIST() > +}, > +}; > + > +static int virtio_net_inflight_init(void *opaque) { > +struct VirtIONetMigTmp *tmp = opaque; > + > +QTAILQ_INIT(&tmp->queues_inflight); > +return 0; > +} > + > +static
Re: Sad to see the advent calendar go
Hi Simon, Thanks for your interest in the calendar! I am not a regular QEMU contributor, but I am a longtime user, and ran the calendar in 2020. I also put out a submission request for the calendar this past year, but there wasn't much response. The cost to host the calendar is trivial, we just need to convert some of the sysadmin responsibilities, and that is in progress. As far as future calendars go, the effort to construct a new calendar with novel, thematic content is substantial. Some of the difficulty lies in the technical element, but a lot of it lies in the synthesis and ideation. In 2020 I had a number of community submissions, and good pointers, but I also put in substantial effort, which I was capable of as a result of a holiday sabbatical. This year was a little different. I'm not sure if others in the community have the same perspective, but that is my perspective, and I would be committed to the 2023 calendar if folks were interested. I assembled some resources for the 2022 calendar, but I think with a little more upfront planning, and some directed request, I can coordinate a much better outcome for 2023. Thanks, Eldon
Re: [PATCH v6 14/14] target/arm: Use the max page size in a 2-stage ptw
On 12/5/22 10:50, Peter Maydell wrote: @@ -2639,6 +2640,14 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, return ret; } +/* + * Use the maximum of the S1 & S2 page size, so that invalidation + * of pages > TARGET_PAGE_SIZE works correctly. + */ +if (result->f.lg_page_size < s1_lgpgsz) { +result->f.lg_page_size = s1_lgpgsz; +} + /* Combine the S1 and S2 cache attributes. */ hcr = arm_hcr_el2_eff_secstate(env, is_secure); if (hcr & HCR_DC) { Firstly, what if the lg_page_size is < TARGET_PAGE_SIZE ? I think this can't happen for VMSA, but for PMSA it will when the region (in either S1 or S2) is less than the page size (in which case lg_page_size is 0). Presumably in this case we want to set the result's lg_page_size to also be 0 to preserve the "don't put this in the TLB" effect. Hmm, I hadn't considered that -- probably because I assumed a-profile. You're right that we should preserve the "don't cache the result" behaviour. Secondly, how does this work for VMSA? Suppose that stage 1 is using 4K pages and stage 2 is using 64K pages. We will then claim here that the result lg_page_size is 64K, but the attributes and mapping in the result are only valid for the 4K page that we looked up in stage 1 -- the surrounding 4K pages could have entirely different permissions/mapping. This only works because the middle-end only registers one page, at TARGET_PAGE_SIZE. But we need to record this as a large page, so that a flush of the (64k) stage2 page address affects all of the (4k) stage1 page entries that it covers. Perhaps it would be less confusing in N target/ implementations if we have two lg_page_size structure members, and handle the unioning in the middle-end? Soliciting suggestions for what to name such a beast (considering RME adds a stage3 lookup, and associated page/granule sizes), and how to signal that it is or isn't used (presumably 0, meaning that stageN+1 can't have a "don't record" setting). r~
Re: [PATCH for-8.0] target/s390x/tcg/mem_helper: Test the right bits in psw_key_valid()
On Mon, 2022-12-05 at 15:20 +0100, Thomas Huth wrote: > The PSW key mask is a 16 bit field, and the psw_key variable is > in the range from 0 to 15, so it does not make sense to use > "0x80 >> psw_key" for testing the bits here. We should use 0x8000 > instead. > > Signed-off-by: Thomas Huth Reviewed-by: Nina Schoetterl-Glausch > --- > Found by code inspection (Linux likely does not use these PSW key masks > yet, otherwise we might have noticed earlier) > > target/s390x/tcg/mem_helper.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c > index 9542fad59b..cb82cd1c1d 100644 > --- a/target/s390x/tcg/mem_helper.c > +++ b/target/s390x/tcg/mem_helper.c > @@ -51,7 +51,7 @@ static inline bool psw_key_valid(CPUS390XState *env, > uint8_t psw_key) > > if (env->psw.mask & PSW_MASK_PSTATE) { > /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */ > -return pkm & (0x80 >> psw_key); > +return pkm & (0x8000 >> psw_key); > } > return true; > }
[RFC PATCH 09/21] pc_piix: allow xenfv machine with XEN_EMULATE
From: Joao Martins This allows -machine xenfv to work with Xen emulated guests. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- hw/i386/pc_piix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 3dcac2f4b6..d1127adde0 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -404,8 +404,8 @@ static void pc_xen_hvm_init(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); -if (!xen_enabled()) { -error_report("xenfv machine requires the xen accelerator"); +if (!xen_enabled() && (xen_mode != XEN_EMULATE)) { +error_report("xenfv machine requires the xen or kvm accelerator"); exit(1); } -- 2.35.3
[RFC PATCH 12/21] i386/xen: set shared_info page
From: Joao Martins This is done by implementing HYPERVISOR_memory_op specifically XENMEM_add_to_physmap with space XENMAPSPACE_shared_info. While Xen removes the page with its own, we instead use the gfn passed by the guest. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- accel/kvm/kvm-all.c | 6 include/hw/core/cpu.h| 2 ++ include/sysemu/kvm.h | 2 ++ include/sysemu/kvm_int.h | 3 ++ target/i386/cpu.h| 8 ++ target/i386/trace-events | 1 + target/i386/xen-proto.h | 19 + target/i386/xen.c| 61 8 files changed, 102 insertions(+) create mode 100644 target/i386/xen-proto.h diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f99b0becd8..8a227515b7 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -172,6 +172,11 @@ void kvm_resample_fd_notify(int gsi) } } +struct XenState *kvm_get_xen_state(KVMState *s) +{ +return &s->xen; +} + int kvm_get_max_memslots(void) { KVMState *s = KVM_STATE(current_accel()); @@ -405,6 +410,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) cpu->vcpu_dirty = true; cpu->dirty_pages = 0; cpu->throttle_us_per_full = 0; +cpu->xen_state = &s->xen; mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 8830546121..e57b693528 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -443,6 +443,8 @@ struct CPUState { /* track IOMMUs whose translations we've cached in the TCG TLB */ GArray *iommu_notifiers; + +struct XenState *xen_state; }; typedef QTAILQ_HEAD(CPUTailQ, CPUState) CPUTailQ; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index e9a97eda8c..8e882fbe96 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -582,4 +582,6 @@ bool kvm_arch_cpu_check_are_resettable(void); bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); + +struct XenState *kvm_get_xen_state(KVMState *s); #endif diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 3b4adcdc10..0d89cfe273 --- 
a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -110,6 +110,9 @@ struct KVMState struct KVMDirtyRingReaper reaper; NotifyVmexitOption notify_vmexit; uint32_t notify_window; + +/* xen guest state */ +struct XenState xen; }; void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 5ddd14467e..09c0281b8b 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -23,6 +23,14 @@ #include "sysemu/tcg.h" #include "cpu-qom.h" #include "kvm/hyperv-proto.h" +#include "xen-proto.h" + +#ifdef TARGET_X86_64 +#define TARGET_LONG_BITS 64 +#else +#define TARGET_LONG_BITS 32 +#endif + #include "exec/cpu-defs.h" #include "qapi/qapi-types-common.h" #include "qemu/cpu-float.h" diff --git a/target/i386/trace-events b/target/i386/trace-events index 3fb9ee3add..2bf732ee07 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -14,3 +14,4 @@ kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data # target/i386/xen.c kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIu64 +kvm_xen_set_shared_info(uint64_t gfn) "shared info at gfn 0x%" PRIx64 diff --git a/target/i386/xen-proto.h b/target/i386/xen-proto.h new file mode 100644 index 00..c394909f54 --- /dev/null +++ b/target/i386/xen-proto.h @@ -0,0 +1,19 @@ +/* + * Definitions for Xen guest/hypervisor interaction - x86-specific part + * + * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef TARGET_I386_XEN_PROTO_H +#define TARGET_I386_XEN_PROTO_H + +typedef struct XenState { +struct shared_info *shared_info; +} XenState; + +#endif + diff --git a/target/i386/xen.c b/target/i386/xen.c index ee6f99523d..5d2d8a7e00 100644 --- a/target/i386/xen.c +++ b/target/i386/xen.c @@ -16,8 +16,10 @@ #include "trace.h" #include "standard-headers/xen/version.h" +#include "standard-headers/xen/memory.h" #define PAGE_OFFSET0x8000UL +#define PAGE_SHIFT 12 /* * Unhandled hypercalls error: @@ -123,6 +125,62 @@ static int kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu, return err ? HCALL_ERR : 0; } +static int xen_set_shared_info(CPUState *cs, struct shared_info *shi, + uint64_t gfn) +{ +struct kvm_xen_hvm_attr xhsi; +XenState *xen = cs->xen_state; +KVMState *s = cs->kvm_state; +int err; + +xhsi.type = KVM_XEN_ATTR_TYPE_SHARED_INFO; +
[RFC PATCH 08/21] xen_platform: exclude vfio-pci from the PCI platform unplug
From: Joao Martins Such that PCI passthrough devices work for Xen emulated guests. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- hw/i386/xen/xen_platform.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index 710039851a..ae0a21f5bf 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -109,12 +109,25 @@ static void log_writeb(PCIXenPlatformState *s, char val) #define _UNPLUG_NVME_DISKS 3 #define UNPLUG_NVME_DISKS (1u << _UNPLUG_NVME_DISKS) +static bool pci_device_is_passthrough(PCIDevice *d) +{ +if (!strcmp(d->name, "xen-pci-passthrough")) { +return true; +} + +if (xen_mode == XEN_EMULATE && !strcmp(d->name, "vfio-pci")) { +return true; +} + +return false; +} + static void unplug_nic(PCIBus *b, PCIDevice *d, void *o) { /* We have to ignore passthrough devices */ if (pci_get_word(d->config + PCI_CLASS_DEVICE) == PCI_CLASS_NETWORK_ETHERNET -&& strcmp(d->name, "xen-pci-passthrough") != 0) { +&& !pci_device_is_passthrough(d)) { object_unparent(OBJECT(d)); } } @@ -187,9 +200,8 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque) !(flags & UNPLUG_IDE_SCSI_DISKS); /* We have to ignore passthrough devices */ -if (!strcmp(d->name, "xen-pci-passthrough")) { +if (pci_device_is_passthrough(d)) return; -} switch (pci_get_word(d->config + PCI_CLASS_DEVICE)) { case PCI_CLASS_STORAGE_IDE: -- 2.35.3
[RFC PATCH 02/21] i386/xen: Add xen-version machine property and init KVM Xen support
From: David Woodhouse The original Oracle version of this made it a CPU property, but it isn't really a per-CPU thing. I then tried making it a KVM accelerator property but moved to a machine property for two reasons. One is that it allows us to set it in default_machine_opts for the xenfv platform when not running on actual Xen, and also because theoretically we *could* do this with TCG too; we'd just have to implement a bunch of the stuff that KVM already does for us. Signed-off-by: David Woodhouse --- hw/i386/pc.c| 32 +++ hw/i386/pc_piix.c | 10 +++-- include/hw/i386/pc.h| 3 +++ target/i386/kvm/kvm.c | 17 ++ target/i386/meson.build | 1 + target/i386/xen.c | 49 + target/i386/xen.h | 19 7 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 target/i386/xen.c create mode 100644 target/i386/xen.h diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 546b703cb4..9bada1a8ff 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1811,6 +1811,32 @@ static void pc_machine_set_max_fw_size(Object *obj, Visitor *v, pcms->max_fw_size = value; } +static void pc_machine_get_xen_version(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ +PCMachineState *pcms = PC_MACHINE(obj); +uint32_t value = pcms->xen_version; + +visit_type_uint32(v, name, &value, errp); +} + +static void pc_machine_set_xen_version(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ +PCMachineState *pcms = PC_MACHINE(obj); +Error *error = NULL; +uint32_t value; + +visit_type_uint32(v, name, &value, &error); +if (error) { +error_propagate(errp, error); +return; +} + +pcms->xen_version = value; +} static void pc_machine_initfn(Object *obj) { @@ -1978,6 +2004,12 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, PC_MACHINE_SMBIOS_EP, "SMBIOS Entry Point type [32, 64]"); + +object_class_property_add(oc, "xen-version", "uint32", +pc_machine_get_xen_version, pc_machine_set_xen_version, +NULL, NULL); 
+object_class_property_set_description(oc, "xen-version", +"Xen version to be emulated (in XENVER_version form e.g. 0x4000a for 4.10)"); } static const TypeInfo pc_machine_info = { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 0ad0ed1603..13286d0739 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -876,7 +876,10 @@ static void xenfv_4_2_machine_options(MachineClass *m) pc_i440fx_4_2_machine_options(m); m->desc = "Xen Fully-virtualized PC"; m->max_cpus = HVM_MAX_VCPUS; -m->default_machine_opts = "accel=xen,suppress-vmdesc=on"; +if (xen_enabled()) +m->default_machine_opts = "accel=xen,suppress-vmdesc=on"; +else +m->default_machine_opts = "accel=kvm,xen-version=0x40002"; } DEFINE_PC_MACHINE(xenfv_4_2, "xenfv-4.2", pc_xen_hvm_init, @@ -888,7 +891,10 @@ static void xenfv_3_1_machine_options(MachineClass *m) m->desc = "Xen Fully-virtualized PC"; m->alias = "xenfv"; m->max_cpus = HVM_MAX_VCPUS; -m->default_machine_opts = "accel=xen,suppress-vmdesc=on"; +if (xen_enabled()) +m->default_machine_opts = "accel=xen,suppress-vmdesc=on"; +else +m->default_machine_opts = "accel=kvm,xen-version=0x30001"; } DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c95333514e..9b14b18836 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -52,6 +52,9 @@ typedef struct PCMachineState { bool default_bus_bypass_iommu; uint64_t max_fw_size; +/* Xen HVM emulation */ +uint32_t xen_version; + /* ACPI Memory hotplug IO base address */ hwaddr memhp_io_base; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index a213209379..ff3ea245cf 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -31,6 +31,7 @@ #include "sysemu/runstate.h" #include "kvm_i386.h" #include "sev.h" +#include "xen.h" #include "hyperv.h" #include "hyperv-proto.h" @@ -2459,6 +2460,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) { uint64_t identity_base = 0xfffbc000; uint64_t shadow_mem; +uint32_t 
xen_version; int ret; struct utsname utsname; Error *local_err = NULL; @@ -2513,6 +2515,21 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } +xen_version = object_property_get_int(OBJECT(ms), "xen-version", NULL); +if (xen_version == (uint32_t) -1) +xen_version = 0; +if (xen_version) { +#ifdef CONFIG_XEN +ret = kvm_xen_init(s, xen_version); +
[RFC PATCH 13/21] i386/xen: implement HYPERVISOR_hvm_op
From: Joao Martins This is when guest queries for support for HVMOP_pagetable_dying. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- target/i386/xen.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/target/i386/xen.c b/target/i386/xen.c index 5d2d8a7e00..38d4cae3d0 100644 --- a/target/i386/xen.c +++ b/target/i386/xen.c @@ -17,6 +17,7 @@ #include "standard-headers/xen/version.h" #include "standard-headers/xen/memory.h" +#include "standard-headers/xen/hvm/hvm_op.h" #define PAGE_OFFSET0x8000UL #define PAGE_SHIFT 12 @@ -181,6 +182,20 @@ static int kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, return err ? HCALL_ERR : 0; } +static int kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, +int cmd, uint64_t arg) +{ +switch (cmd) { +case HVMOP_pagetable_dying: { +exit->u.hcall.result = -ENOSYS; +return 0; +} +} + +exit->u.hcall.result = -ENOSYS; +return HCALL_ERR; +} + static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) { uint16_t code = exit->u.hcall.input; @@ -191,6 +206,9 @@ static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) } switch (code) { +case __HYPERVISOR_hvm_op: +return kvm_xen_hcall_hvm_op(exit, exit->u.hcall.params[0], +exit->u.hcall.params[1]); case __HYPERVISOR_memory_op: return kvm_xen_hcall_memory_op(exit, exit->u.hcall.params[0], exit->u.hcall.params[1], cpu); -- 2.35.3
[RFC PATCH 16/21] i386/xen: handle register_vcpu_time_memory_area
From: Joao Martins In order to support Linux vdso in Xen. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- target/i386/xen.c | 36 +--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/target/i386/xen.c b/target/i386/xen.c index 59aca2ad19..1def526e08 100644 --- a/target/i386/xen.c +++ b/target/i386/xen.c @@ -43,17 +43,22 @@ static void *gpa_to_hva(uint64_t gpa) mrs.offset_within_region); } -static void *gva_to_hva(CPUState *cs, uint64_t gva) +static uint64_t gva_to_gpa(CPUState *cs, uint64_t gva) { struct kvm_translation t = { .linear_address = gva }; int err; err = kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &t); if (err || !t.valid) { -return NULL; +return 0; } -return gpa_to_hva(t.physical_address); +return t.physical_address; +} + +static void *gva_to_hva(CPUState *cs, uint64_t gva) +{ +return gpa_to_hva(gva_to_gpa(cs, gva)); } int kvm_xen_init(KVMState *s, uint32_t xen_version) @@ -244,6 +249,27 @@ static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target, return xen_set_vcpu_attr(target, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa); } +static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target, + uint64_t arg) +{ +struct vcpu_register_time_memory_area *tma; +uint64_t gpa; +void *hva; + +tma = gva_to_hva(cs, arg); +if (!tma) { +return -EFAULT; +} + +hva = gva_to_hva(cs, tma->addr.p); +if (!hva || !tma->addr.p) { +return -EFAULT; +} + +gpa = gva_to_gpa(cs, tma->addr.p); +return xen_set_vcpu_attr(target, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, gpa); +} + +static int kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu, int cmd, int vcpu_id, uint64_t arg) { @@ -252,6 +278,10 @@ static int kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu, int err = -ENOSYS; switch (cmd) { +case VCPUOP_register_vcpu_time_memory_area: { +err = vcpuop_register_vcpu_time_info(cs, dest, arg); +break; +} case VCPUOP_register_vcpu_info: { err = vcpuop_register_vcpu_info(cs, dest, arg); break; -- 2.35.3
[RFC PATCH 18/21] kvm/ioapic: mark gsi-2 used in ioapic routing init
From: Ankur Arora GSI-2/IOAPIC pin-2 is treated specially while initing IRQ routing: PIC does not use it at all while the IOAPIC maps virq=0 to pin-2 and does not use GSI-2. (all other GSIs are identity mapped to pins.) This results in any later code which allocates a virq to be assigned GSI-2. This virq is in-turn used to remap interrupts to HYPERVISOR_CALLBACK_VECTOR (0xf3) to deliver to the guest. Ordinarily this would be okay, but if the event delivery is via direct injection via KVM_REQ_EVENT (without going through the LAPIC) we see vmentry failure. This works fine for any other values of GSI. As a workaround, mark GSI-2 used. Signed-off-by: Ankur Arora Signed-off-by: David Woodhouse --- accel/kvm/kvm-all.c | 5 + hw/i386/kvm/ioapic.c | 1 + include/sysemu/kvm.h | 1 + 3 files changed, 7 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 8a227515b7..b40cfc4144 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1677,6 +1677,11 @@ static void set_gsi(KVMState *s, unsigned int gsi) set_bit(gsi, s->used_gsi_bitmap); } +void kvm_irqchip_set_gsi(KVMState *s, unsigned int gsi) +{ +set_gsi(s, gsi); +} + static void clear_gsi(KVMState *s, unsigned int gsi) { clear_bit(gsi, s->used_gsi_bitmap); diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c index ee7c8ef68b..5fab0d35c9 100644 --- a/hw/i386/kvm/ioapic.c +++ b/hw/i386/kvm/ioapic.c @@ -43,6 +43,7 @@ void kvm_pc_setup_irq_routing(bool pci_enabled) } } } +kvm_irqchip_set_gsi(s, 2); kvm_irqchip_commit_routes(s); } diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 8e882fbe96..a249ea480f 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -512,6 +512,7 @@ static inline void kvm_irqchip_commit_route_changes(KVMRouteChange *c) } void kvm_irqchip_release_virq(KVMState *s, int virq); +void kvm_irqchip_set_gsi(KVMState *s, unsigned int gsi); int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter); int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t 
vcpu, uint32_t sint); -- 2.35.3
[RFC PATCH 00/21] Xen HVM support under KVM
In 2019, Joao Martins posted a set of Linux KVM patches¹ which added support for hosting Xen HVM guests directly under KVM. Referenced from that post was a qemu git tree² which made use of it. Now that the core of the kernel support has been merged upstream, I'm looking at updating the qemu support and potentially getting it merged too. This is the first attempt at the first round of that, adding the basics of KVM support with '-machine xenfv' and then just enough of the hypercall support to register the shared_info and vcpu_info areas before the guest kernel will panic when it can't use event channels for IPIs. Before I go much further, I think it needs all the runtime state (the shared info page address, etc.) to be correctly live migratable. Some pointers on how to do that would be welcomed. There's plenty more to heckle too... qemu-system-x86_64 -serial mon:stdio -machine xenfv,xen-version=0x4000a \ -cpu host,-kvm,+xen,+xen-vapic -display none \ -kernel /boot/vmlinuz-5.17.8-200.fc35.x86_64 \ -append "console=ttyS0,115200 earlyprintk=ttyS0,115200" \ --trace "kvm_xen*" ¹ https://lore.kernel.org/kvm/20190220201609.28290-1-joao.m.mart...@oracle.com/ ² https://github.com/jpemartins/qemu/commits/xen-shim-rfc Ankur Arora (2): kvm/ioapic: mark gsi-2 used in ioapic routing init i386/xen: handle event channel upcall related hypercalls David Woodhouse (1): i386/xen: Add xen-version machine property and init KVM Xen support Joao Martins (18): include: import xen public headers i386/kvm: handle Xen HVM cpuid leaves xen-platform-pci: allow its creation with XEN_EMULATE mode hw/xen_backend: refactor xen_be_init() pc_piix: handle XEN_EMULATE backend init xen-platform-pci: register xen-mmio as RAM for XEN_EMULATE xen_platform: exclude vfio-pci from the PCI platform unplug pc_piix: allow xenfv machine with XEN_EMULATE i386/xen: handle guest hypercalls i386/xen: implement HYPERCALL_xen_version i386/xen: set shared_info page i386/xen: implement HYPERVISOR_hvm_op i386/xen: 
implement HYPERVISOR_vcpu_op i386/xen: handle register_vcpu_info i386/xen: handle register_vcpu_time_memory_area i386/xen: handle register_runstate_memory_area i386/xen: implement HYPERVISOR_event_channel_op i386/xen: implement HYPERVISOR_sched_op accel/kvm/kvm-all.c| 11 + backends/cryptodev-vhost.c |4 +- backends/vhost-user.c |4 +- hw/block/vhost-user-blk.c | 45 +- hw/display/next-fb.c |2 +- hw/i386/kvm/ioapic.c |1 + hw/i386/pc.c | 32 + hw/i386/pc_piix.c | 19 +- hw/i386/xen/xen_platform.c | 37 +- hw/loongarch/Kconfig |1 - hw/loongarch/acpi-build.c | 18 - hw/loongarch/virt.c| 62 -- hw/net/vhost_net.c |8 +- hw/nvme/ctrl.c | 182 +++- hw/scsi/vhost-scsi-common.c|4 +- hw/virtio/trace-events |4 +- hw/virtio/vhost-user-fs.c |4 +- hw/virtio/vhost-user-gpio.c| 26 +- hw/virtio/vhost-user-i2c.c |4 +- hw/virtio/vhost-user-rng.c |4 +- hw/virtio/vhost-user.c | 71 -- hw/virtio/vhost-vsock-common.c |4 +- hw/virtio/vhost.c | 44 +- hw/xen/xen-legacy-backend.c| 62 +- include/hw/core/cpu.h |2 + include/hw/i386/pc.h |3 + include/hw/loongarch/virt.h|5 - include/hw/virtio/vhost-user-gpio.h| 10 - include/hw/virtio/vhost-user.h | 18 - include/hw/virtio/vhost.h |6 +- include/hw/virtio/virtio.h | 23 +- include/hw/xen/xen-legacy-backend.h|5 + include/standard-headers/xen/arch-x86/cpuid.h | 118 +++ include/standard-headers/xen/arch-x86/xen-x86_32.h | 194 include/standard-headers/xen/arch-x86/xen-x86_64.h | 241 + include/standard-headers/xen/arch-x86/xen.h| 398 include/standard-headers/xen/event_channel.h | 388 include/standard-headers/xen/features.h| 143 +++ include/standard-headers/xen/grant_table.h | 686 + include/standard-headers/xen/hvm/hvm_op.h | 395 include/standard-headers/xen/hvm/params.h | 318 ++
[RFC PATCH 19/21] i386/xen: handle event channel upcall related hypercalls
From: Ankur Arora Handle both HVMOP_set_param(.index = HVM_PARAM_CALLBACK_IRQ) and HVMOP_set_evtchn_upcall_vector which set the system-wide and per-vCPU upcall vectors respectively. The former injects the vector directly to the vCPU, which KVM handles for us when entering the vCPU with vcpu_info->evtchn_upcall_pending set. The latter is injected to the local APIC just like an MSI. The GSI and PCI_INTX delivery methods are not supported yet; those need to simulate a level-triggered event on the I/OAPIC. Signed-off-by: Ankur Arora Signed-off-by: Joao Martins [dwmw2: Rework for upstream kernel changes in evtchn handling] Signed-off-by: David Woodhouse --- target/i386/kvm/kvm.c| 1 + target/i386/trace-events | 1 + target/i386/xen.c| 132 +-- 3 files changed, 129 insertions(+), 5 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 6396d11f1e..8a381c2073 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -36,6 +36,7 @@ #include "hyperv.h" #include "hyperv-proto.h" #include "xen.h" +#include "hw/xen/xen.h" #include "exec/gdbstub.h" #include "qemu/host-utils.h" diff --git a/target/i386/trace-events b/target/i386/trace-events index 1c6c44f291..3ff3ec6f61 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -16,3 +16,4 @@ kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIu64 kvm_xen_set_shared_info(uint64_t gfn) "shared info at gfn 0x%" PRIx64 kvm_xen_set_vcpu_attr(int cpu, int type, uint64_t gpa) "vcpu attr cpu %d type %d gpa 0x%" PRIu64 +kvm_xen_set_vcpu_callback(int cpu, int vector) "callback vcpu %d vector %d" diff --git a/target/i386/xen.c b/target/i386/xen.c index 6f0c46c018..21146204e1 100644 --- a/target/i386/xen.c 
#include "standard-headers/xen/version.h" #include "standard-headers/xen/memory.h" #include "standard-headers/xen/hvm/hvm_op.h" +#include "standard-headers/xen/hvm/params.h" #include "standard-headers/xen/vcpu.h" #define PAGE_OFFSET0x8000UL @@ -34,6 +35,8 @@ #define HCALL_ERR 0 #endif +static QemuMutex xen_global_mutex; + static void *gpa_to_hva(uint64_t gpa) { MemoryRegionSection mrs; @@ -93,6 +96,8 @@ int kvm_xen_init(KVMState *s, uint32_t xen_version) return ret; } +qemu_mutex_init(&xen_global_mutex); + return 0; } @@ -124,7 +129,8 @@ static int kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu, * XENFEAT_memory_op_vnode_supported * XENFEAT_writable_page_tables */ -fi->submap = (1U << XENFEAT_auto_translated_physmap); +fi->submap = (1U << XENFEAT_auto_translated_physmap) | + (1U << XENFEAT_hvm_callback_vector); break; } } @@ -200,18 +206,131 @@ static int kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, return err ? HCALL_ERR : 0; } -static int kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, +static void xen_vcpu_set_callback(CPUState *cs, run_on_cpu_data data) +{ +struct kvm_xen_vcpu_attr xvuv; +uint8_t vector = data.host_int; +int err; + +xvuv.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR; +xvuv.u.vector = vector; +err = kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xvuv); +if (err < 0) { +return; +} + +trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector); +} + +static int handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu, +uint64_t arg) +{ +CPUState *cs = CPU(cpu); +struct xen_hvm_param *hp; +int err = 0, via; + +hp = gva_to_hva(cs, arg); +if (!hp) { +err = -EFAULT; +goto out; +} + +if (hp->domid != DOMID_SELF) { +err = -EINVAL; +goto out; +} + +#define CALLBACK_VIA_TYPE_SHIFT 56 +#define CALLBACK_VIA_TYPE_GSI 0x0 +#define CALLBACK_VIA_TYPE_PCI_INTX0x1 +#define CALLBACK_VIA_TYPE_VECTOR 0x2 +#define CALLBACK_VIA_TYPE_EVTCHN 0x3 +switch (hp->index) { +case HVM_PARAM_CALLBACK_IRQ: +via = hp->value >> CALLBACK_VIA_TYPE_SHIFT; +if (via == 
CALLBACK_VIA_TYPE_GSI || +via == CALLBACK_VIA_TYPE_PCI_INTX) { +err = -ENOSYS; +goto out; +} else if (via == CALLBACK_VIA_TYPE_VECTOR) { +struct kvm_xen_hvm_attr xhuv; +xhuv.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR; +xhuv.u.vector = (uint8_t)hp->value; +err = kvm_vm_ioctl(cs->kvm_state, KVM_XEN_HVM_SET_ATTR, &xhuv); +} +break; +default: +err = -ENOSYS; +goto out; +} + + +out: +exit->u.hcall.result = err; +return err ?
[RFC PATCH 07/21] xen-platform-pci: register xen-mmio as RAM for XEN_EMULATE
From: Joao Martins This is a workaround while we find the most elegant solution in grant table frames mapping. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- hw/i386/xen/xen_platform.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index 914619d140..710039851a 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -469,8 +469,12 @@ static const MemoryRegionOps platform_mmio_handler = { static void platform_mmio_setup(PCIXenPlatformState *d) { -memory_region_init_io(&d->mmio_bar, OBJECT(d), &platform_mmio_handler, d, - "xen-mmio", 0x100); +if (xen_mode == XEN_EMULATE) +memory_region_init_ram(&d->mmio_bar, OBJECT(d), "xen-mmio", 0x100, + &error_fatal); +else +memory_region_init_io(&d->mmio_bar, OBJECT(d), &platform_mmio_handler, d, + "xen-mmio", 0x100); } static int xen_platform_post_load(void *opaque, int version_id) -- 2.35.3
[RFC PATCH 15/21] i386/xen: handle register_vcpu_info
From: Joao Martins Handle the hypercall to set a per vcpu info, as opposed to using shared_info equivalent. Also, Guests may not call VCPUOP_register_vcpu_info and will fail in event channel operations if a proper one isn't set in Qemu. So derive the hva from shared_info which is where these are located when guest doesn't seed an additional pointer. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- target/i386/cpu.h| 1 + target/i386/trace-events | 1 + target/i386/xen-proto.h | 4 +++ target/i386/xen.c| 72 ++-- 4 files changed, 68 insertions(+), 10 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 09c0281b8b..db152d6902 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1801,6 +1801,7 @@ typedef struct CPUArchState { HVFX86LazyFlags hvf_lflags; void *hvf_mmio_buf; #endif +struct XenCPUState xen_vcpu; uint64_t mcg_cap; uint64_t mcg_ctl; diff --git a/target/i386/trace-events b/target/i386/trace-events index 2bf732ee07..1c6c44f291 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -15,3 +15,4 @@ kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data # target/i386/xen.c kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIu64 kvm_xen_set_shared_info(uint64_t gfn) "shared info at gfn 0x%" PRIx64 +kvm_xen_set_vcpu_attr(int cpu, int type, uint64_t gpa) "vcpu attr cpu %d type %d gpa 0x%" PRIu64 diff --git a/target/i386/xen-proto.h b/target/i386/xen-proto.h index c394909f54..8f6ee4c17b 100644 --- a/target/i386/xen-proto.h +++ b/target/i386/xen-proto.h @@ -15,5 +15,9 @@ typedef struct XenState { struct shared_info *shared_info; } XenState; +typedef struct XenCPUState { + struct vcpu_info *info; +} XenCPUState; + #endif diff --git a/target/i386/xen.c b/target/i386/xen.c index 61c9959981..59aca2ad19 100644 --- a/target/i386/xen.c 
+++ b/target/i386/xen.c @@ -14,6 +14,7 @@ #include "exec/address-spaces.h" #include "xen.h" #include "trace.h" +#include "sysemu/sysemu.h" #include "standard-headers/xen/version.h" #include "standard-headers/xen/memory.h" @@ -133,13 +134,24 @@ static int xen_set_shared_info(CPUState *cs, struct shared_info *shi, struct kvm_xen_hvm_attr xhsi; XenState *xen = cs->xen_state; KVMState *s = cs->kvm_state; -int err; +XenCPUState *xcpu; +CPUState *cpu; +int i, err; xhsi.type = KVM_XEN_ATTR_TYPE_SHARED_INFO; xhsi.u.shared_info.gfn = gfn; err = kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &xhsi); trace_kvm_xen_set_shared_info(gfn); xen->shared_info = shi; + +for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) { +cpu = qemu_get_cpu(i); +if (cpu) { +xcpu = &X86_CPU(cpu)->env.xen_vcpu; +xcpu->info = &shi->vcpu_info[cpu->cpu_index]; +} +} + return err; } @@ -197,19 +209,57 @@ static int kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, return HCALL_ERR; } -static int kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, - int cmd, uint64_t arg) +static int xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa) +{ +struct kvm_xen_vcpu_attr xhsi; + +xhsi.type = type; +xhsi.u.gpa = gpa; + +trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa); + +return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi); +} + +static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target, + uint64_t arg) { +XenCPUState *xt = &X86_CPU(target)->env.xen_vcpu; +struct vcpu_register_vcpu_info *rvi; +uint64_t gpa; +void *hva; + +rvi = gva_to_hva(cs, arg); +if (!rvi) { +return -EFAULT; +} + +gpa = ((rvi->mfn << PAGE_SHIFT) + rvi->offset); +hva = gpa_to_hva(gpa); +if (!hva) { +return -EFAULT; +} + +xt->info = hva; +return xen_set_vcpu_attr(target, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa); +} + +static int kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, int vcpu_id, uint64_t arg) +{ +CPUState *dest = qemu_get_cpu(vcpu_id); +CPUState *cs = CPU(cpu); +int err = -ENOSYS; + switch (cmd) { case VCPUOP_register_vcpu_info: { 
-/* no vcpu info placement for now */ -exit->u.hcall.result = -ENOSYS; -return 0; +err = vcpuop_register_vcpu_info(cs, dest, arg); +break; } } -exit->u.hcall.result = -ENOSYS; -return HCALL_ERR; +exit->u.hcall.result = err; +return err ? HCALL_ERR : 0; } static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) @@ -223,8 +273,10 @@ static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
[RFC PATCH 20/21] i386/xen: implement HYPERVISOR_event_channel_op
From: Joao Martins Additionally set XEN_INTERFACE_VERSION to most recent in order to exercise both event_channel_op and event_channel_op_compat. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- target/i386/xen.c | 46 ++ 1 file changed, 46 insertions(+) diff --git a/target/i386/xen.c b/target/i386/xen.c index 21146204e1..f3cc240bff 100644 --- a/target/i386/xen.c +++ b/target/i386/xen.c @@ -16,11 +16,14 @@ #include "trace.h" #include "sysemu/sysemu.h" +#define __XEN_INTERFACE_VERSION__ 0x00040400 + #include "standard-headers/xen/version.h" #include "standard-headers/xen/memory.h" #include "standard-headers/xen/hvm/hvm_op.h" #include "standard-headers/xen/hvm/params.h" #include "standard-headers/xen/vcpu.h" +#include "standard-headers/xen/event_channel.h" #define PAGE_OFFSET0x8000UL #define PAGE_SHIFT 12 @@ -436,6 +439,43 @@ static int kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu, return err ? HCALL_ERR : 0; } +static int kvm_xen_hcall_evtchn_op_compat(struct kvm_xen_exit *exit, + X86CPU *cpu, uint64_t arg) +{ +struct evtchn_op *op = gva_to_hva(CPU(cpu), arg); +int err = -ENOSYS; + +if (!op) { +goto err; +} + +switch (op->cmd) { +default: +exit->u.hcall.result = err; +return 0; +} +err: +exit->u.hcall.result = err; +return err ? HCALL_ERR : 0; +} + +static int kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, + int cmd, uint64_t arg) +{ +int err = -ENOSYS; + +switch (cmd) { +case EVTCHNOP_init_control: +/* FIFO ABI */ +default: +exit->u.hcall.result = err; +return 0; +} + +exit->u.hcall.result = err; +return err ? 
HCALL_ERR : 0; +} + static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) { uint16_t code = exit->u.hcall.input; @@ -449,6 +489,12 @@ static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) case HVMOP_set_evtchn_upcall_vector: return kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, exit->u.hcall.params[0]); +case __HYPERVISOR_event_channel_op_compat: +return kvm_xen_hcall_evtchn_op_compat(exit, cpu, + exit->u.hcall.params[0]); +case __HYPERVISOR_event_channel_op: +return kvm_xen_hcall_evtchn_op(exit, exit->u.hcall.params[0], + exit->u.hcall.params[1]); case __HYPERVISOR_vcpu_op: return kvm_xen_hcall_vcpu_op(exit, cpu, exit->u.hcall.params[0], -- 2.35.3
[RFC PATCH 11/21] i386/xen: implement HYPERCALL_xen_version
From: Joao Martins This is just meant to serve as an example of how we can implement hypercalls. xen_version specifically since Qemu does all kinds of feature controllability. So handling that here seems appropriate. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- target/i386/xen.c | 67 +++ 1 file changed, 67 insertions(+) diff --git a/target/i386/xen.c b/target/i386/xen.c index d7e942289c..ee6f99523d 100644 --- a/target/i386/xen.c +++ b/target/i386/xen.c @@ -11,9 +11,14 @@ #include "qemu/osdep.h" #include "kvm/kvm_i386.h" +#include "exec/address-spaces.h" #include "xen.h" #include "trace.h" +#include "standard-headers/xen/version.h" + +#define PAGE_OFFSET0x8000UL + /* * Unhandled hypercalls error: * @@ -24,6 +29,28 @@ #define HCALL_ERR 0 #endif +static void *gpa_to_hva(uint64_t gpa) +{ +MemoryRegionSection mrs; + +mrs = memory_region_find(get_system_memory(), gpa, 1); +return !mrs.mr ? NULL : qemu_map_ram_ptr(mrs.mr->ram_block, + mrs.offset_within_region); +} + +static void *gva_to_hva(CPUState *cs, uint64_t gva) +{ +struct kvm_translation t = { .linear_address = gva }; +int err; + +err = kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &t); +if (err || !t.valid) { +return NULL; +} + +return gpa_to_hva(t.physical_address); +} + int kvm_xen_init(KVMState *s, uint32_t xen_version) { const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR | @@ -59,6 +86,43 @@ int kvm_xen_init(KVMState *s, uint32_t xen_version) return 0; } +static int kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, uint64_t arg) +{ +int err = 0; + +switch (cmd) { +case XENVER_get_features: { +struct xen_feature_info *fi; + +fi = gva_to_hva(CPU(cpu), arg); +if (!fi) { +err = -EFAULT; +break; +} + +if (fi->submap_idx != 0) { +err = -EINVAL; +break; +} + +/* + * There's only HVM guests and we only expose what + * we intend to support. 
These are left in the open + * whether we should or not support them: + * + * XENFEAT_memory_op_vnode_supported + * XENFEAT_writable_page_tables + */ +fi->submap = (1U << XENFEAT_auto_translated_physmap); +break; + } +} + +exit->u.hcall.result = err; +return err ? HCALL_ERR : 0; +} + static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) { uint16_t code = exit->u.hcall.input; @@ -69,6 +133,9 @@ static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) } switch (code) { +case __HYPERVISOR_xen_version: +return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0], + exit->u.hcall.params[1]); default: exit->u.hcall.result = -ENOSYS; return HCALL_ERR; -- 2.35.3
[RFC PATCH 14/21] i386/xen: implement HYPERVISOR_vcpu_op
From: Joao Martins This is simply when the guest tries to register a vcpu_info; since vcpu_info placement is optional in the minimum ABI, we can just fail with -ENOSYS. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- target/i386/xen.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/target/i386/xen.c b/target/i386/xen.c index 38d4cae3d0..61c9959981 100644 --- a/target/i386/xen.c +++ b/target/i386/xen.c @@ -18,6 +18,7 @@ #include "standard-headers/xen/version.h" #include "standard-headers/xen/memory.h" #include "standard-headers/xen/hvm/hvm_op.h" +#include "standard-headers/xen/vcpu.h" #define PAGE_OFFSET0x8000UL #define PAGE_SHIFT 12 @@ -196,6 +197,21 @@ static int kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, return HCALL_ERR; } +static int kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, + int cmd, uint64_t arg) +{ +switch (cmd) { +case VCPUOP_register_vcpu_info: { +/* no vcpu info placement for now */ +exit->u.hcall.result = -ENOSYS; +return 0; +} +} + +exit->u.hcall.result = -ENOSYS; +return HCALL_ERR; +} + static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) { uint16_t code = exit->u.hcall.input; @@ -206,6 +222,9 @@ } switch (code) { +case __HYPERVISOR_vcpu_op: +return kvm_xen_hcall_vcpu_op(exit, exit->u.hcall.params[0], + exit->u.hcall.params[1]); case __HYPERVISOR_hvm_op: return kvm_xen_hcall_hvm_op(exit, exit->u.hcall.params[0], exit->u.hcall.params[1]); -- 2.35.3
[RFC PATCH 21/21] i386/xen: implement HYPERVISOR_sched_op
From: Joao Martins It allows the guest to shut itself down via hypercall with any of the 3 reasons: 1) self-reboot 2) shutdown 3) crash Implementing the SCHEDOP_shutdown sub-op lets us handle crashes gracefully rather than leading to triple faults if it remains unimplemented. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- target/i386/xen.c | 44 1 file changed, 44 insertions(+) diff --git a/target/i386/xen.c b/target/i386/xen.c index f3cc240bff..9606f5978a 100644 --- a/target/i386/xen.c +++ b/target/i386/xen.c @@ -15,6 +15,7 @@ #include "xen.h" #include "trace.h" #include "sysemu/sysemu.h" +#include "sysemu/runstate.h" #define __XEN_INTERFACE_VERSION__ 0x00040400 @@ -23,6 +24,7 @@ #include "standard-headers/xen/hvm/hvm_op.h" #include "standard-headers/xen/hvm/params.h" #include "standard-headers/xen/vcpu.h" +#include "standard-headers/xen/sched.h" #include "standard-headers/xen/event_channel.h" #define PAGE_OFFSET0x8000UL @@ -476,6 +478,44 @@ static int kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, return err ? 
HCALL_ERR : 0; } +static int schedop_shutdown(CPUState *cs, uint64_t arg) +{ +struct sched_shutdown *shutdown; + +shutdown = gva_to_hva(cs, arg); +if (!shutdown) { +return -EFAULT; +} + +if (shutdown->reason == SHUTDOWN_crash) { +cpu_dump_state(cs, stderr, CPU_DUMP_CODE); +qemu_system_guest_panicked(NULL); +} else if (shutdown->reason == SHUTDOWN_reboot) { +qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); +} else if (shutdown->reason == SHUTDOWN_poweroff) { +qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); +} + +return 0; +} + +static int kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, uint64_t arg) +{ +CPUState *cs = CPU(cpu); +int err = -ENOSYS; + +switch (cmd) { +case SCHEDOP_shutdown: { + err = schedop_shutdown(cs, arg); + break; + } +} + +exit->u.hcall.result = err; +return err; +} + static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) { uint16_t code = exit->u.hcall.input; @@ -489,6 +529,10 @@ static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) case HVMOP_set_evtchn_upcall_vector: return kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, exit->u.hcall.params[0]); +case __HYPERVISOR_sched_op_compat: +case __HYPERVISOR_sched_op: +return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0], + exit->u.hcall.params[1]); case __HYPERVISOR_event_channel_op_compat: return kvm_xen_hcall_evtchn_op_compat(exit, cpu, exit->u.hcall.params[0]); -- 2.35.3
[RFC PATCH 03/21] i386/kvm: handle Xen HVM cpuid leaves
From: Joao Martins Introduce support for emulating CPUID for Xen HVM guests via xen, xen_vapic as changeable params. Signed-off-by: Joao Martins [dwmw2: Obtain xen_version from machine property] Signed-off-by: David Woodhouse --- target/i386/cpu.c | 2 ++ target/i386/cpu.h | 3 ++ target/i386/kvm/kvm.c | 72 +++ target/i386/xen.h | 8 + 4 files changed, 85 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 22b681ca37..45aa9e40a5 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7069,6 +7069,8 @@ static Property x86_cpu_properties[] = { * own cache information (see x86_cpu_load_def()). */ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true), +DEFINE_PROP_BOOL("xen", X86CPU, xen, false), +DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false), /* * From "Requirements for Implementing the Microsoft diff --git a/target/i386/cpu.h b/target/i386/cpu.h index d4bc19577a..5ddd14467e 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1964,6 +1964,9 @@ struct ArchCPU { int32_t thread_id; int32_t hv_max_vps; + +bool xen; +bool xen_vapic; }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index ff3ea245cf..4b21d03250 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -22,6 +22,7 @@ #include #include "standard-headers/asm-x86/kvm_para.h" +#include "standard-headers/xen/arch-x86/cpuid.h" #include "cpu.h" #include "host-cpu.h" @@ -34,6 +35,7 @@ #include "xen.h" #include "hyperv.h" #include "hyperv-proto.h" +#include "xen.h" #include "exec/gdbstub.h" #include "qemu/host-utils.h" @@ -775,6 +777,12 @@ static inline bool freq_within_bounds(int freq, int target_freq) return false; } + +static bool xen_enabled_on_kvm(X86CPU *cpu) +{ +return cpu->xen; +} + static int kvm_arch_set_tsc_khz(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); @@ -1800,6 +1808,70 @@ int kvm_arch_init_vcpu(CPUState *cs) has_msr_hv_hypercall = true; } +if (xen_enabled_on_kvm(cpu) && kvm_base == XEN_CPUID_SIGNATURE) { +struct kvm_cpuid_entry2 
*xen_max_leaf; +MachineState *ms = MACHINE(qdev_get_machine()); +uint32_t xen_version = object_property_get_int(OBJECT(ms), "xen-version", _abort); + +memcpy(signature, "XenVMMXenVMM", 12); + +xen_max_leaf = c = _data.entries[cpuid_i++]; +c->function = XEN_CPUID_SIGNATURE; +c->eax = XEN_CPUID_TIME; +c->ebx = signature[0]; +c->ecx = signature[1]; +c->edx = signature[2]; + +c = _data.entries[cpuid_i++]; +c->function = XEN_CPUID_VENDOR; +c->eax = xen_version; +c->ebx = 0; +c->ecx = 0; +c->edx = 0; + +c = _data.entries[cpuid_i++]; +c->function = XEN_CPUID_HVM_MSR; +/* Number of hypercall-transfer pages */ +c->eax = 1; +/* Hypercall MSR base address */ +c->ebx = XEN_HYPERCALL_MSR; +c->ecx = 0; +c->edx = 0; + +c = _data.entries[cpuid_i++]; +c->function = XEN_CPUID_TIME; +c->eax = ((!!tsc_is_stable_and_known(env) << 1) | +(!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); +/* default=0 (emulate if necessary) */ +c->ebx = 0; +/* guest tsc frequency */ +c->ecx = env->user_tsc_khz; +/* guest tsc incarnation (migration count) */ +c->edx = 0; + +c = _data.entries[cpuid_i++]; +c->function = XEN_CPUID_HVM; +xen_max_leaf->eax = XEN_CPUID_HVM; +if (xen_version >= XEN_VERSION(4,5)) { +c->function = XEN_CPUID_HVM; + +if (cpu->xen_vapic) { +c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; +c->eax |= XEN_HVM_CPUID_X2APIC_VIRT; +} + +c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; + +if (xen_version >= XEN_VERSION(4,6)) { +c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; +c->ebx = cs->cpu_index; +} +} + +kvm_base = KVM_CPUID_SIGNATURE_NEXT; +} + + if (cpu->expose_kvm) { memcpy(signature, "KVMKVMKVM\0\0\0", 12); c = _data.entries[cpuid_i++]; diff --git a/target/i386/xen.h b/target/i386/xen.h index 6c4f3b7822..d4903ecfa1 100644 --- a/target/i386/xen.h +++ b/target/i386/xen.h @@ -14,6 +14,14 @@ #define XEN_HYPERCALL_MSR 0x4000 +#define XEN_CPUID_SIGNATURE0x4000 +#define XEN_CPUID_VENDOR 0x4001 +#define XEN_CPUID_HVM_MSR 0x4002 +#define XEN_CPUID_TIME 0x4003 +#define XEN_CPUID_HVM 0x4004 + 
+#define XEN_VERSION(maj, min) ((maj) << 16 | (min)) + int kvm_xen_init(KVMState *s, uint32_t xen_version); #endif /* QEMU_I386_XEN_H */ -- 2.35.3
[RFC PATCH 06/21] pc_piix: handle XEN_EMULATE backend init
From: Joao Martins And use newly added xen_emulated_machine_init() to initialize the xenstore and the sysdev bus for future emulated devices. Signed-off-by: Joao Martins [dwmw2: Move it to xen-legacy-backend.c] Signed-off-by: David Woodhouse --- hw/i386/pc_piix.c | 5 + hw/xen/xen-legacy-backend.c | 22 -- include/hw/xen/xen-legacy-backend.h | 2 ++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 13286d0739..3dcac2f4b6 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -47,6 +47,7 @@ #include "hw/sysbus.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/xen/xen-x86.h" +#include "hw/xen/xen-legacy-backend.h" #include "exec/memory.h" #include "hw/acpi/acpi.h" #include "hw/acpi/piix4.h" @@ -155,6 +156,10 @@ static void pc_init1(MachineState *machine, x86ms->above_4g_mem_size = 0; x86ms->below_4g_mem_size = machine->ram_size; } + +if (pcms->xen_version && !xen_be_xenstore_open()) { +xen_emulated_machine_init(); +} } pc_machine_init_sgx_epc(pcms); diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index 694e7bbc54..60a7bc7ab6 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -31,6 +31,7 @@ #include "qapi/error.h" #include "hw/xen/xen-legacy-backend.h" #include "hw/xen/xen_pvdev.h" +#include "hw/xen/xen-bus.h" #include "monitor/qdev.h" DeviceState *xen_sysdev; @@ -294,13 +295,15 @@ static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom, xendev->debug = debug; xendev->local_port = -1; -xendev->evtchndev = xenevtchn_open(NULL, 0); -if (xendev->evtchndev == NULL) { -xen_pv_printf(NULL, 0, "can't open evtchn device\n"); -qdev_unplug(DEVICE(xendev), NULL); -return NULL; +if (xen_mode != XEN_EMULATE) { +xendev->evtchndev = xenevtchn_open(NULL, 0); +if (xendev->evtchndev == NULL) { +xen_pv_printf(NULL, 0, "can't open evtchn device\n"); +qdev_unplug(DEVICE(xendev), NULL); +return NULL; +} +qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev)); }
-qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev)); xen_pv_insert_xendev(xendev); @@ -859,3 +862,10 @@ static void xenbe_register_types(void) } type_init(xenbe_register_types) + +void xen_emulated_machine_init(void) +{ +xen_bus_init(); +xen_be_sysdev_init(); +xen_be_register_common(); +} diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h index 0aa171f6c2..aa09015662 100644 --- a/include/hw/xen/xen-legacy-backend.h +++ b/include/hw/xen/xen-legacy-backend.h @@ -105,4 +105,6 @@ int xen_config_dev_vfb(int vdev, const char *type); int xen_config_dev_vkbd(int vdev); int xen_config_dev_console(int vdev); +void xen_emulated_machine_init(void); + #endif /* HW_XEN_LEGACY_BACKEND_H */ -- 2.35.3
[RFC PATCH 17/21] i386/xen: handle register_runstate_memory_area
From: Joao Martins Allow guest to setup the vcpu runstates which is used as steal clock. Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- target/i386/xen.c | 25 + 1 file changed, 25 insertions(+) diff --git a/target/i386/xen.c b/target/i386/xen.c index 1def526e08..6f0c46c018 100644 --- a/target/i386/xen.c +++ b/target/i386/xen.c @@ -270,6 +270,27 @@ static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target, return xen_set_vcpu_attr(target, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, gpa); } +static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target, + uint64_t arg) +{ +struct vcpu_register_runstate_memory_area *rma; +uint64_t gpa; +void *hva; + +rma = gva_to_hva(cs, arg); +if (!rma) { +return -EFAULT; +} + +hva = gva_to_hva(cs, rma->addr.p); +if (!hva || !rma->addr.p) { +return -EFAULT; +} + +gpa = gva_to_gpa(cs, rma->addr.p); +return xen_set_vcpu_attr(target, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, gpa); +} + static int kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu, int cmd, int vcpu_id, uint64_t arg) { @@ -278,6 +299,10 @@ static int kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu, int err = -ENOSYS; switch (cmd) { +case VCPUOP_register_runstate_memory_area: { +err = vcpuop_register_runstate_info(cs, dest, arg); +break; +} case VCPUOP_register_vcpu_time_memory_area: { err = vcpuop_register_vcpu_time_info(cs, dest, arg); break; -- 2.35.3
[RFC PATCH 05/21] hw/xen_backend: refactor xen_be_init()
From: Joao Martins Signed-off-by: Joao Martins Signed-off-by: David Woodhouse --- hw/xen/xen-legacy-backend.c | 40 + include/hw/xen/xen-legacy-backend.h | 3 +++ 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index 085fd31ef7..694e7bbc54 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -676,17 +676,40 @@ void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev) } /* */ -int xen_be_init(void) +int xen_be_xenstore_open(void) { -xengnttab_handle *gnttabdev; - xenstore = xs_daemon_open(); if (!xenstore) { -xen_pv_printf(NULL, 0, "can't connect to xenstored\n"); return -1; } qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL); +return 0; +} + +void xen_be_xenstore_close(void) +{ +qemu_set_fd_handler(xs_fileno(xenstore), NULL, NULL, NULL); +xs_daemon_close(xenstore); +xenstore = NULL; +} + +void xen_be_sysdev_init(void) +{ +xen_sysdev = qdev_new(TYPE_XENSYSDEV); +sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), _fatal); +xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus"); +qbus_set_bus_hotplug_handler(xen_sysbus); +} + +int xen_be_init(void) +{ +xengnttab_handle *gnttabdev; + +if (xen_be_xenstore_open()) { +xen_pv_printf(NULL, 0, "can't connect to xenstored\n"); +return -1; +} if (xen_xc == NULL || xen_fmem == NULL) { /* Check if xen_init() have been called */ @@ -701,17 +724,12 @@ int xen_be_init(void) xengnttab_close(gnttabdev); } -xen_sysdev = qdev_new(TYPE_XENSYSDEV); -sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), _fatal); -xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus"); -qbus_set_bus_hotplug_handler(xen_sysbus); +xen_be_sysdev_init(); return 0; err: -qemu_set_fd_handler(xs_fileno(xenstore), NULL, NULL, NULL); -xs_daemon_close(xenstore); -xenstore = NULL; +xen_be_xenstore_close(); return -1; } diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h index 
be281e1f38..0aa171f6c2 100644 --- a/include/hw/xen/xen-legacy-backend.h +++ b/include/hw/xen/xen-legacy-backend.h @@ -42,6 +42,9 @@ int xenstore_read_fe_uint64(struct XenLegacyDevice *xendev, const char *node, void xen_be_check_state(struct XenLegacyDevice *xendev); /* xen backend driver bits */ +int xen_be_xenstore_open(void); +void xen_be_xenstore_close(void); +void xen_be_sysdev_init(void); int xen_be_init(void); void xen_be_register_common(void); int xen_be_register(const char *type, struct XenDevOps *ops); -- 2.35.3