Re: [PATCH v2 4/5] x86/iommu: pass full IO-APIC RTE for remapping table update

2023-07-26 Thread Roger Pau Monné
On Wed, Jul 19, 2023 at 12:37:47PM +0200, Jan Beulich wrote:
> On 18.07.2023 14:43, Roger Pau Monne wrote:
> > @@ -439,36 +427,47 @@ unsigned int cf_check io_apic_read_remap_rte(
> >  }
> >  
> >  void cf_check io_apic_write_remap_rte(
> > -unsigned int apic, unsigned int reg, unsigned int value)
> > +unsigned int apic, unsigned int pin, uint64_t raw)
> >  {
> > -unsigned int pin = (reg - 0x10) / 2;
> > +struct IO_xAPIC_route_entry rte = { .raw = raw };
> >  struct IO_xAPIC_route_entry old_rte = { };
> >  struct IO_APIC_route_remap_entry *remap_rte;
> > -unsigned int rte_upper = (reg & 1) ? 1 : 0;
> >  struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
> > -int saved_mask;
> > -
> > -old_rte = __ioapic_read_entry(apic, pin, true);
> > +bool masked = true;
> > +int rc;
> >  
> >  remap_rte = (struct IO_APIC_route_remap_entry *) _rte;
> >  
> > -/* mask the interrupt while we change the intremap table */
> > -saved_mask = remap_rte->mask;
> > -remap_rte->mask = 1;
> > -__io_apic_write(apic, reg & ~1, *(u32 *)_rte);
> > -remap_rte->mask = saved_mask;
> > -
> > -if ( ioapic_rte_to_remap_entry(iommu, apic, pin,
> > -   _rte, rte_upper, value) )
> > +if ( !cpu_has_cx16 )
> >  {
> > -__io_apic_write(apic, reg, value);
> > +   /*
> > +* Cannot atomically update the IRTE entry: mask the IO-APIC pin to
> > +* avoid interrupts seeing an inconsistent IRTE entry.
> > +*/
> > +old_rte = __ioapic_read_entry(apic, pin, true);
> > +if ( !old_rte.mask )
> > +{
> > +masked = false;
> > +old_rte.mask = 1;
> > +__ioapic_write_entry(apic, pin, true, old_rte);
> > +}
> > +}
> >  
> > -/* Recover the original value of 'mask' bit */
> > -if ( rte_upper )
> > -__io_apic_write(apic, reg & ~1, *(u32 *)_rte);
> > +rc = ioapic_rte_to_remap_entry(iommu, apic, pin, _rte, rte);
> 
> I realize it has been like this before, but passing _rte here is
> odd. We already have its properly typed alias: remap_rte. All the
> called function does is do the same type cast again. Question is
> whether ...
> 
> > +if ( rc )
> > +{
> > +if ( !masked )
> > +{
> > +/* Recover the original value of 'mask' bit */
> > +old_rte.mask = 0;
> > +__ioapic_write_entry(apic, pin, true, old_rte);
> > +}
> > +dprintk(XENLOG_ERR VTDPREFIX,
> > +"failed to update IRTE for IO-APIC#%u pin %u: %d\n",
> > +apic, pin, rc);
> > +return;
> >  }
> > -else
> > -__ioapic_write_entry(apic, pin, true, old_rte);
> > +__ioapic_write_entry(apic, pin, true, old_rte);
> 
> ... the further uses of old_rte then won't end up yet more confusing
> than they already are (first and foremost again because of "old" not
> being applicable here).

I've instead opted to remove remap_rte from io_apic_write_remap_rte(),
as it was unused.  I've also added a comment to clarify the usage of
old_rte when ioapic_rte_to_remap_entry() returns success.

Thanks, Roger.



Re: [PATCH v2 4/5] x86/iommu: pass full IO-APIC RTE for remapping table update

2023-07-19 Thread Jan Beulich
On 18.07.2023 14:43, Roger Pau Monne wrote:
> So that the remapping entry can be updated atomically when possible.
> 
> Doing such update atomically will avoid Xen having to mask the IO-APIC
> pin prior to performing any interrupt movements (ie: changing the
> destination and vector fields), as the interrupt remapping entry is
> always consistent.
> 
> This also simplifies some of the logic on both VT-d and AMD-Vi
> implementations, as having the full RTE available instead of half of
> it avoids to possibly read and update the missing other half from
> hardware.
> 
> While there remove the explicit zeroing of new_ire fields in
> ioapic_rte_to_remap_entry() and initialize the variable at definition
> so all fields are zeroed.  Note fields could be also initialized with
> final values at definition, but I found that likely too much to be
> done at this time.
> 
> Signed-off-by: Roger Pau Monné 
> ---
> Note that certain combination of changes to the RTE are impossible to
> handle atomically. For example changing the vector and/or destination
> fields together with the triggering mode is impossible to be performed
> atomically (as the destination and vector is set in the IRTE, but the
> triggering mode is set in the RTE).  Xen doesn't attempt to perform
> such changes in a single update to the RTE anyway, so it's fine.
> 
> Naming the iommu_update_ire_from_apic() parameter RTE is not really
> correct, as the format of the passed value expands the destination
> field to be 32bits (in order to fit an x2APIC ID).  Passing an
> IO_APIC_route_entry struct is not possible due to the circular
> dependency that would create between io_apic.h and iommu.h.  It might
> be possible to move IO_APIC_route_entry declaration to a different
> header, but I haven't looked into it.
> ---
>  xen/arch/x86/include/asm/iommu.h |   3 +-
>  xen/arch/x86/io_apic.c   |   5 +-
>  xen/drivers/passthrough/amd/iommu.h  |   2 +-
>  xen/drivers/passthrough/amd/iommu_intr.c |  98 ++---
>  xen/drivers/passthrough/vtd/extern.h |   2 +-
>  xen/drivers/passthrough/vtd/intremap.c   | 127 +++
>  xen/drivers/passthrough/x86/iommu.c  |   4 +-
>  xen/include/xen/iommu.h  |   3 +-
>  8 files changed, 80 insertions(+), 164 deletions(-)

Nice diffstat.

> --- a/xen/arch/x86/include/asm/iommu.h
> +++ b/xen/arch/x86/include/asm/iommu.h
> @@ -84,7 +84,8 @@ struct iommu_init_ops {
>  
>  extern const struct iommu_init_ops *iommu_init_ops;
>  
> -void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, 
> unsigned int value);
> +void iommu_update_ire_from_apic(unsigned int apic, unsigned int pin,
> +uint64_t rte);

Much like you have it here, ...

> --- a/xen/drivers/passthrough/amd/iommu.h
> +++ b/xen/drivers/passthrough/amd/iommu.h
> @@ -300,7 +300,7 @@ int cf_check amd_iommu_free_intremap_table(
>  unsigned int amd_iommu_intremap_table_order(
>  const void *irt, const struct amd_iommu *iommu);
>  void cf_check amd_iommu_ioapic_update_ire(
> -unsigned int apic, unsigned int reg, unsigned int value);
> +unsigned int apic, unsigned int pin, uint64_t raw);

... could the changed parameter also have "rte" in its name? I don't
mind if it's "raw_rte", to avoid colliding with existing variable
names (in at least the VT-d counterpart).

> @@ -350,14 +319,11 @@ static int update_intremap_entry_from_ioapic(
>  }
>  
>  void cf_check amd_iommu_ioapic_update_ire(
> -unsigned int apic, unsigned int reg, unsigned int value)
> +unsigned int apic, unsigned int pin, uint64_t raw)
>  {
>  struct IO_APIC_route_entry old_rte = { };

Looks like the initializer here now isn't needed anymore?

> @@ -364,48 +363,37 @@ static int ioapic_rte_to_remap_entry(struct vtd_iommu 
> *iommu,
>  
>  new_ire = *iremap_entry;
>  
> -if ( rte_upper )
> -{
> -if ( x2apic_enabled )
> -new_ire.remap.dst = value;
> -else
> -new_ire.remap.dst = (value >> 24) << 8;
> -}
> +if ( x2apic_enabled )
> +new_ire.remap.dst = new_rte.dest.dest32;
>  else
> -{
> -*(((u32 *)_rte) + 0) = value;
> -new_ire.remap.fpd = 0;
> -new_ire.remap.dm = new_rte.dest_mode;
> -new_ire.remap.tm = new_rte.trigger;
> -new_ire.remap.dlm = new_rte.delivery_mode;
> -/* Hardware require RH = 1 for LPR delivery mode */
> -new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
> -new_ire.remap.avail = 0;
> -new_ire.remap.res_1 = 0;
> -new_ire.remap.vector = new_rte.vector;
> -new_ire.remap.res_2 = 0;
> -
> -set_ioapic_source_id(IO_APIC_ID(apic), _ire);
> -new_ire.remap.res_3 = 0;
> -new_ire.remap.res_4 = 0;
> -new_ire.remap.p = 1; /* finally, set present bit */
> -
> -/* now construct new ioapic rte entry */
> -remap_rte->vector = new_rte.vector;
> -

[PATCH v2 4/5] x86/iommu: pass full IO-APIC RTE for remapping table update

2023-07-18 Thread Roger Pau Monne
So that the remapping entry can be updated atomically when possible.

Doing such update atomically will avoid Xen having to mask the IO-APIC
pin prior to performing any interrupt movements (ie: changing the
destination and vector fields), as the interrupt remapping entry is
always consistent.

This also simplifies some of the logic on both VT-d and AMD-Vi
implementations, as having the full RTE available instead of half of
it avoids to possibly read and update the missing other half from
hardware.

While there remove the explicit zeroing of new_ire fields in
ioapic_rte_to_remap_entry() and initialize the variable at definition
so all fields are zeroed.  Note fields could be also initialized with
final values at definition, but I found that likely too much to be
done at this time.

Signed-off-by: Roger Pau Monné 
---
Note that certain combination of changes to the RTE are impossible to
handle atomically. For example changing the vector and/or destination
fields together with the triggering mode is impossible to be performed
atomically (as the destination and vector is set in the IRTE, but the
triggering mode is set in the RTE).  Xen doesn't attempt to perform
such changes in a single update to the RTE anyway, so it's fine.

Naming the iommu_update_ire_from_apic() parameter RTE is not really
correct, as the format of the passed value expands the destination
field to be 32bits (in order to fit an x2APIC ID).  Passing an
IO_APIC_route_entry struct is not possible due to the circular
dependency that would create between io_apic.h and iommu.h.  It might
be possible to move IO_APIC_route_entry declaration to a different
header, but I haven't looked into it.
---
 xen/arch/x86/include/asm/iommu.h |   3 +-
 xen/arch/x86/io_apic.c   |   5 +-
 xen/drivers/passthrough/amd/iommu.h  |   2 +-
 xen/drivers/passthrough/amd/iommu_intr.c |  98 ++---
 xen/drivers/passthrough/vtd/extern.h |   2 +-
 xen/drivers/passthrough/vtd/intremap.c   | 127 +++
 xen/drivers/passthrough/x86/iommu.c  |   4 +-
 xen/include/xen/iommu.h  |   3 +-
 8 files changed, 80 insertions(+), 164 deletions(-)

diff --git a/xen/arch/x86/include/asm/iommu.h b/xen/arch/x86/include/asm/iommu.h
index 0540cd9faa87..eb720205e25e 100644
--- a/xen/arch/x86/include/asm/iommu.h
+++ b/xen/arch/x86/include/asm/iommu.h
@@ -84,7 +84,8 @@ struct iommu_init_ops {
 
 extern const struct iommu_init_ops *iommu_init_ops;
 
-void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned 
int value);
+void iommu_update_ire_from_apic(unsigned int apic, unsigned int pin,
+uint64_t rte);
 unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
 int iommu_setup_hpet_msi(struct msi_desc *);
 
diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
index 85b4b4c6bc98..255ba3e8b073 100644
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -275,10 +275,7 @@ void __ioapic_write_entry(
 __io_apic_write(apic, 0x10 + 2 * pin, eu.w1);
 }
 else
-{
-iommu_update_ire_from_apic(apic, 0x11 + 2 * pin, eu.w2);
-iommu_update_ire_from_apic(apic, 0x10 + 2 * pin, eu.w1);
-}
+iommu_update_ire_from_apic(apic, pin, e.raw);
 }
 
 static void ioapic_write_entry(
diff --git a/xen/drivers/passthrough/amd/iommu.h 
b/xen/drivers/passthrough/amd/iommu.h
index 8bc3c35b1bb1..aa3681a9f2aa 100644
--- a/xen/drivers/passthrough/amd/iommu.h
+++ b/xen/drivers/passthrough/amd/iommu.h
@@ -300,7 +300,7 @@ int cf_check amd_iommu_free_intremap_table(
 unsigned int amd_iommu_intremap_table_order(
 const void *irt, const struct amd_iommu *iommu);
 void cf_check amd_iommu_ioapic_update_ire(
-unsigned int apic, unsigned int reg, unsigned int value);
+unsigned int apic, unsigned int pin, uint64_t raw);
 unsigned int cf_check amd_iommu_read_ioapic_from_ire(
 unsigned int apic, unsigned int reg);
 int cf_check amd_iommu_msi_msg_update_ire(
diff --git a/xen/drivers/passthrough/amd/iommu_intr.c 
b/xen/drivers/passthrough/amd/iommu_intr.c
index f32c418a7e49..bb324eb16da1 100644
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -247,11 +247,6 @@ static void update_intremap_entry(const struct amd_iommu 
*iommu,
 }
 }
 
-static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
-{
-return rte->vector | (rte->delivery_mode << 8);
-}
-
 static inline void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
 {
 rte->vector = (u8)offset;
@@ -267,7 +262,6 @@ static int update_intremap_entry_from_ioapic(
 int bdf,
 struct amd_iommu *iommu,
 struct IO_APIC_route_entry *rte,
-bool_t lo_update,
 u16 *index)
 {
 unsigned long flags;
@@ -315,31 +309,6 @@ static int update_intremap_entry_from_ioapic(
 spin_lock(lock);
 }
 
-if ( fresh )
-/* nothing */;
-else if ( !lo_update )
-{
-/*