Re: [PATCH] i386/hvf: Integrates x2APIC support with hvf accel

2024-06-24 Thread Bui Quang Minh

On 6/24/24 16:46, Phil Dennis-Jordan wrote:

Support for x2APIC mode was recently introduced in the software emulated
APIC implementation for TCG. Enabling it when using macOS’s hvf
accelerator is useful and significantly helps performance, as Qemu
currently uses the emulated APIC when running on hvf as well.

This change wires up the read & write operations for the MSR VM exits
and allow-lists the CPUID flag in the x86 hvf runtime.

Signed-off-by: Phil Dennis-Jordan 
---
  target/i386/hvf/x86_cpuid.c |  4 ++--
  target/i386/hvf/x86_emu.c   | 31 +++
  2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c
index e56cd8411b..4f260d46a8 100644
--- a/target/i386/hvf/x86_cpuid.c
+++ b/target/i386/hvf/x86_cpuid.c
@@ -64,8 +64,8 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
   CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX |
   CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS;
  ecx &= CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
- CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID |
- CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_MOVBE |
+ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 
|
+ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
   CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
   CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND;
  ecx |= CPUID_EXT_HYPERVISOR;
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index 38c782b8e3..be675bcfb7 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -663,6 +663,15 @@ static void exec_lods(CPUX86State *env, struct x86_decode 
*decode)
  env->eip += decode->len;
  }
  
+static void raise_exception(CPUX86State *env, int exception_index,

+int error_code)
+{
+env->exception_nr = exception_index;
+env->error_code = error_code;
+env->has_error_code = true;
+env->exception_injected = 1;
+}
+
  void simulate_rdmsr(CPUX86State *env)
  {
  X86CPU *cpu = env_archcpu(env);
@@ -677,6 +686,17 @@ void simulate_rdmsr(CPUX86State *env)
  case MSR_IA32_APICBASE:
  val = cpu_get_apic_base(cpu->apic_state);
  break;
+case MSR_APIC_START ... MSR_APIC_END: {
+int ret;
+int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
+
+ret = apic_msr_read(index, &val);
+if (ret < 0) {
+raise_exception(env, EXCP0D_GPF, 0);
+}
+
+break;
+}
  case MSR_IA32_UCODE_REV:
  val = cpu->ucode_rev;
  break;
@@ -777,6 +797,17 @@ void simulate_wrmsr(CPUX86State *env)
  case MSR_IA32_APICBASE:
  cpu_set_apic_base(cpu->apic_state, data);
  break;
+case MSR_APIC_START ... MSR_APIC_END: {
+int ret;
+int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
+
+ret = apic_msr_write(index, data);
+if (ret < 0) {
+raise_exception(env, EXCP0D_GPF, 0);
+}
+
+break;
+}
  case MSR_FSBASE:
  wvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE, data);
  break;


Acked-by: Bui Quang Minh 



Re: [PATCH] hw/intc/apic: fix memory leak

2024-03-05 Thread Bui Quang Minh

On 3/5/24 05:41, Paolo Bonzini wrote:

deliver_bitmask is allocated on the heap in apic_deliver(), but there
are many paths in the function that return before the corresponding
g_free() is reached.  Fix this by switching to g_autofree and, while at
it, also switch to g_new.  Do the same in apic_deliver_irq() as well
for consistency.

Fixes: b5ee0468e9d ("apic: add support for x2APIC mode", 2024-02-14)
Signed-off-by: Paolo Bonzini 
---
  hw/intc/apic.c | 6 ++
  1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 1d887d66b86..4186c57b34c 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -291,14 +291,13 @@ static void apic_deliver_irq(uint32_t dest, uint8_t 
dest_mode,
   uint8_t delivery_mode, uint8_t vector_num,
   uint8_t trigger_mode)
  {
-uint32_t *deliver_bitmask = g_malloc(max_apic_words * sizeof(uint32_t));
+g_autofree uint32_t *deliver_bitmask = g_new(uint32_t, max_apic_words);
  
  trace_apic_deliver_irq(dest, dest_mode, delivery_mode, vector_num,

 trigger_mode);
  
  apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode);

  apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, 
trigger_mode);
-g_free(deliver_bitmask);
  }
  
  bool is_x2apic_mode(DeviceState *dev)

@@ -662,7 +661,7 @@ static void apic_deliver(DeviceState *dev, uint32_t dest, 
uint8_t dest_mode,
  APICCommonState *s = APIC(dev);
  APICCommonState *apic_iter;
  uint32_t deliver_bitmask_size = max_apic_words * sizeof(uint32_t);
-uint32_t *deliver_bitmask = g_malloc(deliver_bitmask_size);
+g_autofree uint32_t *deliver_bitmask = g_new(uint32_t, max_apic_words);
  uint32_t current_apic_id;
  
  if (is_x2apic_mode(dev)) {

@@ -708,7 +707,6 @@ static void apic_deliver(DeviceState *dev, uint32_t dest, 
uint8_t dest_mode,
  }
  
  apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);

-g_free(deliver_bitmask);
  }
  
  static bool apic_check_pic(APICCommonState *s)


Reviewed-by: Bui Quang Minh 

Thanks,
Quang Minh.



[PATCH v12 4/7] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2024-01-11 Thread Bui Quang Minh
As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Suggested-by: Joao Martins 
Acked-by: Peter Xu 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1a07faddb4..cf933189d3 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4124,11 +4124,7 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (kvm_enabled() && !kvm_enable_x2apic()) {
+if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
 error_setg(errp, "eim=on requires support on the KVM side"
  "(X2APIC_API, first shipped in v4.7)");
 return false;
-- 
2.25.1




[PATCH v12 7/7] test: bios-tables-test: add IVRS changed binary

2024-01-11 Thread Bui Quang Minh
EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[098h 0152 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[099h 0153 002h]   Device ID : 0010
+[09Bh 0155 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[09Ch 0156 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[09Dh 0157 002h]   Device ID : 00F8
+[09Fh 0159 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[0A0h 0160 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[0A1h 0161 002h]   Device ID : 00FA
+[0A3h 0163 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[0A4h 0164 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[0A5h 0165 002h]   Device ID : 00FB
+[0A7h 0167 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[0A8h 0168 001h]   Subtable Type : 48 [Device Entry: Special 
Device]
+[0A9h 0169 002h]   Device ID : 
+[0ABh 0171 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+[0ACh 0172 001h]  Handle : 00
+[0ADh 0173 002h]   Source Used Device ID : 00A0
+[0AFh 0175 001h] Variety : 01
+
+Raw Table Data: Length 176 (0xB0)

-: 49 56 52 53 68 00 00 00 01 43 42 4F 43 48 53 20  // IVRShCBOCHS
+: 49 56 52 53 B0 00 00 00 01 74 42 4F 43 48 53 20  // IVRS.tBOCHS
 0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43  // BXPCBXPC
-0020: 01 00 00 00 00 28 00 00 00 00 00 00 00 00 00 00  // .(..
+0020: 01 00 00 00 01 28 00 00 00 00 00 00 00 00 00 00  // .(..
 0030: 10 D1 38 00 10 00 40 00 00 00 D8 FE 00 00 00 00  // ..8...@.
 0040: 00 00 00 00 44 00 00 00 02 00 00 00 02 08 00 00  // D...
 0050: 02 10 00 00 02 F8 00 00 02 FA 00 00 02 FB 00 00  // 
-0060: 48 00 00 00 00 A0 00 01  // H...
+0060: 48 00 00 00 00 A0 00 01 11 11 48 00 10 00 40 00  // H.H...@.
+0070: 00 00 D8 FE 00 00 00 00 00 00 00 00 00 00 00 00  // 
+0080: D3 29 00 00 00 00 00 00 00 00 00 00 00 00 00 00  // .)..
+0090: 02 00 00 00 02 08 00 00 02 10 00 00 02 F8 00 00  // 
+00A0: 02 FA 00 00 02 FB 00 00 48 00 00 00 00 A0 00 01  // ....H...

Signed-off-by: Bui Quang Minh 
---
 tests/data/acpi/q35/IVRS.ivrs   | Bin 104 -> 176 bytes
 tests/qtest/bios-tables-test-allowed-diff.h |   1 -
 2 files changed, 1 deletion(-)

diff --git a/tests/data/acpi/q35/IVRS.ivrs b/tests/data/acpi/q35/IVRS.ivrs
index 
17611202e53a32f7da8e4925d6955b384670b8b1..7f9e91aabc0ba7efc9f219587a4f91f0edb1
 100644
GIT binary patch
delta 63
zcmd1Uz{uqp78JaJfq{XsWFnUoBjZF>XH`K#4+a4S2cY1Me?S5bE^ES>3=9)pl>vea
B3FiO+

delta 22
dcmdnMn8D>478IPpz`(%hJdsO^kzt~$GXOlo1j7IT

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index ac420db6b7..dfb8523c8b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b

[PATCH v12 5/7] test: bios-tables-test: prepare IVRS change in ACPI table

2024-01-11 Thread Bui Quang Minh
Following the instructions in bios-tables-test, this lists that IVRS.ivrs
in ACPI table will be changed to add new IVHD type 0x11.

Signed-off-by: Bui Quang Minh 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..ac420db6b7 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/q35/IVRS.ivrs",
-- 
2.25.1




[PATCH v12 1/7] i386/tcg: implement x2APIC registers MSR access

2024-01-11 Thread Bui Quang Minh
This commit creates apic_register_read/write which are used by both
apic_mem_read/write for MMIO access and apic_msr_read/write for MSR access.

The apic_msr_read/write functions return -1 on error; the accelerator can use
this to raise the appropriate exception.

Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 122 ---
 hw/intc/trace-events |   4 +-
 include/hw/i386/apic.h   |   3 +
 target/i386/cpu.h|   3 +
 target/i386/tcg/sysemu/misc_helper.c |  27 ++
 5 files changed, 127 insertions(+), 32 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ac3d47d231..7a349c0723 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,24 +643,19 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+static int apic_register_read(int index, uint64_t *value)
 {
 DeviceState *dev;
 APICCommonState *s;
 uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+int ret = 0;
 
 dev = cpu_get_current_apic();
 if (!dev) {
-return 0;
+return -1;
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -718,12 +720,46 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 default:
 s->esr |= APIC_ESR_ILLEGAL_ADDRESS;
 val = 0;
+ret = -1;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+*value = val;
+return ret;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint64_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+apic_register_read(index, &val);
+
 return val;
 }
 
+int apic_msr_read(int index, uint64_t *val)
+{
+DeviceState *dev;
+
+dev = cpu_get_current_apic();
+if (!dev) {
+return -1;
+}
+
+if (!is_x2apic_mode(dev)) {
+return -1;
+}
+
+return apic_register_read(index, val);
+}
+
 static void apic_send_msi(MSIMessage *msi)
 {
 uint64_t addr = msi->address;
@@ -737,35 +773,18 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+static int apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
-return;
+return -1;
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -839,8 +858,51 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 break;
 default:
 s->esr |= APIC_ESR_ILLEGAL_ADDRESS;
-break;
+return -1;
 }
+
+return 0;
+}
+
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/*
+ * MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa.
+ */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
+int apic_msr_write(int index, uint64_t val)
+{
+Dev

[PATCH v12 3/7] apic, i386/tcg: add x2apic transitions

2024-01-11 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

The set_base in APICCommonClass now returns an integer to indicate an error in
execution. apic_set_base returns -1 on an invalid APIC state transition; the
accelerator can use this to raise the appropriate exception.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/kvm/apic.c   |  3 +-
 hw/i386/xen/xen_apic.c   |  3 +-
 hw/intc/apic.c   | 62 +++-
 hw/intc/apic_common.c| 13 +++---
 include/hw/i386/apic.h   |  2 +-
 include/hw/i386/apic_internal.h  |  2 +-
 target/i386/cpu.c|  9 ++--
 target/i386/cpu.h|  4 ++
 target/i386/tcg/sysemu/misc_helper.c | 14 ++-
 target/i386/whpx/whpx-apic.c |  3 +-
 10 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 1e89ca0899..a72c28e8a7 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -95,9 +95,10 @@ void kvm_get_apic_state(DeviceState *dev, struct 
kvm_lapic_state *kapic)
 apic_next_timer(s, s->initial_count_load_time);
 }
 
-static void kvm_apic_set_base(APICCommonState *s, uint64_t val)
+static int kvm_apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = val;
+return 0;
 }
 
 static void kvm_apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/i386/xen/xen_apic.c b/hw/i386/xen/xen_apic.c
index 7c7a60b166..101e16a766 100644
--- a/hw/i386/xen/xen_apic.c
+++ b/hw/i386/xen/xen_apic.c
@@ -49,8 +49,9 @@ static void xen_apic_realize(DeviceState *dev, Error **errp)
 msi_nonbroken = true;
 }
 
-static void xen_apic_set_base(APICCommonState *s, uint64_t val)
+static int xen_apic_set_base(APICCommonState *s, uint64_t val)
 {
+return 0;
 }
 
 static void xen_apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 178fb26b47..1d887d66b8 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -308,8 +308,49 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
-static void apic_set_base(APICCommonState *s, uint64_t val)
+static int apic_set_base_check(APICCommonState *s, uint64_t val)
 {
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD) {
+return -1;
+}
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+return 0;
+}
+
+static int apic_set_base(APICCommonState *s, uint64_t val)
+{
+if (apic_set_base_check(s, val) < 0) {
+return -1;
+}
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -318,6 +359,25 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
+
+return 0;
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 3c43ac9a1d..16ab40a35f 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c

[PATCH v12 0/7] Support x2APIC mode with TCG accelerator

2024-01-11 Thread Bui Quang Minh
7 changes,
- Patch 4:
  + If eim=on, keep checking if kvm x2APIC is enabled when kernel-irqchip
  is split

Version 6 changes,
- Patch 5:
  + Make all places use the amdvi_extended_feature_register to get extended
  feature register

Version 5 changes,
- Patch 3:
  + Rebase to master and fix conflict
- Patch 5:
  + Create a helper function to get amdvi extended feature register instead
  of storing it in AMDVIState

Version 4 changes,
- Patch 5:
  + Instead of replacing IVHD type 0x10 with type 0x11, export both types
  for backward compatibility with old guest operating system
  + Flip the xtsup feature check condition in amdvi_int_remap_ga for
  readability

Version 3 changes,
- Patch 2:
  + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
  + Make physical destination mode IPI which has destination id 0x
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (7):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended Interrupt Mode when using userspace APIC
  test: bios-tables-test: prepare IVRS change in ACPI table
  amd_iommu: report x2APIC support to the operating system
  test: bios-tables-test: add IVRS changed binary

 hw/i386/acpi-build.c | 129 +---
 hw/i386/amd_iommu-stub.c |  26 ++
 hw/i386/amd_iommu.c  |  29 +-
 hw/i386/amd_iommu.h  |  16 +-
 hw/i386/intel_iommu.c|   6 +-
 hw/i386/kvm/apic.c   |   3 +-
 hw/i386/meson.build  |   3 +-
 hw/i386/x86.c|   6 +-
 hw/i386/xen/xen_apic.c   |   3 +-
 hw/intc/apic.c   | 473 +--
 hw/intc/apic_common.c|  22 +-
 hw/intc/trace-events |   4 +-
 include/hw/i386/apic.h   |   8 +-
 include/hw/i386/apic_internal.h  |   9 +-
 target/i386/cpu-sysemu.c |  18 +-
 target/i386/cpu.c|   9 +-
 target/i386/cpu.h|   9 +
 target/i386/tcg/sysemu/misc_helper.c |  41 ++-
 target/i386/whpx/whpx-apic.c |   3 +-
 tests/data/acpi/q35/IVRS.ivrs| Bin 104 -> 176 bytes
 20 files changed, 628 insertions(+), 189 deletions(-)
 create mode 100644 hw/i386/amd_iommu-stub.c

-- 
2.25.1




[PATCH v12 6/7] amd_iommu: report x2APIC support to the operating system

2024-01-11 Thread Bui Quang Minh
This commit adds an XTSup configuration option to let the user choose whether
to enable this feature. When XTSup is enabled, additional bytes in IRTE with
enabled guest virtual VAPIC are used to support 32-bit destination id.

Additionally, this commit exports IVHD type 0x11 besides the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. The IVHD type 0x10
is kept so that old operating systems that only parse type 0x10 can detect
the IOMMU device.

Besides, an amd_iommu-stub.c file is created to provide the definition for
amdvi_extended_feature_register when CONFIG_AMD_IOMMU=n. This function is
used by acpi-build.c to get the extended feature register value for
building the ACPI table. When CONFIG_AMD_IOMMU=y, this function is defined
in amd_iommu.c.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 129 ---
 hw/i386/amd_iommu-stub.c |  26 
 hw/i386/amd_iommu.c  |  29 -
 hw/i386/amd_iommu.h  |  16 +++--
 hw/i386/meson.build  |   3 +-
 5 files changed, 145 insertions(+), 58 deletions(-)
 create mode 100644 hw/i386/amd_iommu-stub.c

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index edc979379c..8890a299ee 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2333,30 +2333,23 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
 .oem_table_id = oem_table_id };
+uint64_t feature_report;
 
 acpi_table_begin(&table, table_data);
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
 /*
  * A PCI bus walk, for each PCI host bridge, is necessary to create a
  * complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2376,56 +2369,94 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(ivhd_blob, 0x001, 4);
 }
 
-ivhd_table_len += ivhd_blob->len;
-
 /*
  * When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
  * Refer to spec - Table 95: IVHD device entry type codes
  *
  * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
  * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
  */
 if (x86_iommu_ir_sup

[PATCH v12 2/7] apic: add support for x2APIC mode

2024-01-11 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and x2APIC
mode register access are supported.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/x86.c   |   6 +-
 hw/intc/apic.c  | 289 
 hw/intc/apic_common.c   |   9 +
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|  18 +-
 target/i386/cpu.h   |   2 +
 7 files changed, 259 insertions(+), 75 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 2b6291ad8d..3d1bdd334e 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -137,7 +137,7 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * a literal `0` in configurations where kvm_* aren't defined)
  */
 if (kvm_enabled() && x86ms->apic_id_limit > 255 &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -147,6 +147,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 7a349c0723..178fb26b47 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -32,14 +32,13 @@
 #include "qapi/error.h"
 #include "qom/object.h"
 
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
-
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +48,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,10 +210,10 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for (__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
-for(__j = 0; __j < 32; __j++) {\
+for (__j = 0; __j < 32; __j++) {\
 if (__mask & (1U << __j)) {\
 apic = local_apics[__i * 32 + __j];\
 if (apic) {\
@@ -226,7 +237,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for (i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +287,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
-  uint8_t vector_num, uint8_t trigger_mode)
+static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode,
+ uint8_t delivery_mode, uint8_t vector_num,
+ uint8_t trigger_mode)
 {
-uint32_t deliver_bitmask[MAX_APIC_WORDS];
+uint32_t *deliver_bitmask = g_malloc(max_apic_words * sizeof(uint32_t));
 
 trace_apic_deliver_irq(dest, d

Re: CI "pages" job failing with incomprehensible error message from htags

2024-01-08 Thread Bui Quang Minh

On 1/8/24 18:03, Thomas Huth wrote:

On 05/01/2024 20.11, Peter Maydell wrote:

https://gitlab.com/qemu-project/qemu/-/jobs/5871592479

failed with

$ htags -anT --tree-view=filetree -m qemu_init -t "Welcome to the QEMU
sourcecode"
htags: Negative exec line limit = -371

Does anybody have any idea what this is about ?


In case you haven't spotted it yet:

https://www.mail-archive.com/qemu-devel@nongnu.org/msg1014394.html

Is anybody already creating a patch to clear CI_COMMIT_MESSAGE
when invoking htags?


That solution works fine on my CI; however, the GitLab documentation
states that overriding predefined variables is not recommended.


https://docs.gitlab.com/ee/ci/variables/predefined_variables.html
	Avoid overriding predefined variables, as it can cause the pipeline to 
behave unexpectedly.


Thanks,
Quang Minh.



Re: [PATCH v11 0/7] Support x2APIC mode with TCG accelerator

2024-01-06 Thread Bui Quang Minh

On 12/28/23 22:44, Bui Quang Minh wrote:

On 12/26/23 16:21, Michael S. Tsirkin wrote:

On Mon, Dec 25, 2023 at 11:40:54PM +0700, Bui Quang Minh wrote:

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel 
iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. 
With this
series, we can now boot Linux kernel into x2APIC mode with TCG 
accelerator

using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel boots successfully
with x2APIC enabled and can enumerate the CPU with APIC ID 257

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device intel-iommu,intremap=on,eim=on \
   -device 
qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \

   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \

   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device amd-iommu,intremap=on,xtsup=on \
   -device 
qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \

   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \

   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch


Seems to break build for windows/amd64
https://gitlab.com/mstredhat/qemu/-/pipelines/1118886361/failures


I saw the CI test "pages" failed too. On my CI, most of the time, it 
failed with


$ htags -anT --tree-view=filetree -m qemu_init -t "Welcome to the QEMU 
sourcecode"

00:24
htags: Negative exec line limit = -8099

It only succeeded once. I could not reproduce locally. Do you have any 
ideas what the problem is?


I think I roughly understand why the pages test fails. Internally, htags 
calls global to parse the output of gtags. When executing the command, it 
expects the combined size of argv and env to fit within 20*1024 bytes 
(https://github.com/harai/gnu-global/blob/f86ba74d867385353815f8656c4a6cf4029c1f0b/libutil/xargs.c#L92-L105). 
The failure only happens when the last commit is patch 7 (test: 
bios-tables-test: add IVRS changed binary), which has a very long commit 
message (around 9000 bytes). By default, GitLab adds some environment 
variables to the runner, and one of them is CI_COMMIT_MESSAGE, which 
contains the long commit message. So the environment exceeds the limit of 
20*1024 bytes and htags fails.


In my opinion, this failed test is not so critical and seems unrelated 
to the series so I skip this failed test. I will post the new version to 
fix the windows/amd64 build soon.


Thanks,
Quang Minh.



Re: [PATCH v11 0/7] Support x2APIC mode with TCG accelerator

2023-12-28 Thread Bui Quang Minh

On 12/26/23 16:21, Michael S. Tsirkin wrote:

On Mon, Dec 25, 2023 at 11:40:54PM +0700, Bui Quang Minh wrote:

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel boots successfully
with x2APIC enabled and can enumerate the CPU with APIC ID 257

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device intel-iommu,intremap=on,eim=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device amd-iommu,intremap=on,xtsup=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch


Seems to break build for windows/amd64
https://gitlab.com/mstredhat/qemu/-/pipelines/1118886361/failures


I saw the CI test "pages" failed too. On my CI, most of the time, it 
failed with


$ htags -anT --tree-view=filetree -m qemu_init -t "Welcome to the QEMU 
sourcecode"

00:24
htags: Negative exec line limit = -8099

It only succeeded once. I could not reproduce locally. Do you have any 
ideas what the problem is?


Thanks,
Quang Minh.



Re: [PATCH v11 0/7] Support x2APIC mode with TCG accelerator

2023-12-27 Thread Bui Quang Minh

On 12/26/23 16:21, Michael S. Tsirkin wrote:

On Mon, Dec 25, 2023 at 11:40:54PM +0700, Bui Quang Minh wrote:

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel boots successfully
with x2APIC enabled and can enumerate the CPU with APIC ID 257

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device intel-iommu,intremap=on,eim=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device amd-iommu,intremap=on,xtsup=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch


Seems to break build for windows/amd64
https://gitlab.com/mstredhat/qemu/-/pipelines/1118886361/failures


The failure occurs because, when CONFIG_AMD_IOMMU=n, amd_iommu.c is not built, 
so the linker cannot find the definition of 
amdvi_extended_feature_register (amdvi_extended_feature_register is used 
in acpi-build.c). I created a stub to solve this problem, and it passes 
all CI tests. I will squash the following changes into patch 6. What do 
you think about this?


diff --git a/hw/i386/amd_iommu_stub.c b/hw/i386/amd_iommu_stub.c
new file mode 100644
index 00..d62a3732e6
--- /dev/null
+++ b/hw/i386/amd_iommu_stub.c
@@ -0,0 +1,26 @@
+/*
+ * Stubs for AMD IOMMU emulation
+ *
+ * Copyright (C) 2023 Bui Quang Minh 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "amd_iommu.h"
+
+uint64_t amdvi_extended_feature_register(AMDVIState *s)
+{
+return AMDVI_DEFAULT_EXT_FEATURES;
+}
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 369c6bf823..d38637b046 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -9,7 +9,8 @@ i386_ss.add(files(

 i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'),
   if_false: files('x86-iommu-stub.c'))
-i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'))
+i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'),
+  if_false: files('amd_iommu_stub.c'))
 i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c'))
 i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 
'acpi-microvm.c', 'microvm-dt.c'))

 i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c'))


Thanks,
Quang Minh.



[PATCH v11 7/7] test: bios-tables-test: add IVRS changed binary

2023-12-25 Thread Bui Quang Minh
EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[098h 0152 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[099h 0153 002h]   Device ID : 0010
+[09Bh 0155 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[09Ch 0156 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[09Dh 0157 002h]   Device ID : 00F8
+[09Fh 0159 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[0A0h 0160 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[0A1h 0161 002h]   Device ID : 00FA
+[0A3h 0163 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[0A4h 0164 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[0A5h 0165 002h]   Device ID : 00FB
+[0A7h 0167 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[0A8h 0168 001h]   Subtable Type : 48 [Device Entry: Special 
Device]
+[0A9h 0169 002h]   Device ID : 
+[0ABh 0171 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+[0ACh 0172 001h]  Handle : 00
+[0ADh 0173 002h]   Source Used Device ID : 00A0
+[0AFh 0175 001h] Variety : 01
+
+Raw Table Data: Length 176 (0xB0)

-: 49 56 52 53 68 00 00 00 01 43 42 4F 43 48 53 20  // IVRShCBOCHS
+: 49 56 52 53 B0 00 00 00 01 74 42 4F 43 48 53 20  // IVRS.tBOCHS
 0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43  // BXPCBXPC
-0020: 01 00 00 00 00 28 00 00 00 00 00 00 00 00 00 00  // .(..
+0020: 01 00 00 00 01 28 00 00 00 00 00 00 00 00 00 00  // .(..
 0030: 10 D1 38 00 10 00 40 00 00 00 D8 FE 00 00 00 00  // ..8...@.
 0040: 00 00 00 00 44 00 00 00 02 00 00 00 02 08 00 00  // D...
 0050: 02 10 00 00 02 F8 00 00 02 FA 00 00 02 FB 00 00  // 
-0060: 48 00 00 00 00 A0 00 01  // H...
+0060: 48 00 00 00 00 A0 00 01 11 11 48 00 10 00 40 00  // H.H...@.
+0070: 00 00 D8 FE 00 00 00 00 00 00 00 00 00 00 00 00  // 
+0080: D3 29 00 00 00 00 00 00 00 00 00 00 00 00 00 00  // .)..
+0090: 02 00 00 00 02 08 00 00 02 10 00 00 02 F8 00 00  // 
+00A0: 02 FA 00 00 02 FB 00 00 48 00 00 00 00 A0 00 01  // ....H...

Signed-off-by: Bui Quang Minh 
---
 tests/data/acpi/q35/IVRS.ivrs   | Bin 104 -> 176 bytes
 tests/qtest/bios-tables-test-allowed-diff.h |   1 -
 2 files changed, 1 deletion(-)

diff --git a/tests/data/acpi/q35/IVRS.ivrs b/tests/data/acpi/q35/IVRS.ivrs
index 
17611202e53a32f7da8e4925d6955b384670b8b1..7f9e91aabc0ba7efc9f219587a4f91f0edb1
 100644
GIT binary patch
delta 63
zcmd1Uz{uqp78JaJfq{XsWFnUoBjZF>XH`K#4+a4S2cY1Me?S5bE^ES>3=9)pl>vea
B3FiO+

delta 22
dcmdnMn8D>478IPpz`(%hJdsO^kzt~$GXOlo1j7IT

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index ac420db6b7..dfb8523c8b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b

[PATCH v11 1/7] i386/tcg: implement x2APIC registers MSR access

2023-12-25 Thread Bui Quang Minh
This commit creates apic_register_read/write which are used by both
apic_mem_read/write for MMIO access and apic_msr_read/write for MSR access.

apic_msr_read/write return -1 on error; the accelerator can use this to
raise the appropriate exception.

Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 122 ---
 hw/intc/trace-events |   4 +-
 include/hw/i386/apic.h   |   3 +
 target/i386/cpu.h|   3 +
 target/i386/tcg/sysemu/misc_helper.c |  27 ++
 5 files changed, 127 insertions(+), 32 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ac3d47d231..7a349c0723 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,24 +643,19 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+static int apic_register_read(int index, uint64_t *value)
 {
 DeviceState *dev;
 APICCommonState *s;
 uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+int ret = 0;
 
 dev = cpu_get_current_apic();
 if (!dev) {
-return 0;
+return -1;
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -718,12 +720,46 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 default:
 s->esr |= APIC_ESR_ILLEGAL_ADDRESS;
 val = 0;
+ret = -1;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+*value = val;
+return ret;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint64_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+apic_register_read(index, &val);
+
 return val;
 }
 
+int apic_msr_read(int index, uint64_t *val)
+{
+DeviceState *dev;
+
+dev = cpu_get_current_apic();
+if (!dev) {
+return -1;
+}
+
+if (!is_x2apic_mode(dev)) {
+return -1;
+}
+
+return apic_register_read(index, val);
+}
+
 static void apic_send_msi(MSIMessage *msi)
 {
 uint64_t addr = msi->address;
@@ -737,35 +773,18 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+static int apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
-return;
+return -1;
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -839,8 +858,51 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 break;
 default:
 s->esr |= APIC_ESR_ILLEGAL_ADDRESS;
-break;
+return -1;
 }
+
+return 0;
+}
+
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/*
+ * MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa.
+ */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
+int apic_msr_write(int index, uint64_t val)
+{
+Dev

[PATCH v11 3/7] apic, i386/tcg: add x2apic transitions

2023-12-25 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

The set_base in APICCommonClass now returns an integer to indicate an error
in execution. apic_set_base returns -1 on an invalid APIC state transition;
the accelerator can use this to raise the appropriate exception.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/kvm/apic.c   |  3 +-
 hw/i386/xen/xen_apic.c   |  3 +-
 hw/intc/apic.c   | 62 +++-
 hw/intc/apic_common.c| 13 +++---
 include/hw/i386/apic.h   |  2 +-
 include/hw/i386/apic_internal.h  |  2 +-
 target/i386/cpu.c|  9 ++--
 target/i386/cpu.h|  4 ++
 target/i386/tcg/sysemu/misc_helper.c | 14 ++-
 target/i386/whpx/whpx-apic.c |  3 +-
 10 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 1e89ca0899..a72c28e8a7 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -95,9 +95,10 @@ void kvm_get_apic_state(DeviceState *dev, struct 
kvm_lapic_state *kapic)
 apic_next_timer(s, s->initial_count_load_time);
 }
 
-static void kvm_apic_set_base(APICCommonState *s, uint64_t val)
+static int kvm_apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = val;
+return 0;
 }
 
 static void kvm_apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/i386/xen/xen_apic.c b/hw/i386/xen/xen_apic.c
index 7c7a60b166..101e16a766 100644
--- a/hw/i386/xen/xen_apic.c
+++ b/hw/i386/xen/xen_apic.c
@@ -49,8 +49,9 @@ static void xen_apic_realize(DeviceState *dev, Error **errp)
 msi_nonbroken = true;
 }
 
-static void xen_apic_set_base(APICCommonState *s, uint64_t val)
+static int xen_apic_set_base(APICCommonState *s, uint64_t val)
 {
+return 0;
 }
 
 static void xen_apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 178fb26b47..1d887d66b8 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -308,8 +308,49 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
-static void apic_set_base(APICCommonState *s, uint64_t val)
+static int apic_set_base_check(APICCommonState *s, uint64_t val)
 {
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD) {
+return -1;
+}
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+return 0;
+}
+
+static int apic_set_base(APICCommonState *s, uint64_t val)
+{
+if (apic_set_base_check(s, val) < 0) {
+return -1;
+}
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -318,6 +359,25 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
+
+return 0;
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 4bc3d2f149..b13a7b0457 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c

[PATCH v11 5/7] test: bios-tables-test: prepare IVRS change in ACPI table

2023-12-25 Thread Bui Quang Minh
Following the instructions in bios-tables-test, this lists that IVRS.ivrs
in ACPI table will be changed to add new IVHD type 0x11.

Signed-off-by: Bui Quang Minh 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..ac420db6b7 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/q35/IVRS.ivrs",
-- 
2.25.1




[PATCH v11 6/7] amd_iommu: report x2APIC support to the operating system

2023-12-25 Thread Bui Quang Minh
This commit adds an XTSup configuration option to let the user choose whether
to enable this feature or not. When XTSup is enabled, additional bytes in the
IRTE with guest virtual APIC enabled are used to support a 32-bit destination ID.

Additionally, this commit exports IVHD type 0x11 besides the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. IVHD type 0x10
is kept so that old operating systems that only parse type 0x10 can still
detect the IOMMU device.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 129 +++
 hw/i386/amd_iommu.c  |  29 +-
 hw/i386/amd_iommu.h  |  16 --
 3 files changed, 117 insertions(+), 57 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 80db183b78..f17fb63be3 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2333,30 +2333,23 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
 .oem_table_id = oem_table_id };
+uint64_t feature_report;
 
 acpi_table_begin(&table, table_data);
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
 /*
  * A PCI bus walk, for each PCI host bridge, is necessary to create a
  * complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2376,56 +2369,94 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(ivhd_blob, 0x001, 4);
 }
 
-ivhd_table_len += ivhd_blob->len;
-
 /*
  * When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
  * Refer to spec - Table 95: IVHD device entry type codes
  *
  * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
  * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
  */
 if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-build_append_int_noprefix(table_data,
+build_append_int_noprefix(ivhd_blob,
  (0x1ull << 56) |   /* type IOAPIC */
  (IOAPIC_SB_DEVID << 40) |  /* IOAPIC devid */
  0x48,  /* special device 

[PATCH v11 4/7] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-12-25 Thread Bui Quang Minh
As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Suggested-by: Joao Martins 
Acked-by: Peter Xu 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 5085a6fee3..cb6ce4a646 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4124,11 +4124,7 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (kvm_enabled() && !kvm_enable_x2apic()) {
+if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
 error_setg(errp, "eim=on requires support on the KVM side"
  "(X2APIC_API, first shipped in v4.7)");
 return false;
-- 
2.25.1




[PATCH v11 0/7] Support x2APIC mode with TCG accelerator

2023-12-25 Thread Bui Quang Minh
er function to get amdvi extended feature register instead
  of storing it in AMDVIState

Version 4 changes,
- Patch 5:
  + Instead of replacing IVHD type 0x10 with type 0x11, export both types
  for backward compatibility with old guest operating system
  + Flip the xtsup feature check condition in amdvi_int_remap_ga for
  readability

Version 3 changes,
- Patch 2:
  + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
  + Make physical destination mode IPI which has destination id 0xffffffff
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (7):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended Interrupt Mode when using userspace APIC
  test: bios-tables-test: prepare IVRS change in ACPI table
  amd_iommu: report x2APIC support to the operating system
  test: bios-tables-test: add IVRS changed binary

 hw/i386/acpi-build.c | 129 +---
 hw/i386/amd_iommu.c  |  29 +-
 hw/i386/amd_iommu.h  |  16 +-
 hw/i386/intel_iommu.c|   6 +-
 hw/i386/kvm/apic.c   |   3 +-
 hw/i386/x86.c|   6 +-
 hw/i386/xen/xen_apic.c   |   3 +-
 hw/intc/apic.c   | 473 +--
 hw/intc/apic_common.c|  22 +-
 hw/intc/trace-events |   4 +-
 include/hw/i386/apic.h   |   8 +-
 include/hw/i386/apic_internal.h  |   9 +-
 target/i386/cpu-sysemu.c |  18 +-
 target/i386/cpu.c|   9 +-
 target/i386/cpu.h|   9 +
 target/i386/tcg/sysemu/misc_helper.c |  41 ++-
 target/i386/whpx/whpx-apic.c |   3 +-
 tests/data/acpi/q35/IVRS.ivrs| Bin 104 -> 176 bytes
 18 files changed, 600 insertions(+), 188 deletions(-)

-- 
2.25.1




[PATCH v11 2/7] apic: add support for x2APIC mode

2023-12-25 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and x2APIC
mode register access are supported.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/x86.c   |   6 +-
 hw/intc/apic.c  | 289 
 hw/intc/apic_common.c   |   9 +
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|  18 +-
 target/i386/cpu.h   |   2 +
 7 files changed, 259 insertions(+), 75 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 2b6291ad8d..3d1bdd334e 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -137,7 +137,7 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * a literal `0` in configurations where kvm_* aren't defined)
  */
 if (kvm_enabled() && x86ms->apic_id_limit > 255 &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -147,6 +147,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 7a349c0723..178fb26b47 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -32,14 +32,13 @@
 #include "qapi/error.h"
 #include "qom/object.h"
 
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
-
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +48,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,10 +210,10 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for (__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
-for(__j = 0; __j < 32; __j++) {\
+for (__j = 0; __j < 32; __j++) {\
 if (__mask & (1U << __j)) {\
 apic = local_apics[__i * 32 + __j];\
 if (apic) {\
@@ -226,7 +237,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for (i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +287,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
-  uint8_t vector_num, uint8_t trigger_mode)
+static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode,
+ uint8_t delivery_mode, uint8_t vector_num,
+ uint8_t trigger_mode)
 {
-uint32_t deliver_bitmask[MAX_APIC_WORDS];
+uint32_t *deliver_bitmask = g_malloc(max_apic_words * sizeof(uint32_t));
 
 trace_apic_deliver_irq(dest, d

[PATCH v10 5/7] test: bios-tables-test: prepare IVRS change in ACPI table

2023-11-09 Thread Bui Quang Minh
Following the instructions in bios-tables-test, this lists IVRS.ivrs as an
ACPI table that will change when the new IVHD type 0x11 is added.

Signed-off-by: Bui Quang Minh 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..ac420db6b7 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/q35/IVRS.ivrs",
-- 
2.25.1




[PATCH v10 6/7] amd_iommu: report x2APIC support to the operating system

2023-11-09 Thread Bui Quang Minh
This commit adds an XTSup configuration option to let the user choose whether
to enable this feature. When XTSup is enabled, additional bytes in the IRTE
with guest virtual VAPIC enabled are used to support a 32-bit destination ID.

Additionally, this commit exports IVHD type 0x11 alongside the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. IVHD type 0x10
is kept so that old operating systems that only parse type 0x10 can detect
the IOMMU device.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 129 +++
 hw/i386/amd_iommu.c  |  29 +-
 hw/i386/amd_iommu.h  |  16 --
 3 files changed, 117 insertions(+), 57 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 80db183b78..f17fb63be3 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2333,30 +2333,23 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
 .oem_table_id = oem_table_id };
+uint64_t feature_report;
 
 acpi_table_begin(&table, table_data);
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
 /*
  * A PCI bus walk, for each PCI host bridge, is necessary to create a
  * complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2376,56 +2369,94 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(ivhd_blob, 0x001, 4);
 }
 
-ivhd_table_len += ivhd_blob->len;
-
 /*
  * When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
  * Refer to spec - Table 95: IVHD device entry type codes
  *
  * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
  * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
  */
 if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-build_append_int_noprefix(table_data,
+build_append_int_noprefix(ivhd_blob,
  (0x1ull << 56) |   /* type IOAPIC */
  (IOAPIC_SB_DEVID << 40) |  /* IOAPIC devid */
  0x48,  /* special device 

[PATCH v10 3/7] apic, i386/tcg: add x2apic transitions

2023-11-09 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

The set_base in APICCommonClass now returns an integer to indicate an error in
execution. apic_set_base returns -1 on an invalid APIC state transition; the
accelerator can use this to raise the appropriate exception.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/kvm/apic.c   |  3 +-
 hw/i386/xen/xen_apic.c   |  3 +-
 hw/intc/apic.c   | 62 +++-
 hw/intc/apic_common.c| 13 +++---
 include/hw/i386/apic.h   |  2 +-
 include/hw/i386/apic_internal.h  |  2 +-
 target/i386/cpu.c|  9 ++--
 target/i386/cpu.h|  4 ++
 target/i386/tcg/sysemu/misc_helper.c | 14 ++-
 target/i386/whpx/whpx-apic.c |  3 +-
 10 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 1e89ca0899..a72c28e8a7 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -95,9 +95,10 @@ void kvm_get_apic_state(DeviceState *dev, struct 
kvm_lapic_state *kapic)
 apic_next_timer(s, s->initial_count_load_time);
 }
 
-static void kvm_apic_set_base(APICCommonState *s, uint64_t val)
+static int kvm_apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = val;
+return 0;
 }
 
 static void kvm_apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/i386/xen/xen_apic.c b/hw/i386/xen/xen_apic.c
index 7c7a60b166..101e16a766 100644
--- a/hw/i386/xen/xen_apic.c
+++ b/hw/i386/xen/xen_apic.c
@@ -49,8 +49,9 @@ static void xen_apic_realize(DeviceState *dev, Error **errp)
 msi_nonbroken = true;
 }
 
-static void xen_apic_set_base(APICCommonState *s, uint64_t val)
+static int xen_apic_set_base(APICCommonState *s, uint64_t val)
 {
+return 0;
 }
 
 static void xen_apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 178fb26b47..1d887d66b8 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -308,8 +308,49 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
-static void apic_set_base(APICCommonState *s, uint64_t val)
+static int apic_set_base_check(APICCommonState *s, uint64_t val)
 {
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD) {
+return -1;
+}
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+return 0;
+}
+
+static int apic_set_base(APICCommonState *s, uint64_t val)
+{
+if (apic_set_base_check(s, val) < 0) {
+return -1;
+}
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -318,6 +359,25 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
+
+return 0;
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 4bc3d2f149..b13a7b0457 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c

[PATCH v10 7/7] test: bios-tables-test: add IVRS changed binary

2023-11-09 Thread Bui Quang Minh
EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[098h 0152 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[099h 0153 002h]   Device ID : 0010
+[09Bh 0155 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[09Ch 0156 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[09Dh 0157 002h]   Device ID : 00F8
+[09Fh 0159 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[0A0h 0160 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[0A1h 0161 002h]   Device ID : 00FA
+[0A3h 0163 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[0A4h 0164 001h]   Subtable Type : 02 [Device Entry: Select One 
Device]
+[0A5h 0165 002h]   Device ID : 00FB
+[0A7h 0167 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+
+[0A8h 0168 001h]   Subtable Type : 48 [Device Entry: Special 
Device]
+[0A9h 0169 002h]   Device ID : 
+[0ABh 0171 001h] Data Setting (decoded below) : 00
+INITPass : 0
+EIntPass : 0
+ NMIPass : 0
+Reserved : 0
+ System MGMT : 0
+  LINT0 Pass : 0
+  LINT1 Pass : 0
+[0ACh 0172 001h]  Handle : 00
+[0ADh 0173 002h]   Source Used Device ID : 00A0
+[0AFh 0175 001h] Variety : 01
+
+Raw Table Data: Length 176 (0xB0)

-: 49 56 52 53 68 00 00 00 01 43 42 4F 43 48 53 20  // IVRShCBOCHS
+: 49 56 52 53 B0 00 00 00 01 74 42 4F 43 48 53 20  // IVRS.tBOCHS
 0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43  // BXPCBXPC
-0020: 01 00 00 00 00 28 00 00 00 00 00 00 00 00 00 00  // .(..
+0020: 01 00 00 00 01 28 00 00 00 00 00 00 00 00 00 00  // .(..
 0030: 10 D1 38 00 10 00 40 00 00 00 D8 FE 00 00 00 00  // ..8...@.
 0040: 00 00 00 00 44 00 00 00 02 00 00 00 02 08 00 00  // D...
 0050: 02 10 00 00 02 F8 00 00 02 FA 00 00 02 FB 00 00  // 
-0060: 48 00 00 00 00 A0 00 01  // H...
+0060: 48 00 00 00 00 A0 00 01 11 11 48 00 10 00 40 00  // H.H...@.
+0070: 00 00 D8 FE 00 00 00 00 00 00 00 00 00 00 00 00  // 
+0080: D3 29 00 00 00 00 00 00 00 00 00 00 00 00 00 00  // .)..
+0090: 02 00 00 00 02 08 00 00 02 10 00 00 02 F8 00 00  // 
+00A0: 02 FA 00 00 02 FB 00 00 48 00 00 00 00 A0 00 01  // ....H...

Signed-off-by: Bui Quang Minh 
---
 tests/data/acpi/q35/IVRS.ivrs   | Bin 104 -> 176 bytes
 tests/qtest/bios-tables-test-allowed-diff.h |   1 -
 2 files changed, 1 deletion(-)

diff --git a/tests/data/acpi/q35/IVRS.ivrs b/tests/data/acpi/q35/IVRS.ivrs
index 
17611202e53a32f7da8e4925d6955b384670b8b1..7f9e91aabc0ba7efc9f219587a4f91f0edb1
 100644
GIT binary patch
delta 63
zcmd1Uz{uqp78JaJfq{XsWFnUoBjZF>XH`K#4+a4S2cY1Me?S5bE^ES>3=9)pl>vea
B3FiO+

delta 22
dcmdnMn8D>478IPpz`(%hJdsO^kzt~$GXOlo1j7IT

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index ac420db6b7..dfb8523c8b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b

[PATCH v10 4/7] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-11-09 Thread Bui Quang Minh
As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Suggested-by: Joao Martins 
Acked-by: Peter Xu 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 5085a6fee3..cb6ce4a646 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4124,11 +4124,7 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (kvm_enabled() && !kvm_enable_x2apic()) {
+if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
 error_setg(errp, "eim=on requires support on the KVM side"
  "(X2APIC_API, first shipped in v4.7)");
 return false;
-- 
2.25.1




[PATCH v10 2/7] apic: add support for x2APIC mode

2023-11-09 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and x2APIC
mode register access are supported.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/x86.c   |   6 +-
 hw/intc/apic.c  | 289 
 hw/intc/apic_common.c   |   9 +
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|  18 +-
 target/i386/cpu.h   |   2 +
 7 files changed, 259 insertions(+), 75 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index b3d054889b..9d3f8b9b4e 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -133,7 +133,7 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * both in-kernel lapic and X2APIC userspace API.
  */
 if (x86ms->apic_id_limit > 255 && kvm_enabled() &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -143,6 +143,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 7a349c0723..178fb26b47 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -32,14 +32,13 @@
 #include "qapi/error.h"
 #include "qom/object.h"
 
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
-
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +48,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,10 +210,10 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for (__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
-for(__j = 0; __j < 32; __j++) {\
+for (__j = 0; __j < 32; __j++) {\
 if (__mask & (1U << __j)) {\
 apic = local_apics[__i * 32 + __j];\
 if (apic) {\
@@ -226,7 +237,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for (i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +287,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
-  uint8_t vector_num, uint8_t trigger_mode)
+static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode,
+ uint8_t delivery_mode, uint8_t vector_num,
+ uint8_t trigger_mode)
 {
-uint32_t deliver_bitmask[MAX_APIC_WORDS];
+uint32_t *deliver_bitmask = g_malloc(max_apic_words * sizeof(uint32_t));
 
 trace_apic_deliver_irq(dest, dest_m

[PATCH v10 1/7] i386/tcg: implement x2APIC registers MSR access

2023-11-09 Thread Bui Quang Minh
This commit creates apic_register_read/write which are used by both
apic_mem_read/write for MMIO access and apic_msr_read/write for MSR access.

apic_msr_read/write return -1 on error; the accelerator can use this to
raise the appropriate exception.

Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 122 ---
 hw/intc/trace-events |   4 +-
 include/hw/i386/apic.h   |   3 +
 target/i386/cpu.h|   3 +
 target/i386/tcg/sysemu/misc_helper.c |  27 ++
 5 files changed, 127 insertions(+), 32 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ac3d47d231..7a349c0723 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,24 +643,19 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+static int apic_register_read(int index, uint64_t *value)
 {
 DeviceState *dev;
 APICCommonState *s;
 uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+int ret = 0;
 
 dev = cpu_get_current_apic();
 if (!dev) {
-return 0;
+return -1;
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -718,12 +720,46 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 default:
 s->esr |= APIC_ESR_ILLEGAL_ADDRESS;
 val = 0;
+ret = -1;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+*value = val;
+return ret;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint64_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+apic_register_read(index, &val);
+
 return val;
 }
 
+int apic_msr_read(int index, uint64_t *val)
+{
+DeviceState *dev;
+
+dev = cpu_get_current_apic();
+if (!dev) {
+return -1;
+}
+
+if (!is_x2apic_mode(dev)) {
+return -1;
+}
+
+return apic_register_read(index, val);
+}
+
 static void apic_send_msi(MSIMessage *msi)
 {
 uint64_t addr = msi->address;
@@ -737,35 +773,18 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+static int apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
-return;
+return -1;
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -839,8 +858,51 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 break;
 default:
 s->esr |= APIC_ESR_ILLEGAL_ADDRESS;
-break;
+return -1;
 }
+
+return 0;
+}
+
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/*
+ * MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa.
+ */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
+int apic_msr_write(int index, uint64_t val)
+{
+Dev

[PATCH v10 0/7] Support x2APIC mode with TCG accelerator

2023-11-09 Thread Bui Quang Minh
0 with type 0x11, export both types
  for backward compatibility with old guest operating system
  + Flip the xtsup feature check condition in amdvi_int_remap_ga for
  readability

Version 3 changes,
- Patch 2:
  + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
  + Make physical destination mode IPI which has destination id 0xffffffff
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (7):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended Interrupt Mode when using userspace APIC
  test: bios-tables-test: prepare IVRS change in ACPI table
  amd_iommu: report x2APIC support to the operating system
  test: bios-tables-test: add IVRS changed binary

 hw/i386/acpi-build.c | 129 +---
 hw/i386/amd_iommu.c  |  29 +-
 hw/i386/amd_iommu.h  |  16 +-
 hw/i386/intel_iommu.c|   6 +-
 hw/i386/kvm/apic.c   |   3 +-
 hw/i386/x86.c|   6 +-
 hw/i386/xen/xen_apic.c   |   3 +-
 hw/intc/apic.c   | 473 +--
 hw/intc/apic_common.c|  22 +-
 hw/intc/trace-events |   4 +-
 include/hw/i386/apic.h   |   8 +-
 include/hw/i386/apic_internal.h  |   9 +-
 target/i386/cpu-sysemu.c |  18 +-
 target/i386/cpu.c|   9 +-
 target/i386/cpu.h|   9 +
 target/i386/tcg/sysemu/misc_helper.c |  41 ++-
 target/i386/whpx/whpx-apic.c |   3 +-
 tests/data/acpi/q35/IVRS.ivrs| Bin 104 -> 176 bytes
 18 files changed, 600 insertions(+), 188 deletions(-)

-- 
2.25.1




Re: [PATCH v9 0/5] Support x2APIC mode with TCG accelerator

2023-11-09 Thread Bui Quang Minh

On 11/9/23 21:32, Joao Martins wrote:

On 09/11/2023 14:10, Bui Quang Minh wrote:

On 11/9/23 17:11, Santosh Shukla wrote:

On 10/24/2023 8:51 PM, Bui Quang Minh wrote:

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel boots successfully
with x2APIC enabled and can enumerate the CPU with APIC ID 257

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
    -smp 2,maxcpus=260 \
    -cpu qemu64,x2apic=on \
    -machine q35 \
    -device intel-iommu,intremap=on,eim=on \
    -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
    -m 2G \
    -kernel $KERNEL_DIR \
    -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial
net.ifnames=0" \
    -drive file=$IMAGE_DIR,format=raw \
    -nographic \
    -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
    -smp 2,maxcpus=260 \
    -cpu qemu64,x2apic=on \
    -machine q35 \
    -device amd-iommu,intremap=on,xtsup=on \
    -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
    -m 2G \
    -kernel $KERNEL_DIR \
    -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial
net.ifnames=0" \
    -drive file=$IMAGE_DIR,format=raw \
    -nographic \
    -s

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)

  if ((str = getenv("TEST_DEVICE")))
  no_test_device = !atol(str);
+   no_test_device = true;

  if ((str = getenv("MEMLIMIT")))
  fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

    FAIL: apic_disable: *0xfee00030: 50014
    FAIL: apic_disable: *0xfee00080: f0
    FAIL: apic_disable: *0xfee00030: 50014
    FAIL: apic_disable: *0xfee00080: f0
    FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching to
x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; in order to support these we
need to figure out how to allocate and manage a different MMIO region for
each CPU. This can be an improvement in the future.

    FAIL: nmi-after-sti
    FAIL: multiple nmi

These errors are in the way we handle CPU_INTERRUPT_NMI in core TCG.

    FAIL: TMCCT should stay at zero

This error is related to APIC timer which should be addressed in separate
patch.

Version 9 changes,


Hi Bui,

I have tested v9 on EPYC-Genoa system with kvm acceleration mode on, I could
see > 255 vCPU for Linux and Windows Guest.

Tested-by: Santosh Shukla 


Hi Santosh,

With KVM enabled, you may be using the in kernel APIC from KVM not the emulated
APIC in userspace as in this series.



Your XTSup code isn't necessarily userspace APIC specific. You can have
accel=kvm with split irqchip and things will still work. I suspect that's how
Santosh tested it.


Ah, I got it. Thanks Santosh, Joao.
Quang Minh.



Re: [PATCH v9 5/5] amd_iommu: report x2APIC support to the operating system

2023-11-09 Thread Bui Quang Minh

On 11/9/23 02:44, Michael S. Tsirkin wrote:

On Wed, Nov 08, 2023 at 09:22:18PM +0700, Bui Quang Minh wrote:

On 11/7/23 07:39, Michael S. Tsirkin wrote:

On Tue, Oct 24, 2023 at 10:21:05PM +0700, Bui Quang Minh wrote:

This commit adds an XTSup configuration option to let the user choose whether
to enable this feature. When XTSup is enabled, additional bytes in the IRTE
with guest virtual VAPIC enabled are used to support a 32-bit destination ID.

Additionally, this commit exports IVHD type 0x11 alongside the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. IVHD type 0x10
is kept so that old operating systems that only parse type 0x10 can detect
the IOMMU device.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 



changes IVRS without updating expected files for tests.
result seems to be CI failures:
https://gitlab.com/mstredhat/qemu/-/jobs/5470533834



Thanks Michael, I am preparing the fix in the next version. I've read the
instructions to update the test data in bios-tables-test.c. It says I need
to create some separate patches to update the test data. Are there any
reasons for this? I intend to change the binary and include the ASL diff
into the commit message. Is it enough?


No, not enough.  No, do not ignore the rules please.  Yes, there's a
reason.  The reason is that I need to be able to rebase your patches.  I
then regenerate the binaries. If the patch includes binaries it won't
rebase.


Okay, I got it. I will prepare the fix in the next version.

Thanks,
Quang Minh.



Re: [PATCH v9 0/5] Support x2APIC mode with TCG accelerator

2023-11-09 Thread Bui Quang Minh

On 11/9/23 17:11, Santosh Shukla wrote:

On 10/24/2023 8:51 PM, Bui Quang Minh wrote:

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel boots successfully
with x2APIC enabled and can enumerate the CPU with APIC ID 257

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device intel-iommu,intremap=on,eim=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device amd-iommu,intremap=on,xtsup=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)

 if ((str = getenv("TEST_DEVICE")))
 no_test_device = !atol(str);
+   no_test_device = true;

 if ((str = getenv("MEMLIMIT")))
 fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

   FAIL: apic_disable: *0xfee00030: 50014
   FAIL: apic_disable: *0xfee00080: f0
   FAIL: apic_disable: *0xfee00030: 50014
   FAIL: apic_disable: *0xfee00080: f0
   FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching
to x2APIC and don't support relocating the MMIO region yet. This is a
problem because the MMIO region is the same for all CPUs; in order to
support these cases we need to figure out how to allocate and manage a
different MMIO region for each CPU. This can be an improvement in the future.

   FAIL: nmi-after-sti
   FAIL: multiple nmi

These errors are in the way we handle CPU_INTERRUPT_NMI in core TCG.

   FAIL: TMCCT should stay at zero

This error is related to APIC timer which should be addressed in separate
patch.

Version 9 changes,


Hi Bui,

I have tested v9 on EPYC-Genoa system with kvm acceleration mode on, I could
see > 255 vCPU for Linux and Windows Guest.

Tested-by: Santosh Shukla 


Hi Santosh,

With KVM enabled, you may be using the in-kernel APIC from KVM, not the 
emulated APIC in userspace as in this series.


Thanks,
Quang Minh.



Re: [PATCH v9 5/5] amd_iommu: report x2APIC support to the operating system

2023-11-08 Thread Bui Quang Minh

On 11/7/23 07:39, Michael S. Tsirkin wrote:

On Tue, Oct 24, 2023 at 10:21:05PM +0700, Bui Quang Minh wrote:

This commit adds the XTSup configuration to let the user choose whether to
enable this feature. When XTSup is enabled, additional bytes in the IRTE
with guest virtual VAPIC enabled are used to support a 32-bit destination id.

Additionally, this commit exports IVHD type 0x11 besides the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. The IVHD type 0x10
is kept so that old operating systems that only parse type 0x10 can detect
the IOMMU device.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 



changes IVRS without updating expected files for tests.
result seems to be CI failures:
https://gitlab.com/mstredhat/qemu/-/jobs/5470533834



Thanks Michael, I am preparing the fix in the next version. I've read 
the instructions to update the test data in bios-tables-test.c. It says 
I need to create some separate patches to update the test data. Are 
there any reasons for this? I intend to change the binary and include 
the ASL diff into the commit message. Is it enough?




Re: [PATCH v8 1/5] i386/tcg: implement x2APIC registers MSR access

2023-10-24 Thread Bui Quang Minh

On 10/22/23 20:59, Phil Dennis-Jordan wrote:

I can confirm that this works. The build issue obviously needs fixing,
but once that's fixed, this improves on the status quo.

I've tested this and patch 2/5 with x2apic CPUID bit enabled with the
hvf backend on macOS. To make it work in hvf mode, I used the attached
additional minimal patch to wire it up, but with that in place it
noticeably improves guest OS performance. (This patch doesn't yet
implement raising exceptions or checking for x2apic mode, more on that
in my comments below.)

Reviewed-by: Phil Dennis-Jordan 

On Tue, 26 Sept 2023 at 18:08, Bui Quang Minh  wrote:

@@ -455,6 +469,19 @@ void helper_rdmsr(CPUX86State *env)
  val = (cs->nr_threads * cs->nr_cores) | (cs->nr_cores << 16);
  break;
  }
+case MSR_APIC_START ... MSR_APIC_END: {
+int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
+
+if (!is_x2apic_mode(env_archcpu(env)->apic_state)) {
+raise_exception_ra(env, EXCP0D_GPF, GETPC());
+}
+
+qemu_mutex_lock_iothread();
+val = apic_register_read(index);
+qemu_mutex_unlock_iothread();


Shouldn't the x2apic mode check technically be inside the lock?
Furthermore, we need the mode check logic in each accelerator whose
MSR read and write we wire up. Finally, there's the exception raising
issue which Michael noted.

So my suggestion would be to wrap the x2apic mode check and the call
to the lower level apic_register_read into a standalone
apic_x2apic_msr_read() or similar, and the equivalent for writes.
These functions should then also return success or failure, the latter
indicating an exception should be raised. Raising the exception can
then also be implemented for each accelerator at the relevant call
site. That contains the raise_exception_ra call in the TCG specific
code, and I can do the equivalent on the hvf side.


Thanks a lot for your suggestion, I've taken this approach and 
implemented an apic_msr_read/write wrapper as you suggested in version 9 
(https://lore.kernel.org/qemu-devel/20231024152105.35942-1-minhquangbu...@gmail.com/)


Thank you,
Quang Minh.



[PATCH v9 4/5] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-10-24 Thread Bui Quang Minh
As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Suggested-by: Joao Martins 
Acked-by: Peter Xu 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e4f6cedcb1..848511b7f8 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4049,11 +4049,7 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (kvm_enabled() && !kvm_enable_x2apic()) {
+if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
 error_setg(errp, "eim=on requires support on the KVM side"
  "(X2APIC_API, first shipped in v4.7)");
 return false;
-- 
2.25.1




[PATCH v9 5/5] amd_iommu: report x2APIC support to the operating system

2023-10-24 Thread Bui Quang Minh
This commit adds the XTSup configuration to let the user choose whether to
enable this feature. When XTSup is enabled, additional bytes in the IRTE
with guest virtual VAPIC enabled are used to support a 32-bit destination id.

Additionally, this commit exports IVHD type 0x11 besides the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. The IVHD type 0x10
is kept so that old operating systems that only parse type 0x10 can detect
the IOMMU device.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 129 +++
 hw/i386/amd_iommu.c  |  29 +-
 hw/i386/amd_iommu.h  |  16 --
 3 files changed, 117 insertions(+), 57 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 3f2b27cf75..8069971e54 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2337,30 +2337,23 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
 .oem_table_id = oem_table_id };
+uint64_t feature_report;
 
 acpi_table_begin(&table, table_data);
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
 /*
  * A PCI bus walk, for each PCI host bridge, is necessary to create a
  * complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2380,56 +2373,94 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(ivhd_blob, 0x001, 4);
 }
 
-ivhd_table_len += ivhd_blob->len;
-
 /*
  * When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
  * Refer to spec - Table 95: IVHD device entry type codes
  *
  * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
  * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
  */
 if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-build_append_int_noprefix(table_data,
+build_append_int_noprefix(ivhd_blob,
  (0x1ull << 56) |   /* type IOAPIC */
  (IOAPIC_SB_DEVID << 40) |  /* IOAPIC devid */
  0x48,  /* special device 

[PATCH v9 1/5] i386/tcg: implement x2APIC registers MSR access

2023-10-24 Thread Bui Quang Minh
This commit creates apic_register_read/write which are used by both
apic_mem_read/write for MMIO access and apic_msr_read/write for MSR access.

apic_msr_read/write return -1 on error; the accelerator can use this to
raise the appropriate exception.

Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 122 ---
 hw/intc/trace-events |   4 +-
 include/hw/i386/apic.h   |   3 +
 target/i386/cpu.h|   3 +
 target/i386/tcg/sysemu/misc_helper.c |  27 ++
 5 files changed, 127 insertions(+), 32 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ac3d47d231..7a349c0723 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,24 +643,19 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+static int apic_register_read(int index, uint64_t *value)
 {
 DeviceState *dev;
 APICCommonState *s;
 uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+int ret = 0;
 
 dev = cpu_get_current_apic();
 if (!dev) {
-return 0;
+return -1;
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -718,12 +720,46 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 default:
 s->esr |= APIC_ESR_ILLEGAL_ADDRESS;
 val = 0;
+ret = -1;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+*value = val;
+return ret;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint64_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+apic_register_read(index, &val);
+
 return val;
 }
 
+int apic_msr_read(int index, uint64_t *val)
+{
+DeviceState *dev;
+
+dev = cpu_get_current_apic();
+if (!dev) {
+return -1;
+}
+
+if (!is_x2apic_mode(dev)) {
+return -1;
+}
+
+return apic_register_read(index, val);
+}
+
 static void apic_send_msi(MSIMessage *msi)
 {
 uint64_t addr = msi->address;
@@ -737,35 +773,18 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+static int apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
-return;
+return -1;
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -839,8 +858,51 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 break;
 default:
 s->esr |= APIC_ESR_ILLEGAL_ADDRESS;
-break;
+return -1;
 }
+
+return 0;
+}
+
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/*
+ * MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa.
+ */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
+int apic_msr_write(int index, uint64_t val)
+{
+Dev

[PATCH v9 3/5] apic, i386/tcg: add x2apic transitions

2023-10-24 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

The set_base in APICCommonClass now returns an integer to indicate an error
in execution. apic_set_base returns -1 on an invalid APIC state transition;
the accelerator can use this to raise the appropriate exception.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/kvm/apic.c   |  3 +-
 hw/i386/xen/xen_apic.c   |  3 +-
 hw/intc/apic.c   | 62 +++-
 hw/intc/apic_common.c| 13 +++---
 include/hw/i386/apic.h   |  2 +-
 include/hw/i386/apic_internal.h  |  2 +-
 target/i386/cpu.c|  9 ++--
 target/i386/cpu.h|  4 ++
 target/i386/tcg/sysemu/misc_helper.c | 14 ++-
 target/i386/whpx/whpx-apic.c |  3 +-
 10 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 1e89ca0899..a72c28e8a7 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -95,9 +95,10 @@ void kvm_get_apic_state(DeviceState *dev, struct 
kvm_lapic_state *kapic)
 apic_next_timer(s, s->initial_count_load_time);
 }
 
-static void kvm_apic_set_base(APICCommonState *s, uint64_t val)
+static int kvm_apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = val;
+return 0;
 }
 
 static void kvm_apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/i386/xen/xen_apic.c b/hw/i386/xen/xen_apic.c
index 7c7a60b166..101e16a766 100644
--- a/hw/i386/xen/xen_apic.c
+++ b/hw/i386/xen/xen_apic.c
@@ -49,8 +49,9 @@ static void xen_apic_realize(DeviceState *dev, Error **errp)
 msi_nonbroken = true;
 }
 
-static void xen_apic_set_base(APICCommonState *s, uint64_t val)
+static int xen_apic_set_base(APICCommonState *s, uint64_t val)
 {
+return 0;
 }
 
 static void xen_apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 84d428a875..f9e54d92b3 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -308,8 +308,49 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
-static void apic_set_base(APICCommonState *s, uint64_t val)
+static int apic_set_base_check(APICCommonState *s, uint64_t val)
 {
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD) {
+return -1;
+}
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD)) {
+return -1;
+}
+
+return 0;
+}
+
+static int apic_set_base(APICCommonState *s, uint64_t val)
+{
+if (apic_set_base_check(s, val) < 0) {
+return -1;
+}
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -318,6 +359,25 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
+
+return 0;
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 4bc3d2f149..b13a7b0457 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c

[PATCH v9 2/5] apic: add support for x2APIC mode

2023-10-24 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and
x2APIC mode register access are supported.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/x86.c   |   6 +-
 hw/intc/apic.c  | 280 
 hw/intc/apic_common.c   |   9 +
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|  18 +-
 target/i386/cpu.h   |   2 +
 7 files changed, 250 insertions(+), 75 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index b3d054889b..9d3f8b9b4e 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -133,7 +133,7 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * both in-kernel lapic and X2APIC userspace API.
  */
 if (x86ms->apic_id_limit > 255 && kvm_enabled() &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -143,6 +143,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 7a349c0723..84d428a875 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -32,14 +32,13 @@
 #include "qapi/error.h"
 #include "qom/object.h"
 
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
-
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +48,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,10 +210,10 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for (__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
-for(__j = 0; __j < 32; __j++) {\
+for (__j = 0; __j < 32; __j++) {\
 if (__mask & (1U << __j)) {\
 apic = local_apics[__i * 32 + __j];\
 if (apic) {\
@@ -226,7 +237,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for (i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +287,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
-  uint8_t vector_num, uint8_t trigger_mode)
+static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode,
+ uint8_t delivery_mode, uint8_t vector_num,
+ uint8_t trigger_mode)
 {
-uint32_t deliver_bitmask[MAX_APIC_WORDS];
+uint32_t *deliver_bitmask = g_malloc(max_apic_words * sizeof(uint32_t));
 
 trace_apic_deliver_irq(dest, dest_m

[PATCH v9 0/5] Support x2APIC mode with TCG accelerator

2023-10-24 Thread Bui Quang Minh
s supported on CPU
  + Make physical destination mode IPI which has destination id 0xffffffff
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC suuport
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (5):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended Interrupt Mode when using userspace APIC
  amd_iommu: report x2APIC support to the operating system

 hw/i386/acpi-build.c | 129 +---
 hw/i386/amd_iommu.c  |  29 +-
 hw/i386/amd_iommu.h  |  16 +-
 hw/i386/intel_iommu.c|   6 +-
 hw/i386/kvm/apic.c   |   3 +-
 hw/i386/x86.c|   6 +-
 hw/i386/xen/xen_apic.c   |   3 +-
 hw/intc/apic.c   | 464 +--
 hw/intc/apic_common.c|  22 +-
 hw/intc/trace-events |   4 +-
 include/hw/i386/apic.h   |   8 +-
 include/hw/i386/apic_internal.h  |   9 +-
 target/i386/cpu-sysemu.c |  18 +-
 target/i386/cpu.c|   9 +-
 target/i386/cpu.h|   9 +
 target/i386/tcg/sysemu/misc_helper.c |  41 ++-
 target/i386/whpx/whpx-apic.c |   3 +-
 17 files changed, 591 insertions(+), 188 deletions(-)

-- 
2.25.1




[RFC PATCH] tcg, apic: create a separate root memory region for each CPU

2023-10-05 Thread Bui Quang Minh

Currently, by default, every TCG CPU uses the same system memory region as
its root memory region. In order to support APIC MMIO relocation as well as
to correctly disable APIC MMIO in the disabled and x2APIC states, this
commit creates a separate root memory region for every CPU. The system
memory region is added to this container root memory region, and a separate
APIC MMIO region is then added with higher priority than system memory. With
a separate APIC MMIO region per CPU, APIC MMIO relocation and disabling can
be done per CPU without interfering with the other CPUs.

Because the MSI base address is the same as the APIC MMIO base, the MMIO
region currently serves 2 purposes: APIC register access and MSI handling.
When no interrupt remapping device is used, devices send MSIs by writing to
0xfee00000 in system memory. However, as we move the APIC MMIO region out of
system memory, we need to change the device code to call apic_send_msi
instead of writing to system memory.

This commit passes the APIC MMIO relocation test in kvm-unit-tests, still
fails APIC disable, however, I think we should treat those as pass

Before:
FAIL: apic_disable: *0xfee00030: 50014
FAIL: apic_disable: *0xfee00080: f0
FAIL: apic_disable: *0xfee00030: 50014
FAIL: apic_disable: *0xfee00080: f0
After:
FAIL: apic_disable: *0xfee00030: 0
FAIL: apic_disable: *0xfee00080: 0
FAIL: apic_disable: *0xfee00030: 0
FAIL: apic_disable: *0xfee00080: 0

Before this commit, we could still read the APIC registers; after this
commit we cannot. However, the test memsets the disabled MMIO region with
0xff and expects to read back the 0xff value. As we disable the APIC MMIO
memory region, the access is dispatched to system memory, which has
unassigned read/write operations, so a read returns 0 and a write has no effect.

This commit is tested on booting up Linux kernel with and without Intel/AMD
IOMMU on enabled/disabled x2APIC CPUs.

The memory region tree when running without interrupt remapping device

~ info mtree
Before:
address-space: cpu-memory-0
address-space: cpu-memory-1
address-space: memory
  - (prio 0, i/o): system
After:
address-space: cpu-memory-0
  - (prio 0, i/o): memory
- (prio 0, i/o): alias memory
  @system -
fee0-feef (prio 4096, i/o): apic-msi

address-space: cpu-memory-1
  - (prio 0, i/o): memory
- (prio 0, i/o): alias memory
  @system -
fee0-feef (prio 4096, i/o): apic-msi

Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 24 ++--
 hw/intc/ioapic.c | 32 +++-
 hw/pci/pci.c | 25 +++--
 include/hw/i386/apic.h   |  1 +
 target/i386/cpu-sysemu.c | 14 +++---
 target/i386/tcg/sysemu/tcg-cpu.c | 13 +
 6 files changed, 97 insertions(+), 12 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index b8f56836a6..0bd5b5d1f9 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -340,17 +340,34 @@ static void apic_set_base_check(APICCommonState 
*s, uint64_t val)

 raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
 }

+/* set base address and enable/disable APIC MMIO */
+static void apic_io_memory_set_base_enabled(APICCommonState *s,
+hwaddr address,
+bool enabled)
+{
+if (tcg_enabled()) {
+qemu_mutex_lock_iothread();
+memory_region_set_address(&s->io_memory, address);
+memory_region_set_enabled(&s->io_memory, enabled);
+qemu_mutex_unlock_iothread();
+}
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
+hwaddr new_base = val & MSR_IA32_APICBASE_BASE;
+bool enabled = true;
+
 apic_set_base_check(s, val);

-s->apicbase = (val & 0xf000) |
+s->apicbase = new_base |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | 
MSR_IA32_APICBASE_ENABLE));

 /* if disabled, cannot be enabled again */
 if (!(val & MSR_IA32_APICBASE_ENABLE)) {
 s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
+enabled = false;
 }

 /* Transition from disabled mode to xAPIC */
@@ -368,7 +385,10 @@ static void apic_set_base(APICCommonState *s, 
uint64_t val)


 s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
   (1 << (s->initial_apic_id & 0xf));
+enabled = false;
 }
+
+  

Re: [PATCH v8 0/5] Support x2APIC mode with TCG accelerator

2023-10-05 Thread Bui Quang Minh

On 9/26/23 23:06, Bui Quang Minh wrote:

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

Testing by booting my own build of Linux 6.3.0-rc2: the kernel boots
successfully with x2APIC enabled and can enumerate the CPU with APIC ID 257

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device intel-iommu,intremap=on,eim=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device amd-iommu,intremap=on,xtsup=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)

 if ((str = getenv("TEST_DEVICE")))
 no_test_device = !atol(str);
+   no_test_device = true;

 if ((str = getenv("MEMLIMIT")))
 fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

   FAIL: apic_disable: *0xfee00030: 50014
   FAIL: apic_disable: *0xfee00080: f0
   FAIL: apic_disable: *0xfee00030: 50014
   FAIL: apic_disable: *0xfee00080: f0
   FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching to
x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; in order to support these
cases we need to figure out how to allocate and manage a different MMIO region
for each CPU. This can be an improvement in the future.


I've tried to address these failed tests with the idea of creating a
separate APIC MMIO region per CPU. I've created a working patch with
this approach and will send it in reply to this message; you can see the
details in the patch. However, it has a big drawback: it breaks the MSI
handler. With that patch, a device needs to call apic_send_msi directly
instead of writing to 0xfee00000 in system memory. Furthermore, I think
APIC MMIO relocation is a very unusual use case and APIC MMIO disable is
not very important for normal system software. I would be pleased to
receive any comments on that patch.


Thank you,
Quang Minh.



Re: [PATCH v8 0/5] Support x2APIC mode with TCG accelerator

2023-10-04 Thread Bui Quang Minh

On 10/4/23 13:51, Michael S. Tsirkin wrote:

On Tue, Sep 26, 2023 at 11:23:53PM +0700, Bui Quang Minh wrote:

On 9/26/23 23:06, Bui Quang Minh wrote:


Version 8 changes,
- Patch 2, 4:
+ Rebase to master and resolve conflicts in these 2 patches


The conflict when rebasing is due to commit 9926cf34de5fa15da
("target/i386: Allow elision of kvm_enable_x2apic()"). AFAIK, this commit
adds kvm_enabled() before kvm_enable_x2apic() in the AND (&&) expression so
that when kvm_enabled() is known to be false at compile time
(CONFIG_KVM_IS_POSSIBLE is undefined), the compiler can omit the
kvm_enable_x2apic() call in the AND expression.

In patch 2, I simply combine the change logic in patch 2 with logic in the
commit 9926cf34de5fa15da.

In patch 4, the end result of version 8 is the same as version 7. I don't
think we need to add the kvm_enabled() to make the expression become

if (kvm_enabled() && kvm_irqchip_is_split() && !kvm_enable_x2apic())

Because when CONFIG_KVM_IS_POSSIBLE is undefined, kvm_irqchip_is_split() is
known to be false at the compile time too so just keep the expression as

if (kvm_irqchip_is_split() && !kvm_enable_x2apic())

is enough.


git range-diff feat/tcg-x2apic-v7~5..feat/tcg-x2apic-v7

feat/tcg-x2apic-v8~5..feat/tcg-x2apic-v8

1:  c1d197a230 = 1:  f6e3918e0f i386/tcg: implement x2APIC registers MSR
access
2:  dd96cb0238 ! 2:  54d44a15b6 apic: add support for x2APIC mode
 @@ Commit message

   ## hw/i386/x86.c ##
  @@ hw/i386/x86.c: void x86_cpus_init(X86MachineState *x86ms, int
default_cpu_version)
 -  * Can we support APIC ID 255 or higher?
 -  *
 -  * Under Xen: yes.
 -- * With userspace emulated lapic: no
 -+ * With userspace emulated lapic: checked later in
apic_common_set_id.
 -  * With KVM's in-kernel lapic: only if X2APIC API is enabled.
 +  * both in-kernel lapic and X2APIC userspace API.
*/
 - if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
 + if (x86ms->apic_id_limit > 255 && kvm_enabled() &&
  -(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
  +kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
   error_report("current -smp configuration requires kernel "
3:  31a5c555a6 = 3:  eb080d1e2c apic, i386/tcg: add x2apic transitions
4:  d78b5c43b4 ! 4:  59f028f119 intel_iommu: allow Extended Interrupt Mode
when using userspace APIC
 @@ hw/i386/intel_iommu.c: static bool vtd_decide_config(IntelIOMMUState
*s, Error *
  -error_setg(errp, "eim=on requires
accel=kvm,kernel-irqchip=split");
  -return false;
  -}
 --if (!kvm_enable_x2apic()) {
 +-if (kvm_enabled() && !kvm_enable_x2apic()) {
  +if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
   error_setg(errp, "eim=on requires support on the KVM side"
"(X2APIC_API, first shipped in v4.7)");
5:  51f558035d = 5:  bc95c3cb60 amd_iommu: report x2APIC support to the
operating system

As the change is minor and does not change the main logic, I keep the
Reviewed-by and Acked-by tags.

Thank you,
Quang Minh.




Causes some build failures:

https://gitlab.com/mstredhat/qemu/-/jobs/5216377483
/builds/mstredhat/qemu/build/../hw/intc/apic.c:1023: undefined reference to 
`raise_exception_ra'


raise_exception_ra is TCG-specific, so the builds fail when TCG is
disabled. I will remove the use of raise_exception_ra: an invalid
register read will just return 0, and an invalid register write will have
no effect, without raising an exception anymore. An invalid APIC state
transition will not raise an exception either; it will just leave the APIC
state unchanged. As a side effect, we fail some more KVM unit tests for
invalid APIC state transitions, as they expect to catch an exception in
these cases. I think it's not a big problem. What's your opinion?


Thank you,
Quang Minh.



Re: [PATCH v8 0/5] Support x2APIC mode with TCG accelerator

2023-09-26 Thread Bui Quang Minh

On 9/26/23 23:06, Bui Quang Minh wrote:


Version 8 changes,
- Patch 2, 4:
   + Rebase to master and resolve conflicts in these 2 patches


The conflict when rebasing is due to commit 9926cf34de5fa15da
("target/i386: Allow elision of kvm_enable_x2apic()"). AFAIK, this
commit adds kvm_enabled() before kvm_enable_x2apic() in the AND (&&)
expression so that when kvm_enabled() is known to be false at
compile time (CONFIG_KVM_IS_POSSIBLE is undefined), the compiler can
omit the kvm_enable_x2apic() call in the AND expression.


In patch 2, I simply combine the change logic in patch 2 with logic in 
the commit 9926cf34de5fa15da.


In patch 4, the end result of version 8 is the same as version 7. I 
don't think we need to add the kvm_enabled() to make the expression become


if (kvm_enabled() && kvm_irqchip_is_split() && !kvm_enable_x2apic())

Because when CONFIG_KVM_IS_POSSIBLE is undefined, kvm_irqchip_is_split() 
is known to be false at the compile time too so just keep the expression as


if (kvm_irqchip_is_split() && !kvm_enable_x2apic())

is enough.

> git range-diff feat/tcg-x2apic-v7~5..feat/tcg-x2apic-v7 
feat/tcg-x2apic-v8~5..feat/tcg-x2apic-v8


1:  c1d197a230 = 1:  f6e3918e0f i386/tcg: implement x2APIC registers MSR 
access

2:  dd96cb0238 ! 2:  54d44a15b6 apic: add support for x2APIC mode
@@ Commit message

  ## hw/i386/x86.c ##
 @@ hw/i386/x86.c: void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)

-  * Can we support APIC ID 255 or higher?
-  *
-  * Under Xen: yes.
-- * With userspace emulated lapic: no
-+ * With userspace emulated lapic: checked later in 
apic_common_set_id.

-  * With KVM's in-kernel lapic: only if X2APIC API is enabled.
+  * both in-kernel lapic and X2APIC userspace API.
   */
- if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
+ if (x86ms->apic_id_limit > 255 && kvm_enabled() &&
 -(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
 +kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
  error_report("current -smp configuration requires kernel "
3:  31a5c555a6 = 3:  eb080d1e2c apic, i386/tcg: add x2apic transitions
4:  d78b5c43b4 ! 4:  59f028f119 intel_iommu: allow Extended Interrupt 
Mode when using userspace APIC
@@ hw/i386/intel_iommu.c: static bool 
vtd_decide_config(IntelIOMMUState *s, Error *
 -error_setg(errp, "eim=on requires 
accel=kvm,kernel-irqchip=split");

 -return false;
 -}
--if (!kvm_enable_x2apic()) {
+-if (kvm_enabled() && !kvm_enable_x2apic()) {
 +if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
  error_setg(errp, "eim=on requires support on the KVM 
side"

   "(X2APIC_API, first shipped in v4.7)");
5:  51f558035d = 5:  bc95c3cb60 amd_iommu: report x2APIC support to the 
operating system


As the change is minor and does not change the main logic, I keep the 
Reviewed-by and Acked-by tags.


Thank you,
Quang Minh.



[PATCH v8 5/5] amd_iommu: report x2APIC support to the operating system

2023-09-26 Thread Bui Quang Minh
This commit adds XTSup configuration to let the user choose whether to enable
this feature or not. When XTSup is enabled, additional bytes in the IRTE with
enabled guest virtual APIC are used to support a 32-bit destination ID.

Additionally, this commit exports IVHD type 0x11 besides the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. IVHD type 0x10
is kept so that old operating systems that only parse type 0x10 can detect
the IOMMU device.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 129 +++
 hw/i386/amd_iommu.c  |  29 +-
 hw/i386/amd_iommu.h  |  16 --
 3 files changed, 117 insertions(+), 57 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 4d2d40bab5..aacd35b926 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2337,30 +2337,23 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
 .oem_table_id = oem_table_id };
+uint64_t feature_report;
 
 acpi_table_begin(&table, table_data);
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
 /*
  * A PCI bus walk, for each PCI host bridge, is necessary to create a
  * complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2380,56 +2373,94 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(ivhd_blob, 0x001, 4);
 }
 
-ivhd_table_len += ivhd_blob->len;
-
 /*
  * When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
  * Refer to spec - Table 95: IVHD device entry type codes
  *
  * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
  * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
  */
 if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-build_append_int_noprefix(table_data,
+build_append_int_noprefix(ivhd_blob,
  (0x1ull << 56) |   /* type IOAPIC */
  (IOAPIC_SB_DEVID << 40) |  /* IOAPIC devid */
  0x48,  /* special device 

[PATCH v8 1/5] i386/tcg: implement x2APIC registers MSR access

2023-09-26 Thread Bui Quang Minh
This commit refactors apic_mem_read/write to support both MMIO access in
xAPIC and MSR access in x2APIC.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 79 ++--
 hw/intc/trace-events |  4 +-
 include/hw/i386/apic.h   |  3 ++
 target/i386/cpu.h|  3 ++
 target/i386/tcg/sysemu/misc_helper.c | 27 ++
 5 files changed, 86 insertions(+), 30 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ac3d47d231..cb8c20de93 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,16 +643,11 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+uint64_t apic_register_read(int index)
 {
 DeviceState *dev;
 APICCommonState *s;
-uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+uint64_t val;
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -653,7 +655,6 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -720,7 +721,23 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 val = 0;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+return val;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint32_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+val = (uint32_t)apic_register_read(index);
+
 return val;
 }
 
@@ -737,27 +754,10 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+void apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -765,7 +765,7 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -843,6 +843,29 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 }
 
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/* MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa. */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
 static void apic_pre_save(APICCommonState *s)
 {
 apic_sync_vapic(s, SYNC_FROM_VAPIC);
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 36ff71f947..1ef29d0256 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -14,8 +14,8 @@ cpu_get_apic_base(uint64_t val) "0x%016"PRIx64
 # apic.c
 apic_local_deliver(int vector, uint32_t lvt) "vector %d delivery mode %d"
 apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, 
uint8_t vector_num, uint8_t trigger_mode) "dest %d dest_mode %d delivery_mode 
%d vector %d trigger_mode %d"
-apic_mem_readl(uint64_t addr, uint32_t val)  "0x%"PRIx64" = 0x%

[PATCH v8 4/5] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-09-26 Thread Bui Quang Minh
As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Suggested-by: Joao Martins 
Acked-by: Peter Xu 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c0ce896668..a3e4bf5497 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4049,11 +4049,7 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (kvm_enabled() && !kvm_enable_x2apic()) {
+if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
 error_setg(errp, "eim=on requires support on the KVM side"
  "(X2APIC_API, first shipped in v4.7)");
 return false;
-- 
2.25.1




[PATCH v8 3/5] apic, i386/tcg: add x2apic transitions

2023-09-26 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 50 
 hw/intc/apic_common.c|  7 ++--
 target/i386/cpu-sysemu.c | 10 ++
 target/i386/cpu.c|  8 ++---
 target/i386/cpu.h|  6 
 target/i386/tcg/sysemu/misc_helper.c |  4 +++
 6 files changed, 76 insertions(+), 9 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 9f741794a7..b8f56836a6 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -309,8 +309,41 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
+static void apic_set_base_check(APICCommonState *s, uint64_t val)
+{
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD)
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
+apic_set_base_check(s, val);
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -319,6 +352,23 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index ac8ec00eef..c6b10af88f 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -42,11 +42,8 @@ void cpu_set_apic_base(DeviceState *dev, uint64_t val)
 if (dev) {
 APICCommonState *s = APIC_COMMON(dev);
 APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
-/* switching to x2APIC, reset possibly modified xAPIC ID */
-if (!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
-(val & MSR_IA32_APICBASE_EXTD)) {
-s->id = s->initial_apic_id;
-}
+/* Reset possibly modified xAPIC ID */
+s->id = s->initial_apic_id;
 info->set_base(s, val);
 }
 }
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index 0e0f8cf8ad..7422096737 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -235,6 +235,16 @@ void cpu_clear_apic_feature(CPUX86State *env)
 env->features[FEAT_1_EDX] &= ~CPUID_APIC;
 }
 
+void cpu_set_apic_feature(CPUX86State *env)
+{
+env->features[FEAT_1_EDX] |= CPUID_APIC;
+}
+
+bool cpu_has_x2apic_feature(CPUX86State *env)
+{
+return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC;
+}
+
 bool cpu_is_bsp(X86CPU *cpu)
 {
 return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7836aa6692..1c6e0dc2f3 100644
--- a/target/i386/cpu.c
+++ b/target/i3

[PATCH v8 0/5] Support x2APIC mode with TCG accelerator

2023-09-26 Thread Bui Quang Minh
Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel boots successfully
with x2APIC enabled and can enumerate the CPU with APIC ID 257.

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
  -smp 2,maxcpus=260 \
  -cpu qemu64,x2apic=on \
  -machine q35 \
  -device intel-iommu,intremap=on,eim=on \
  -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
  -m 2G \
  -kernel $KERNEL_DIR \
  -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
  -drive file=$IMAGE_DIR,format=raw \
  -nographic \
  -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
  -smp 2,maxcpus=260 \
  -cpu qemu64,x2apic=on \
  -machine q35 \
  -device amd-iommu,intremap=on,xtsup=on \
  -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
  -m 2G \
  -kernel $KERNEL_DIR \
  -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
  -drive file=$IMAGE_DIR,format=raw \
  -nographic \
  -s

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)

if ((str = getenv("TEST_DEVICE")))
no_test_device = !atol(str);
+   no_test_device = true;

if ((str = getenv("MEMLIMIT")))
fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0
  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0
  FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching to
x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; in order to support these
cases we need to figure out how to allocate and manage a different MMIO region
for each CPU. This can be an improvement in the future.

  FAIL: nmi-after-sti
  FAIL: multiple nmi

These errors are in the way we handle CPU_INTERRUPT_NMI in core TCG.

  FAIL: TMCCT should stay at zero

This error is related to APIC timer which should be addressed in separate
patch.

Version 8 changes,
- Patch 2, 4:
  + Rebase to master and resolve conflicts in these 2 patches

Version 7 changes,
- Patch 4:
  + If eim=on, keep checking if kvm x2APIC is enabled when kernel-irqchip
  is split

Version 6 changes,
- Patch 5:
  + Make all places use the amdvi_extended_feature_register to get extended
  feature register

Version 5 changes,
- Patch 3:
  + Rebase to master and fix conflict
- Patch 5:
  + Create a helper function to get amdvi extended feature register instead
  of storing it in AMDVIState

Version 4 changes,
- Patch 5:
  + Instead of replacing IVHD type 0x10 with type 0x11, export both types
  for backward compatibility with old guest operating system
  + Flip the xtsup feature check condition in amdvi_int_remap_ga for
  readability

Version 3 changes,
- Patch 2:
  + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
  + Make physical destination mode IPI which has destination id 0xffffffff
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (5):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended

[PATCH v8 2/5] apic: add support for x2APIC mode

2023-09-26 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and x2APIC
mode register access are supported.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/x86.c   |   6 +-
 hw/intc/apic.c  | 266 
 hw/intc/apic_common.c   |   9 ++
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|   8 +-
 6 files changed, 230 insertions(+), 69 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index f034df8bf6..88534203c9 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -133,7 +133,7 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * both in-kernel lapic and X2APIC userspace API.
  */
 if (x86ms->apic_id_limit > 255 && kvm_enabled() &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -143,6 +143,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index cb8c20de93..9f741794a7 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -31,15 +31,15 @@
 #include "hw/i386/apic-msidef.h"
 #include "qapi/error.h"
 #include "qom/object.h"
-
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
+#include "tcg/helper-tcg.h"
 
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +49,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,7 +211,7 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for(__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
 for(__j = 0; __j < 32; __j++) {\
@@ -226,7 +238,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for(i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +288,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
-  uint8_t vector_num, uint8_t trigger_mode)
+static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode,
+ uint8_t delivery_mode, uint8_t vector_num,
+ uint8_t trigger_mode)
 {
-uint32_t deliver_bitmask[MAX_APIC_WORDS];
+uint32_t *deliver_bitmask = g_malloc(max_apic_words * sizeof(uint32_t));
 
 trace_apic_deliver_irq(dest, dest_mode, delivery_mode, vector_num,
trigger_mode);
 
 apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode);
 

[PATCH v7 0/5] Support x2APIC mode with TCG accelerator

2023-07-28 Thread Bui Quang Minh
Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel boots successfully
with x2APIC enabled and can enumerate the CPU with APIC ID 257.

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
  -smp 2,maxcpus=260 \
  -cpu qemu64,x2apic=on \
  -machine q35 \
  -device intel-iommu,intremap=on,eim=on \
  -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
  -m 2G \
  -kernel $KERNEL_DIR \
  -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
  -drive file=$IMAGE_DIR,format=raw \
  -nographic \
  -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
  -smp 2,maxcpus=260 \
  -cpu qemu64,x2apic=on \
  -machine q35 \
  -device amd-iommu,intremap=on,xtsup=on \
  -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
  -m 2G \
  -kernel $KERNEL_DIR \
  -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
  -drive file=$IMAGE_DIR,format=raw \
  -nographic \
  -s

To test the emulated userspace APIC with kvm-unit-tests, disable the test
device with this patch:

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)

if ((str = getenv("TEST_DEVICE")))
no_test_device = !atol(str);
+   no_test_device = true;

if ((str = getenv("MEMLIMIT")))
fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0
  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0
  FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching to
x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; to support these cases we
need to figure out how to allocate and manage a different MMIO region for
each CPU. This can be an improvement in the future.

  FAIL: nmi-after-sti
  FAIL: multiple nmi

These errors are caused by the way we handle CPU_INTERRUPT_NMI in core TCG.

  FAIL: TMCCT should stay at zero

This error is related to the APIC timer, which should be addressed in a
separate patch.

Version 7 changes,
- Patch 4:
  + If eim=on, keep checking if kvm x2APIC is enabled when kernel-irqchip
  is split

Version 6 changes,
- Patch 5:
  + Make all places use the amdvi_extended_feature_register to get extended
  feature register

Version 5 changes,
- Patch 3:
  + Rebase to master and fix conflict
- Patch 5:
  + Create a helper function to get amdvi extended feature register instead
  of storing it in AMDVIState

Version 4 changes,
- Patch 5:
  + Instead of replacing IVHD type 0x10 with type 0x11, export both types
  for backward compatibility with old guest operating system
  + Flip the xtsup feature check condition in amdvi_int_remap_ga for
  readability

Version 3 changes,
- Patch 2:
  + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
  + Make physical destination mode IPI which has destination id 0xffffffff
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (5):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended Interrupt Mode when using userspace APIC
  amd_iommu: report x2APIC support to the operatin

[PATCH v7 2/5] apic: add support for x2APIC mode

2023-07-28 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and
x2APIC mode register access are supported.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/x86.c   |   8 +-
 hw/intc/apic.c  | 266 
 hw/intc/apic_common.c   |   9 ++
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|   8 +-
 6 files changed, 231 insertions(+), 70 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index a88a126123..8b70f0a6ea 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -132,11 +132,11 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * Can we support APIC ID 255 or higher?
  *
  * Under Xen: yes.
- * With userspace emulated lapic: no
+ * With userspace emulated lapic: checked later in apic_common_set_id.
  * With KVM's in-kernel lapic: only if X2APIC API is enabled.
  */
 if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -146,6 +146,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index cb8c20de93..9f741794a7 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -31,15 +31,15 @@
 #include "hw/i386/apic-msidef.h"
 #include "qapi/error.h"
 #include "qom/object.h"
-
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
+#include "tcg/helper-tcg.h"
 
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +49,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,7 +211,7 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for(__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
 for(__j = 0; __j < 32; __j++) {\
@@ -226,7 +238,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for(i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +288,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
-  uint8_t vector_num, uint8_t trigger_mode)
+static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode,
+ uint8_t delivery_mode, uint8_t vector_num,
+ uint8_t trigger_mode)
 {
-uint32_t deliver_bitmask[MAX_APIC_WORDS];
+uint32_t *deliver_bitmask = g_mallo

[PATCH v7 5/5] amd_iommu: report x2APIC support to the operating system

2023-07-28 Thread Bui Quang Minh
This commit adds an XTSup configuration option to let the user choose whether
to enable this feature. When XTSup is enabled, additional bytes in the IRTE
with guest virtual APIC enabled are used to support a 32-bit destination id.

Additionally, this commit exports IVHD type 0x11 besides the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. The IVHD type 0x10
is kept so that an old operating system that only parses type 0x10 can still
detect the IOMMU device.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 129 +++
 hw/i386/amd_iommu.c  |  29 +-
 hw/i386/amd_iommu.h  |  16 --
 3 files changed, 117 insertions(+), 57 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 9c74fa17ad..4231b80f25 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2336,30 +2336,23 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
 .oem_table_id = oem_table_id };
+uint64_t feature_report;
 
 acpi_table_begin(&table, table_data);
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
 /*
  * A PCI bus walk, for each PCI host bridge, is necessary to create a
  * complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2379,56 +2372,94 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(ivhd_blob, 0x001, 4);
 }
 
-ivhd_table_len += ivhd_blob->len;
-
 /*
  * When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
  * Refer to spec - Table 95: IVHD device entry type codes
  *
  * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
  * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
  */
 if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-build_append_int_noprefix(table_data,
+build_append_int_noprefix(ivhd_blob,
  (0x1ull << 56) |   /* type IOAPIC */
  (IOAPIC_SB_DEVID << 40) |  /* IOAPIC devid */
  0x48,  /* special device 

[PATCH v7 4/5] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-07-28 Thread Bui Quang Minh
As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Suggested-by: Joao Martins 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index dcc334060c..f7ef4e2cfa 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4044,11 +4044,7 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (!kvm_enable_x2apic()) {
+if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
 error_setg(errp, "eim=on requires support on the KVM side"
  "(X2APIC_API, first shipped in v4.7)");
 return false;
-- 
2.25.1




[PATCH v7 3/5] apic, i386/tcg: add x2apic transitions

2023-07-28 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 50 
 hw/intc/apic_common.c|  7 ++--
 target/i386/cpu-sysemu.c | 10 ++
 target/i386/cpu.c|  8 ++---
 target/i386/cpu.h|  6 
 target/i386/tcg/sysemu/misc_helper.c |  4 +++
 6 files changed, 76 insertions(+), 9 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 9f741794a7..b8f56836a6 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -309,8 +309,41 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
+static void apic_set_base_check(APICCommonState *s, uint64_t val)
+{
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD)
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
+apic_set_base_check(s, val);
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -319,6 +352,23 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d95914066e..396f828be8 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -43,11 +43,8 @@ void cpu_set_apic_base(DeviceState *dev, uint64_t val)
 if (dev) {
 APICCommonState *s = APIC_COMMON(dev);
 APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
-/* switching to x2APIC, reset possibly modified xAPIC ID */
-if (!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
-(val & MSR_IA32_APICBASE_EXTD)) {
-s->id = s->initial_apic_id;
-}
+/* Reset possibly modified xAPIC ID */
+s->id = s->initial_apic_id;
 info->set_base(s, val);
 }
 }
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index a9ff10c517..f6bbe33372 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -235,6 +235,16 @@ void cpu_clear_apic_feature(CPUX86State *env)
 env->features[FEAT_1_EDX] &= ~CPUID_APIC;
 }
 
+void cpu_set_apic_feature(CPUX86State *env)
+{
+env->features[FEAT_1_EDX] |= CPUID_APIC;
+}
+
+bool cpu_has_x2apic_feature(CPUX86State *env)
+{
+return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC;
+}
+
 bool cpu_is_bsp(X86CPU *cpu)
 {
 return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 97ad229d8b..240a1f9737 100644
--- a/target/i386/cpu.c
+++ b/target/i3

[PATCH v7 1/5] i386/tcg: implement x2APIC registers MSR access

2023-07-28 Thread Bui Quang Minh
This commit refactors apic_mem_read/write to support both MMIO access in
xAPIC and MSR access in x2APIC.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 79 ++--
 hw/intc/trace-events |  4 +-
 include/hw/i386/apic.h   |  3 ++
 target/i386/cpu.h|  3 ++
 target/i386/tcg/sysemu/misc_helper.c | 27 ++
 5 files changed, 86 insertions(+), 30 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ac3d47d231..cb8c20de93 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,16 +643,11 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+uint64_t apic_register_read(int index)
 {
 DeviceState *dev;
 APICCommonState *s;
-uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+uint64_t val;
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -653,7 +655,6 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -720,7 +721,23 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 val = 0;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+return val;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint32_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+val = (uint32_t)apic_register_read(index);
+
 return val;
 }
 
@@ -737,27 +754,10 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+void apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -765,7 +765,7 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -843,6 +843,29 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 }
 
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/* MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa. */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
 static void apic_pre_save(APICCommonState *s)
 {
 apic_sync_vapic(s, SYNC_FROM_VAPIC);
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 36ff71f947..1ef29d0256 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -14,8 +14,8 @@ cpu_get_apic_base(uint64_t val) "0x%016"PRIx64
 # apic.c
 apic_local_deliver(int vector, uint32_t lvt) "vector %d delivery mode %d"
 apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, 
uint8_t vector_num, uint8_t trigger_mode) "dest %d dest_mode %d delivery_mode 
%d vector %d trigger_mode %d"
-apic_mem_readl(uint64_t addr, uint32_t val)  "0x%"PRIx64" = 0x%

Re: [PATCH v6 4/5] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-07-21 Thread Bui Quang Minh

On 7/21/23 03:47, Peter Xu wrote:

On Mon, Jul 17, 2023 at 11:29:56PM +0700, Bui Quang Minh wrote:

On 7/17/23 17:47, Joao Martins wrote:

+Peter, +Jason (intel-iommu maintainer/reviewer)


Thanks for copying me, Joan.



On 15/07/2023 16:22, Bui Quang Minh wrote:

As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
   hw/i386/intel_iommu.c | 11 ---
   1 file changed, 11 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index dcc334060c..5e576f6059 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4043,17 +4043,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
 && x86_iommu_ir_supported(x86_iommu) ?
 ON_OFF_AUTO_ON : 
ON_OFF_AUTO_OFF;
   }
-if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (!kvm_enable_x2apic()) {
-error_setg(errp, "eim=on requires support on the KVM side"
- "(X2APIC_API, first shipped in v4.7)");
-return false;
-}
-}

Given commit 20ca47429e ('Revert "intel_iommu: Fix irqchip / X2APIC
configuration checks"'), won't we regress behaviour again  for the accel=kvm
case by dropping the kvm_enable_x2apic() call here?

Perhaps if we support userspace APIC with TCG the check just needs to be redone
to instead avoid always requiring kvm e.g.:

if (kvm_irqchip_in_kernel()) {
  error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split"
 "(X2APIC_API, first shipped in v4.7)");
}

if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
  error_setg(errp, "eim=on requires support on the KVM side"
 "(X2APIC_API, first shipped in v4.7)");
  return false;
}


Thank you for your review. I think the check for kvm_irqchip_in_kernel() is
not correct, AFAIK, kvm_irqchip_is_split() == true also means
kvm_irqchip_in_kernel() == true on x86. To check if kernel-irqchip = on, we
need to do something like in x86_iommu_realize

 bool irq_all_kernel = kvm_irqchip_in_kernel() &&
!kvm_irqchip_is_split();

The original check for !kvm_irqchip_is_split means emulated/userspace APIC.
It's because to reach that check x86_iommu_ir_supported(...) == true and
x86_iommu_ir_supported(...) == true is not supported when kernel-irqchip =
on (there is a check for this in x86_iommu_realize)

So I think we need to change the check to

 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
 if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
 error_setg(errp, "eim=on requires support on the KVM side"
  "(X2APIC_API, first shipped in v4.7)");
 return false;
 }
 }

Is it OK?


Mostly to me, except that we may also want to keep failing if all irq chips
are in kernel?


Yes, that behavior does not change after this patch. x86_iommu_realize 
in the parent type TYPE_X86_IOMMU_DEVICE fails when interrupt remapping 
is on and all irq chips are in kernel already.


static void x86_iommu_realize(DeviceState *dev, Error **errp)
{
/* ... */
/* Both Intel and AMD IOMMU IR only support "kernel-irqchip 
{off|split}" */

if (x86_iommu_ir_supported(x86_iommu) && irq_all_kernel) {
error_setg(errp, "Interrupt Remapping cannot work with "
 "kernel-irqchip=on, please use 'split|off'.");
return;
}
/* ... */
}


So when we reach this point with interrupt remapping on and decide
whether eim is on or not, the irq chips cannot all be in the
kernel.


Thanks,
Quang Minh.



Re: [PATCH v6 4/5] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-07-17 Thread Bui Quang Minh

On 7/17/23 17:47, Joao Martins wrote:

+Peter, +Jason (intel-iommu maintainer/reviewer)

On 15/07/2023 16:22, Bui Quang Minh wrote:

As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
  hw/i386/intel_iommu.c | 11 ---
  1 file changed, 11 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index dcc334060c..5e576f6059 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4043,17 +4043,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
&& x86_iommu_ir_supported(x86_iommu) ?
ON_OFF_AUTO_ON : 
ON_OFF_AUTO_OFF;
  }
-if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (!kvm_enable_x2apic()) {
-error_setg(errp, "eim=on requires support on the KVM side"
- "(X2APIC_API, first shipped in v4.7)");
-return false;
-}
-}
  

Given commit 20ca47429e ('Revert "intel_iommu: Fix irqchip / X2APIC
configuration checks"'), won't we regress behaviour again  for the accel=kvm
case by dropping the kvm_enable_x2apic() call here?

Perhaps if we support userspace APIC with TCG the check just needs to be redone
to instead avoid always requiring kvm e.g.:

if (kvm_irqchip_in_kernel()) {
 error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split"
"(X2APIC_API, first shipped in v4.7)");
}

if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
 error_setg(errp, "eim=on requires support on the KVM side"
"(X2APIC_API, first shipped in v4.7)");
 return false;
}


Thank you for your review. I think the check for kvm_irqchip_in_kernel() 
is not correct, AFAIK, kvm_irqchip_is_split() == true also means 
kvm_irqchip_in_kernel() == true on x86. To check if kernel-irqchip = on, 
we need to do something like in x86_iommu_realize


bool irq_all_kernel = kvm_irqchip_in_kernel() && 
!kvm_irqchip_is_split();


The original check for !kvm_irqchip_is_split means emulated/userspace 
APIC. It's because to reach that check x86_iommu_ir_supported(...) == 
true and x86_iommu_ir_supported(...) == true is not supported when 
kernel-irqchip = on (there is a check for this in x86_iommu_realize)


So I think we need to change the check to

if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
error_setg(errp, "eim=on requires support on the KVM side"
 "(X2APIC_API, first shipped in v4.7)");
return false;
}
}

Is it OK?

Thanks,
Quang Minh.



[PATCH] apic: stop timer when changing mode and current count reaches 0

2023-07-16 Thread Bui Quang Minh
When running kvm-unit-tests[1] on current APIC[2], we get a failed test
case related to APIC timer

env QEMU=build/qemu-system-x86_64 ACCEL=tcg ./run_tests.sh -g apic
FAIL: TMCCT should stay at zero

The test case sets the timer mode to oneshot and sets the initial count; it
waits until the current count reaches 0, then changes the mode to periodic.
It is expected that the timer does not start again and the current count
stays at 0. However, in the current implementation, the write to the LVT
timer entry to change to periodic mode triggers a new periodic timer.

This commit adds an additional check when a write to the LVT timer entry
changes the mode from oneshot to periodic. If the current count has already
reached 0 in oneshot mode, it does not start a new timer and sets the
timer_stop bool flag to true. apic_get_current_count uses this flag to
report the correct current count.

[1]: This patch is applied to kvm-unit-tests before running test

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)

if ((str = getenv("TEST_DEVICE")))
no_test_device = !atol(str);
+   no_test_device = true;

if ((str = getenv("MEMLIMIT")))
fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

[2]: We need to turn off the APIC-disable logic before running kvm-unit-tests
because the tests try to disable and re-enable the APIC, which does not follow
the xAPIC disable rule in Intel SDM Section 11.4.3, Enabling or Disabling
the Local APIC:

"When IA32_APIC_BASE[11] is set to 0, processor APICs based on the 3-wire
APIC bus cannot be generally re-enabled until a system hardware reset"

So the current implementation does not support disabling and re-enabling the
local APIC in xAPIC mode.

We need to apply this patch to run the test

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ec0a20da59..5c4e0ee3bd 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -293,11 +293,13 @@ static void apic_set_base(APICCommonState *s, 
uint64_t val)
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | 
MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
+/*
 if (!(val & MSR_IA32_APICBASE_ENABLE)) {
 s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+*/
 }

Signed-off-by: Bui Quang Minh 
---
 hw/i386/kvm/apic.c  |  2 +-
 hw/intc/apic.c  | 37 ++---
 hw/intc/apic_common.c   | 41 -
 include/hw/i386/apic_internal.h |  5 +++-
 4 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 1e89ca0899..12273ff991 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -92,7 +92,7 @@ void kvm_get_apic_state(DeviceState *dev, struct 
kvm_lapic_state *kapic)
 s->count_shift = (v + 1) & 7;
 
 s->initial_count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-apic_next_timer(s, s->initial_count_load_time);
+apic_next_timer(s, s->initial_count_load_time, false);
 }
 
 static void kvm_apic_set_base(APICCommonState *s, uint64_t val)
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ac3d47d231..ec0a20da59 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -619,9 +619,10 @@ int apic_accept_pic_intr(DeviceState *dev)
 return 0;
 }
 
-static void apic_timer_update(APICCommonState *s, int64_t current_time)
+static void apic_timer_update(APICCommonState *s, int64_t current_time,
+  bool switch_to_periodic)
 {
-if (apic_next_timer(s, current_time)) {
+if (apic_next_timer(s, current_time, switch_to_periodic)) {
 timer_mod(s->timer, s->next_time);
 } else {
 timer_del(s->timer);
@@ -633,7 +634,7 @@ static void apic_timer(void *opaque)
 APICCommonState *s = opaque;
 
 apic_local_deliver(s, APIC_LVT_TIMER);
-apic_timer_update(s, s->next_time);
+apic_timer_update(s, s->next_time, false);
 }
 
 static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
@@ -814,18 +815,38 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 case 0x32 ... 0x37:
 {
 int n = index - 0x32;
-s->lvt[n] = val;
 if (n == APIC_LVT_TIMER) {
-apic_timer_update(s, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
-} else if (n == APIC_LVT_LINT0 && apic_check_pic(s)) {
-apic_updat

[PATCH v6 4/5] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-07-15 Thread Bui Quang Minh
As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index dcc334060c..5e576f6059 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4043,17 +4043,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   && x86_iommu_ir_supported(x86_iommu) ?
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
-if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (!kvm_enable_x2apic()) {
-error_setg(errp, "eim=on requires support on the KVM side"
- "(X2APIC_API, first shipped in v4.7)");
-return false;
-}
-}
 
 /* Currently only address widths supported are 39 and 48 bits */
 if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
-- 
2.25.1




[PATCH v6 1/5] i386/tcg: implement x2APIC registers MSR access

2023-07-15 Thread Bui Quang Minh
This commit refactors apic_mem_read/write to support both MMIO access in
xAPIC and MSR access in x2APIC.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 79 ++--
 hw/intc/trace-events |  4 +-
 include/hw/i386/apic.h   |  3 ++
 target/i386/cpu.h|  3 ++
 target/i386/tcg/sysemu/misc_helper.c | 27 ++
 5 files changed, 86 insertions(+), 30 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ac3d47d231..cb8c20de93 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,16 +643,11 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+uint64_t apic_register_read(int index)
 {
 DeviceState *dev;
 APICCommonState *s;
-uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+uint64_t val;
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -653,7 +655,6 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -720,7 +721,23 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 val = 0;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+return val;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint32_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+val = (uint32_t)apic_register_read(index);
+
 return val;
 }
 
@@ -737,27 +754,10 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+void apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -765,7 +765,7 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -843,6 +843,29 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 }
 
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/* MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa. */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
 static void apic_pre_save(APICCommonState *s)
 {
 apic_sync_vapic(s, SYNC_FROM_VAPIC);
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 36ff71f947..1ef29d0256 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -14,8 +14,8 @@ cpu_get_apic_base(uint64_t val) "0x%016"PRIx64
 # apic.c
 apic_local_deliver(int vector, uint32_t lvt) "vector %d delivery mode %d"
 apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, 
uint8_t vector_num, uint8_t trigger_mode) "dest %d dest_mode %d delivery_mode 
%d vector %d trigger_mode %d"
-apic_mem_readl(uint64_t addr, uint32_t val)  "0x%"PRIx64" = 0x%

[PATCH v6 2/5] apic: add support for x2APIC mode

2023-07-15 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and
x2APIC mode register access are supported.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/x86.c   |   8 +-
 hw/intc/apic.c  | 266 
 hw/intc/apic_common.c   |   9 ++
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|   8 +-
 6 files changed, 231 insertions(+), 70 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index a88a126123..8b70f0a6ea 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -132,11 +132,11 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * Can we support APIC ID 255 or higher?
  *
  * Under Xen: yes.
- * With userspace emulated lapic: no
+ * With userspace emulated lapic: checked later in apic_common_set_id.
  * With KVM's in-kernel lapic: only if X2APIC API is enabled.
  */
 if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -146,6 +146,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index cb8c20de93..9f741794a7 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -31,15 +31,15 @@
 #include "hw/i386/apic-msidef.h"
 #include "qapi/error.h"
 #include "qom/object.h"
-
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
+#include "tcg/helper-tcg.h"
 
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +49,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,7 +211,7 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for(__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
 for(__j = 0; __j < 32; __j++) {\
@@ -226,7 +238,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for(i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +288,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
-  uint8_t vector_num, uint8_t trigger_mode)
+static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode,
+ uint8_t delivery_mode, uint8_t vector_num,
+ uint8_t trigger_mode)
 {
-uint32_t deliver_bitmask[MAX_APIC_WORDS];
+uint32_t *deliver_bitmask = g_mallo

[PATCH v6 3/5] apic, i386/tcg: add x2apic transitions

2023-07-15 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 50 
 hw/intc/apic_common.c|  7 ++--
 target/i386/cpu-sysemu.c | 10 ++
 target/i386/cpu.c|  8 ++---
 target/i386/cpu.h|  6 
 target/i386/tcg/sysemu/misc_helper.c |  4 +++
 6 files changed, 76 insertions(+), 9 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 9f741794a7..b8f56836a6 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -309,8 +309,41 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
+static void apic_set_base_check(APICCommonState *s, uint64_t val)
+{
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD)
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
+apic_set_base_check(s, val);
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -319,6 +352,23 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d95914066e..396f828be8 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -43,11 +43,8 @@ void cpu_set_apic_base(DeviceState *dev, uint64_t val)
 if (dev) {
 APICCommonState *s = APIC_COMMON(dev);
 APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
-/* switching to x2APIC, reset possibly modified xAPIC ID */
-if (!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
-(val & MSR_IA32_APICBASE_EXTD)) {
-s->id = s->initial_apic_id;
-}
+/* Reset possibly modified xAPIC ID */
+s->id = s->initial_apic_id;
 info->set_base(s, val);
 }
 }
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index a9ff10c517..f6bbe33372 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -235,6 +235,16 @@ void cpu_clear_apic_feature(CPUX86State *env)
 env->features[FEAT_1_EDX] &= ~CPUID_APIC;
 }
 
+void cpu_set_apic_feature(CPUX86State *env)
+{
+env->features[FEAT_1_EDX] |= CPUID_APIC;
+}
+
+bool cpu_has_x2apic_feature(CPUX86State *env)
+{
+return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC;
+}
+
 bool cpu_is_bsp(X86CPU *cpu)
 {
 return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 97ad229d8b..240a1f9737 100644
--- a/target/i386/cpu.c
+++ b/target/i3

[PATCH v6 5/5] amd_iommu: report x2APIC support to the operating system

2023-07-15 Thread Bui Quang Minh
This commit adds the XTSup configuration option to let the user choose
whether to enable this feature. When XTSup is enabled, additional bytes in
the IRTE with enabled guest virtual VAPIC are used to support a 32-bit
destination id.

Additionally, this commit exports IVHD type 0x11 besides the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. IVHD type 0x10 is
kept so that old operating systems that only parse type 0x10 can detect
the IOMMU device.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 129 +++
 hw/i386/amd_iommu.c  |  29 +-
 hw/i386/amd_iommu.h  |  16 --
 3 files changed, 117 insertions(+), 57 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 9c74fa17ad..4231b80f25 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2336,30 +2336,23 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
 .oem_table_id = oem_table_id };
+uint64_t feature_report;
 
 acpi_table_begin(&table, table_data);
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
 /*
  * A PCI bus walk, for each PCI host bridge, is necessary to create a
  * complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2379,56 +2372,94 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(ivhd_blob, 0x001, 4);
 }
 
-ivhd_table_len += ivhd_blob->len;
-
 /*
  * When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
  * Refer to spec - Table 95: IVHD device entry type codes
  *
  * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
  * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
  */
 if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-build_append_int_noprefix(table_data,
+build_append_int_noprefix(ivhd_blob,
  (0x1ull << 56) |   /* type IOAPIC */
  (IOAPIC_SB_DEVID << 40) |  /* IOAPIC devid */
  0x48,  /* special device 

[PATCH v6 0/5] Support x2APIC mode with TCG accelerator

2023-07-15 Thread Bui Quang Minh
Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel boots successfully
with x2APIC enabled and can enumerate a CPU with APIC ID 257.

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
  -smp 2,maxcpus=260 \
  -cpu qemu64,x2apic=on \
  -machine q35 \
  -device intel-iommu,intremap=on,eim=on \
  -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
  -m 2G \
  -kernel $KERNEL_DIR \
  -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
  -drive file=$IMAGE_DIR,format=raw \
  -nographic \
  -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
  -smp 2,maxcpus=260 \
  -cpu qemu64,x2apic=on \
  -machine q35 \
  -device amd-iommu,intremap=on,xtsup=on \
  -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
  -m 2G \
  -kernel $KERNEL_DIR \
  -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
  -drive file=$IMAGE_DIR,format=raw \
  -nographic \
  -s

To test the emulated userspace APIC with kvm-unit-tests, disable the test
device with this patch:

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)

if ((str = getenv("TEST_DEVICE")))
no_test_device = !atol(str);
+   no_test_device = true;

if ((str = getenv("MEMLIMIT")))
fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0
  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0
  FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching
to x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; in order to support these,
we need to figure out how to allocate and manage different MMIO regions for
each CPU. This can be an improvement in the future.

  FAIL: nmi-after-sti
  FAIL: multiple nmi

These errors are due to the way we handle CPU_INTERRUPT_NMI in core TCG.

  FAIL: TMCCT should stay at zero

This error is related to the APIC timer, which should be addressed in a
separate patch.

Version 6 changes,
- Patch 5:
  + Make all places use the amdvi_extended_feature_register to get extended
  feature register

Version 5 changes,
- Patch 3:
  + Rebase to master and fix conflict
- Patch 5:
  + Create a helper function to get amdvi extended feature register instead
  of storing it in AMDVIState

Version 4 changes,
- Patch 5:
  + Instead of replacing IVHD type 0x10 with type 0x11, export both types
  for backward compatibility with old guest operating system
  + Flip the xtsup feature check condition in amdvi_int_remap_ga for
  readability

Version 3 changes,
- Patch 2:
  + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
  + Make physical destination mode IPI which has destination id 0xffffffff
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (5):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended Interrupt Mode when using userspace APIC
  amd_iommu: report x2APIC support to the operating system

 hw/i386/acpi-build.c | 129 +
 hw/i386/amd_iommu.c  |  29 +-
 h

Re: [PATCH v5 0/5] Support x2APIC mode with TCG accelerator

2023-07-15 Thread Bui Quang Minh

On 7/15/23 21:28, Bui Quang Minh wrote:

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel boots successfully
with x2APIC enabled and can enumerate a CPU with APIC ID 257.

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device intel-iommu,intremap=on,eim=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device amd-iommu,intremap=on,xtsup=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

To test the emulated userspace APIC with kvm-unit-tests, disable the test
device with this patch:

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)
  
 if ((str = getenv("TEST_DEVICE")))

 no_test_device = !atol(str);
+   no_test_device = true;
  
 if ((str = getenv("MEMLIMIT")))

 fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

   FAIL: apic_disable: *0xfee00030: 50014
   FAIL: apic_disable: *0xfee00080: f0
   FAIL: apic_disable: *0xfee00030: 50014
   FAIL: apic_disable: *0xfee00080: f0
   FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching
to x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; in order to support these,
we need to figure out how to allocate and manage different MMIO regions for
each CPU. This can be an improvement in the future.

   FAIL: nmi-after-sti
   FAIL: multiple nmi

These errors are due to the way we handle CPU_INTERRUPT_NMI in core TCG.

   FAIL: TMCCT should stay at zero

This error is related to the APIC timer, which should be addressed in a
separate patch.

Version 5 changes,
- Patch 3:
   + Rebase to master and fix conflict
- Patch 5:
   + Create a helper function to get amdvi extended feature register instead
   of storing it in AMDVIState

Version 4 changes,
- Patch 5:
   + Instead of replacing IVHD type 0x10 with type 0x11, export both types
   for backward compatibility with old guest operating system
   + Flip the xtsup feature check condition in amdvi_int_remap_ga for
   readability

Version 3 changes,
- Patch 2:
   + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
   + Make physical destination mode IPI which has destination id 0xffffffff
   a broadcast to xAPIC CPUs
   + Make cluster address 0xf in cluster model of xAPIC logical destination
   mode a broadcast to all clusters
   + Create new extended_log_dest to store APIC_LDR information in x2APIC
   instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (5):
   i386/tcg: implement x2APIC registers MSR access
   apic: add support for x2APIC mode
   apic, i386/tcg: add x2apic transitions
   intel_iommu: allow Extended Interrupt Mode when using userspace APIC
   amd_iommu: report x2APIC support to the operating system

  hw/i386/acpi-build.c | 127 +
  hw/i386/amd_iommu.c  |  30 +-
  hw/i386/amd_iomm

[PATCH v5 2/5] apic: add support for x2APIC mode

2023-07-15 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and
x2APIC mode register access are supported.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/x86.c   |   8 +-
 hw/intc/apic.c  | 266 
 hw/intc/apic_common.c   |   9 ++
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|   8 +-
 6 files changed, 231 insertions(+), 70 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index a88a126123..8b70f0a6ea 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -132,11 +132,11 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * Can we support APIC ID 255 or higher?
  *
  * Under Xen: yes.
- * With userspace emulated lapic: no
+ * With userspace emulated lapic: checked later in apic_common_set_id.
  * With KVM's in-kernel lapic: only if X2APIC API is enabled.
  */
 if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -146,6 +146,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index cb8c20de93..9f741794a7 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -31,15 +31,15 @@
 #include "hw/i386/apic-msidef.h"
 #include "qapi/error.h"
 #include "qom/object.h"
-
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
+#include "tcg/helper-tcg.h"
 
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +49,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,7 +211,7 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for(__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
 for(__j = 0; __j < 32; __j++) {\
@@ -226,7 +238,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for(i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +288,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
-  uint8_t vector_num, uint8_t trigger_mode)
+static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode,
+ uint8_t delivery_mode, uint8_t vector_num,
+ uint8_t trigger_mode)
 {
-uint32_t deliver_bitmask[MAX_APIC_WORDS];
+uint32_t *deliver_bitmask = g_mallo

[PATCH v5 0/5] Support x2APIC mode with TCG accelerator

2023-07-15 Thread Bui Quang Minh
Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel successfully boots
with x2APIC enabled and can enumerate the CPU with APIC ID 257.

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
  -smp 2,maxcpus=260 \
  -cpu qemu64,x2apic=on \
  -machine q35 \
  -device intel-iommu,intremap=on,eim=on \
  -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
  -m 2G \
  -kernel $KERNEL_DIR \
  -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
  -drive file=$IMAGE_DIR,format=raw \
  -nographic \
  -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
  -smp 2,maxcpus=260 \
  -cpu qemu64,x2apic=on \
  -machine q35 \
  -device amd-iommu,intremap=on,xtsup=on \
  -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
  -m 2G \
  -kernel $KERNEL_DIR \
  -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
  -drive file=$IMAGE_DIR,format=raw \
  -nographic \
  -s

To test the emulated userspace APIC with kvm-unit-tests, disable the test
device with this patch:

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)
 
if ((str = getenv("TEST_DEVICE")))
no_test_device = !atol(str);
+   no_test_device = true;
 
if ((str = getenv("MEMLIMIT")))
fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic 

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0
  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0 
  FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching to
x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; to support these cases we
need to figure out how to allocate and manage a different MMIO region for
each CPU. This can be an improvement in the future.

  FAIL: nmi-after-sti
  FAIL: multiple nmi

These errors are caused by the way we handle CPU_INTERRUPT_NMI in core TCG.

  FAIL: TMCCT should stay at zero

This error is related to the APIC timer, which should be addressed in a
separate patch.

Version 5 changes,
- Patch 3:
  + Rebase to master and fix conflict
- Patch 5:
  + Create a helper function to get amdvi extended feature register instead
  of storing it in AMDVIState

Version 4 changes,
- Patch 5:
  + Instead of replacing IVHD type 0x10 with type 0x11, export both types
  for backward compatibility with old guest operating system
  + Flip the xtsup feature check condition in amdvi_int_remap_ga for
  readability

Version 3 changes,
- Patch 2:
  + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
  + Make physical destination mode IPI which has destination id 0xffffffff
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (5):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended Interrupt Mode when using userspace APIC
  amd_iommu: report x2APIC support to the operating system

 hw/i386/acpi-build.c | 127 +
 hw/i386/amd_iommu.c  |  30 +-
 hw/i386/amd_iommu.h  |  16 +-
 hw/i386/intel_iommu.c|  11 -
 hw/i386/x86.c   

[PATCH v5 5/5] amd_iommu: report x2APIC support to the operating system

2023-07-15 Thread Bui Quang Minh
This commit adds the XTSup configuration option to let the user choose whether
to enable this feature. When XTSup is enabled, additional bytes in the IRTE
with guest virtual APIC enabled are used to support a 32-bit destination ID.

Additionally, this commit exports IVHD type 0x11 alongside the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. IVHD type 0x10 is kept
so that old operating systems that only parse type 0x10 can still detect
the IOMMU device.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 127 ++-
 hw/i386/amd_iommu.c  |  30 +-
 hw/i386/amd_iommu.h  |  16 --
 3 files changed, 117 insertions(+), 56 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 9c74fa17ad..aeb41d917f 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2336,30 +2336,23 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
 .oem_table_id = oem_table_id };
+uint64_t feature_report;
 
 acpi_table_begin(&table, table_data);
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
 /*
  * A PCI bus walk, for each PCI host bridge, is necessary to create a
  * complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2379,56 +2372,92 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(ivhd_blob, 0x001, 4);
 }
 
-ivhd_table_len += ivhd_blob->len;
-
 /*
  * When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
  * Refer to spec - Table 95: IVHD device entry type codes
  *
  * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
  * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
  */
 if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-build_append_int_noprefix(table_data,
+build_append_int_noprefix(ivhd_blob,
  (0x1ull << 56) |   /* type IOAPIC */
  (IOAPIC_SB_DEVID << 40) |  /* IOAPIC devid */
  0x48,  /* special device 

[PATCH v5 3/5] apic, i386/tcg: add x2apic transitions

2023-07-15 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to the
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 50 
 hw/intc/apic_common.c|  7 ++--
 target/i386/cpu-sysemu.c | 10 ++
 target/i386/cpu.c|  8 ++---
 target/i386/cpu.h|  6 
 target/i386/tcg/sysemu/misc_helper.c |  4 +++
 6 files changed, 76 insertions(+), 9 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 9f741794a7..b8f56836a6 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -309,8 +309,41 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
+static void apic_set_base_check(APICCommonState *s, uint64_t val)
+{
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD)
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
+apic_set_base_check(s, val);
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -319,6 +352,23 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d95914066e..396f828be8 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -43,11 +43,8 @@ void cpu_set_apic_base(DeviceState *dev, uint64_t val)
 if (dev) {
 APICCommonState *s = APIC_COMMON(dev);
 APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
-/* switching to x2APIC, reset possibly modified xAPIC ID */
-if (!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
-(val & MSR_IA32_APICBASE_EXTD)) {
-s->id = s->initial_apic_id;
-}
+/* Reset possibly modified xAPIC ID */
+s->id = s->initial_apic_id;
 info->set_base(s, val);
 }
 }
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index a9ff10c517..f6bbe33372 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -235,6 +235,16 @@ void cpu_clear_apic_feature(CPUX86State *env)
 env->features[FEAT_1_EDX] &= ~CPUID_APIC;
 }
 
+void cpu_set_apic_feature(CPUX86State *env)
+{
+env->features[FEAT_1_EDX] |= CPUID_APIC;
+}
+
+bool cpu_has_x2apic_feature(CPUX86State *env)
+{
+return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC;
+}
+
 bool cpu_is_bsp(X86CPU *cpu)
 {
 return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 97ad229d8b..240a1f9737 100644
--- a/target/i386/cpu.c
+++ b/target/i3

[PATCH v5 4/5] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-07-15 Thread Bui Quang Minh
As the userspace APIC now supports x2APIC, the Intel interrupt remapping
hardware can be set to EIM mode when the userspace local APIC is used.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index dcc334060c..5e576f6059 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4043,17 +4043,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   && x86_iommu_ir_supported(x86_iommu) ?
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
-if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (!kvm_enable_x2apic()) {
-error_setg(errp, "eim=on requires support on the KVM side"
- "(X2APIC_API, first shipped in v4.7)");
-return false;
-}
-}
 
 /* Currently only address widths supported are 39 and 48 bits */
 if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
-- 
2.25.1




[PATCH v5 1/5] i386/tcg: implement x2APIC registers MSR access

2023-07-15 Thread Bui Quang Minh
This commit refactors apic_mem_read/write to support both MMIO access in
xAPIC and MSR access in x2APIC.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 79 ++--
 hw/intc/trace-events |  4 +-
 include/hw/i386/apic.h   |  3 ++
 target/i386/cpu.h|  3 ++
 target/i386/tcg/sysemu/misc_helper.c | 27 ++
 5 files changed, 86 insertions(+), 30 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ac3d47d231..cb8c20de93 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,16 +643,11 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+uint64_t apic_register_read(int index)
 {
 DeviceState *dev;
 APICCommonState *s;
-uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+uint64_t val;
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -653,7 +655,6 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -720,7 +721,23 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 val = 0;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+return val;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint32_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+val = (uint32_t)apic_register_read(index);
+
 return val;
 }
 
@@ -737,27 +754,10 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+void apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -765,7 +765,7 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -843,6 +843,29 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 }
 
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/* MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa. */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
 static void apic_pre_save(APICCommonState *s)
 {
 apic_sync_vapic(s, SYNC_FROM_VAPIC);
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 36ff71f947..1ef29d0256 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -14,8 +14,8 @@ cpu_get_apic_base(uint64_t val) "0x%016"PRIx64
 # apic.c
 apic_local_deliver(int vector, uint32_t lvt) "vector %d delivery mode %d"
 apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, 
uint8_t vector_num, uint8_t trigger_mode) "dest %d dest_mode %d delivery_mode 
%d vector %d trigger_mode %d"
-apic_mem_readl(uint64_t addr, uint32_t val)  "0x%"PRIx64" = 0x%

Re: [PATCH v4 0/5] Support x2APIC mode with TCG accelerator

2023-07-12 Thread Bui Quang Minh

On 7/11/23 01:39, Michael S. Tsirkin wrote:

On Mon, May 22, 2023 at 11:31:52PM +0700, Bui Quang Minh wrote:

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

Testing to boot my own built Linux 6.3.0-rc2, the kernel successfully boot
with enabled x2APIC and can enumerate CPU with APIC ID 257

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device intel-iommu,intremap=on,eim=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
   -smp 2,maxcpus=260 \
   -cpu qemu64,x2apic=on \
   -machine q35 \
   -device amd-iommu,intremap=on,xtsup=on \
   -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
   -m 2G \
   -kernel $KERNEL_DIR \
   -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
   -drive file=$IMAGE_DIR,format=raw \
   -nographic \
   -s

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)
  
 if ((str = getenv("TEST_DEVICE")))

 no_test_device = !atol(str);
+   no_test_device = true;
  
 if ((str = getenv("MEMLIMIT")))

 fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

   FAIL: apic_disable: *0xfee00030: 50014
   FAIL: apic_disable: *0xfee00080: f0
   FAIL: apic_disable: *0xfee00030: 50014
   FAIL: apic_disable: *0xfee00080: f0
   FAIL: apicbase: relocate apic

These errors are because we don't disable MMIO region when switching to
x2APIC and don't support relocate MMIO region yet. This is a problem
because, MMIO region is the same for all CPUs, in order to support these we
need to figure out how to allocate and manage different MMIO regions for
each CPUs. This can be an improvement in the future.

   FAIL: nmi-after-sti
   FAIL: multiple nmi

These errors are in the way we handle CPU_INTERRUPT_NMI in core TCG.

   FAIL: TMCCT should stay at zero

This error is related to APIC timer which should be addressed in separate
patch.


So, I'm not sure how safe it is to merge as is - are we
creating a way for users to shoot themselves in the foot?
Would be better to just fix these issues before we merge.


I may be biased, as I am the author and want this to be merged. These 
issues are not introduced by this series; the current APIC implementation 
fails those tests too.


Currently, I only have some ideas for fixing the failing timer test case. In 
that test case, the timer is in one-shot mode and the initial count is 
non-zero. When the current count reaches 0, a timer interrupt is delivered 
and no further interrupts follow. If we then write to the timer LVT to 
switch to periodic mode, then according to Intel SDM Section 11.5.4 
(APIC Timer):


"Changing the mode of the APIC timer (from one-shot to periodic or vice 
versa) by writing to the
timer LVT entry does not start the timer. To start the timer, it is 
necessary to write to the initial-

count register as described above"

So it is expected that periodic mode does not start until the initial count 
is written. However, in our current implementation, we see that the initial 
count is non-zero, so we start the new periodic timer. The solution might be 
to no longer call apic_timer_update when the timer LVT is written and to make 
apic_get_current_count return 0 in the above case.


In my opinion, this series is expected to add the x2APIC feature, so if 
the user wants to use it, they need to enable th

Re: [PATCH v4 5/5] amd_iommu: report x2APIC support to the operating system

2023-07-12 Thread Bui Quang Minh

On 7/11/23 01:37, Michael S. Tsirkin wrote:

On Fri, Jun 23, 2023 at 10:28:43PM +0700, Bui Quang Minh wrote:

On 6/23/23 03:26, Michael S. Tsirkin wrote:

On Mon, May 22, 2023 at 11:31:57PM +0700, Bui Quang Minh wrote:

This commit adds XTSup configuration to let user choose to whether enable
this feature or not. When XTSup is enabled, additional bytes in IRTE with
enabled guest virtual VAPIC are used to support 32-bit destination id.

Additionally, this commit exports IVHD type 0x11 besides the old IVHD type
0x10 in ACPI table. IVHD type 0x10 does not report full set of IOMMU
features only the legacy ones, so operating system (e.g. Linux) may only
detects x2APIC support if IVHD type 0x11 is available. The IVHD type 0x10
is kept so that old operating system that only parses type 0x10 can detect
the IOMMU device.

Signed-off-by: Bui Quang Minh 
---
   hw/i386/acpi-build.c | 127 ++-
   hw/i386/amd_iommu.c  |  21 ++-
   hw/i386/amd_iommu.h  |  16 --
   3 files changed, 108 insertions(+), 56 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 512162003b..4459122e56 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2339,30 +2339,23 @@ static void
   build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
   const char *oem_table_id)
   {
-int ivhd_table_len = 24;
   AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
   GArray *ivhd_blob = g_array_new(false, true, 1);
   AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
   .oem_table_id = oem_table_id };
+uint64_t feature_report;
   acpi_table_begin(&table, table_data);
   /* IVinfo - IO virtualization information common to all
* IOMMU units in a system
*/
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
   /* reserved */
   build_append_int_noprefix(table_data, 0, 8);
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
   /*
* A PCI bus walk, for each PCI host bridge, is necessary to create a
* complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2382,56 +2375,92 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
   build_append_int_noprefix(ivhd_blob, 0x001, 4);
   }
-ivhd_table_len += ivhd_blob->len;
-
   /*
* When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
* Refer to spec - Table 95: IVHD device entry type codes
*
* Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
* See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
*/
   if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-build_append_int_noprefix(table_data,
+build_append_int_noprefix(ivhd_blob,
 

Re: [PATCH v4 5/5] amd_iommu: report x2APIC support to the operating system

2023-06-23 Thread Bui Quang Minh

On 6/23/23 03:26, Michael S. Tsirkin wrote:

On Mon, May 22, 2023 at 11:31:57PM +0700, Bui Quang Minh wrote:

This commit adds XTSup configuration to let user choose to whether enable
this feature or not. When XTSup is enabled, additional bytes in IRTE with
enabled guest virtual VAPIC are used to support 32-bit destination id.

Additionally, this commit exports IVHD type 0x11 besides the old IVHD type
0x10 in ACPI table. IVHD type 0x10 does not report full set of IOMMU
features only the legacy ones, so operating system (e.g. Linux) may only
detects x2APIC support if IVHD type 0x11 is available. The IVHD type 0x10
is kept so that old operating system that only parses type 0x10 can detect
the IOMMU device.

Signed-off-by: Bui Quang Minh 
---
  hw/i386/acpi-build.c | 127 ++-
  hw/i386/amd_iommu.c  |  21 ++-
  hw/i386/amd_iommu.h  |  16 --
  3 files changed, 108 insertions(+), 56 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 512162003b..4459122e56 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2339,30 +2339,23 @@ static void
  build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
  const char *oem_table_id)
  {
-int ivhd_table_len = 24;
  AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
  GArray *ivhd_blob = g_array_new(false, true, 1);
  AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
  .oem_table_id = oem_table_id };
+uint64_t feature_report;
  
  acpi_table_begin(&table, table_data);

  /* IVinfo - IO virtualization information common to all
   * IOMMU units in a system
   */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
  /* reserved */
  build_append_int_noprefix(table_data, 0, 8);
  
-/* IVHD definition - type 10h */

-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
  /*
   * A PCI bus walk, for each PCI host bridge, is necessary to create a
   * complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2382,56 +2375,92 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
  build_append_int_noprefix(ivhd_blob, 0x001, 4);
  }
  
-ivhd_table_len += ivhd_blob->len;

-
  /*
   * When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
   * Refer to spec - Table 95: IVHD device entry type codes
   *
   * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
   * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
   */
  if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-build_append_int_noprefix(table_data,
+build_append_int_noprefix(ivhd_blob,
   (0x1ull << 56) |   /* type IOAPIC */
   (IOAPIC_SB_DEVID << 40) |  /

Re: [REPOST PATCH v3 5/5] amd_iommu: report x2APIC support to the operating system

2023-05-22 Thread Bui Quang Minh

On 5/14/23 15:55, Bui Quang Minh wrote:

On 5/12/23 21:39, Michael S. Tsirkin wrote:

On Tue, Apr 11, 2023 at 09:24:40PM +0700, Bui Quang Minh wrote:
This commit adds XTSup configuration to let user choose to whether 
enable
this feature or not. When XTSup is enabled, additional bytes in IRTE 
with

enabled guest virtual VAPIC are used to support 32-bit destination id.

Additionally, this commit changes to use IVHD type 0x11 in ACPI table 
for

feature report to operating system. This is because Linux does not use
XTSup in IOMMU Feature Reporting field of IVHD type 0x10 but only use 
XTSup

bit in EFR Register Image of IVHD 0x11 to indicate x2APIC support (see
init_iommu_one in linux/drivers/iommu/amd/init.c)

Signed-off-by: Bui Quang Minh 


I'm concerned that switching to type 11 will break some older guests.
It would be better if we could export both type 10 and type 11
ivhd. A question however would be how does this interact
with older guests. For example:
https://lists.linuxfoundation.org/pipermail/iommu/2016-January/015310.html
it looks like linux before 2016 only expected one ivhd entry?


Exporting both type 0x10 and 0x11 looks reasonable to me. Before the above
commit, I see that Linux still loops through multiple IVHDs but only
handles the one with type 0x10. On newer kernels, it will choose to handle
the type that appears last corresponding to the first devid, which is weird
in my opinion.


+static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
+{
+    u8 *base = (u8 *)ivrs;
+    struct ivhd_header *ivhd = (struct ivhd_header *)
+    (base + IVRS_HEADER_LENGTH);
+    u8 last_type = ivhd->type;
+    u16 devid = ivhd->devid;
+
+    while (((u8 *)ivhd - base < ivrs->length) &&
+   (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
+    u8 *p = (u8 *) ivhd;
+
+    if (ivhd->devid == devid)
+    last_type = ivhd->type;
+    ivhd = (struct ivhd_header *)(p + ivhd->length);
+    }
+
+    return last_type;
+}

So when exposing type 0x10 followed by 0x11, an old kernel only parses
0x10 and does not support x2APIC, while a new kernel parses 0x11 and supports
x2APIC. I will expose both types in the next version.



Some research and testing here would be beneficial.
Similarly for windows guests.

Thanks!




---
  hw/i386/acpi-build.c | 28 ++--
  hw/i386/amd_iommu.c  | 21 +++--
  hw/i386/amd_iommu.h  | 16 +++-
  3 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ec857a117e..72d6bb2892 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2339,7 +2339,7 @@ static void
  build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char 
*oem_id,

  const char *oem_table_id)
  {
-    int ivhd_table_len = 24;
+    int ivhd_table_len = 40;
  AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
  GArray *ivhd_blob = g_array_new(false, true, 1);
  AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
@@ -2349,18 +2349,19 @@ build_amd_iommu(GArray *table_data, 
BIOSLinker *linker, const char *oem_id,

  /* IVinfo - IO virtualization information common to all
   * IOMMU units in a system
   */
-    build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+    build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
  /* reserved */
  build_append_int_noprefix(table_data, 0, 8);
-    /* IVHD definition - type 10h */
-    build_append_int_noprefix(table_data, 0x10, 1);
+    /* IVHD definition - type 11h */
+    build_append_int_noprefix(table_data, 0x11, 1);
  /* virtualization flags */
  build_append_int_noprefix(table_data,
   (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */


btw this should have been iotlbsup?


- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
+ (1UL << 4),  /* iotblSup */
   1);
  /*


hmm why are you removing these other flags?


According to the AMD IOMMU specification, bits 6 and 7, which are PrefSup
and PPRSup respectively in type 0x10, are reserved in type 0x11, so I
removed those flags when changing to type 0x11. In type 0x11, these
features are reported via the EFR Register Image below, I believe.




@@ -2404,13 +2405,12 @@ build_amd_iommu(GArray *table_data, 
BIOSLinker *linker, const char *oem_id,

  build_append_int_noprefix(table_data, 0, 2);
  /* IOMMU info */
  build_append_int_noprefix(table_data, 0, 2);
-    /* IOMMU Feature Reporting */
-    build_append_int_noprefix(table_data,
-   

[PATCH v4 5/5] amd_iommu: report x2APIC support to the operating system

2023-05-22 Thread Bui Quang Minh
This commit adds an XTSup configuration option to let the user choose whether
to enable this feature or not. When XTSup is enabled, additional bytes in IRTE
with enabled guest virtual VAPIC are used to support 32-bit destination id.

Additionally, this commit exports IVHD type 0x11 besides the old IVHD type
0x10 in the ACPI table. IVHD type 0x10 does not report the full set of IOMMU
features, only the legacy ones, so an operating system (e.g. Linux) may only
detect x2APIC support if IVHD type 0x11 is available. The IVHD type 0x10
is kept so that old operating systems that only parse type 0x10 can detect
the IOMMU device.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 127 ++-
 hw/i386/amd_iommu.c  |  21 ++-
 hw/i386/amd_iommu.h  |  16 --
 3 files changed, 108 insertions(+), 56 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 512162003b..4459122e56 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2339,30 +2339,23 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
 .oem_table_id = oem_table_id };
+uint64_t feature_report;
 
 acpi_table_begin(&table, table_data);
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
-/* virtualization flags */
-build_append_int_noprefix(table_data,
- (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
- 1);
-
 /*
  * A PCI bus walk, for each PCI host bridge, is necessary to create a
  * complete set of IVHD entries.  Do this into a separate blob so that we
@@ -2382,56 +2375,92 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(ivhd_blob, 0x001, 4);
 }
 
-ivhd_table_len += ivhd_blob->len;
-
 /*
  * When interrupt remapping is supported, we add a special IVHD device
- * for type IO-APIC.
- */
-if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-ivhd_table_len += 8;
-}
-
-/* IVHD length */
-build_append_int_noprefix(table_data, ivhd_table_len, 2);
-/* DeviceID */
-build_append_int_noprefix(table_data,
-  object_property_get_int(OBJECT(&s->pci), "addr",
-  &error_abort), 2);
-/* Capability offset */
-build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
-/* IOMMU base address */
-build_append_int_noprefix(table_data, s->mmio.addr, 8);
-/* PCI Segment Group */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU info */
-build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
-
-/* IVHD entries as found above */
-g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
-g_array_free(ivhd_blob, TRUE);
-
-/*
- * Add a special IVHD device type.
+ * for type IO-APIC
  * Refer to spec - Table 95: IVHD device entry type codes
  *
  * Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
  * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
  */
 if (x86_iommu_ir_supported(x86_iommu_get_default())) {
-build_append_int_noprefix(table_data,
+build_append_int_noprefix(ivhd_blob,
  (0x1ull << 56) |   /* type IOAPIC */
  (IOAPIC_SB_DEVID << 40) |  /* IOAPIC devid */
  0x48,  /* special device 
*/

[PATCH v4 3/5] apic, i386/tcg: add x2apic transitions

2023-05-22 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 50 
 hw/intc/apic_common.c|  7 ++--
 target/i386/cpu-sysemu.c | 10 ++
 target/i386/cpu.c|  5 +--
 target/i386/cpu.h|  6 
 target/i386/tcg/sysemu/misc_helper.c |  4 +++
 6 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 9f741794a7..b8f56836a6 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -309,8 +309,41 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
+static void apic_set_base_check(APICCommonState *s, uint64_t val)
+{
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD)
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
+apic_set_base_check(s, val);
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -319,6 +352,23 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d95914066e..396f828be8 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -43,11 +43,8 @@ void cpu_set_apic_base(DeviceState *dev, uint64_t val)
 if (dev) {
 APICCommonState *s = APIC_COMMON(dev);
 APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
-/* switching to x2APIC, reset possibly modified xAPIC ID */
-if (!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
-(val & MSR_IA32_APICBASE_EXTD)) {
-s->id = s->initial_apic_id;
-}
+/* Reset possibly modified xAPIC ID */
+s->id = s->initial_apic_id;
 info->set_base(s, val);
 }
 }
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index a9ff10c517..f6bbe33372 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -235,6 +235,16 @@ void cpu_clear_apic_feature(CPUX86State *env)
 env->features[FEAT_1_EDX] &= ~CPUID_APIC;
 }
 
+void cpu_set_apic_feature(CPUX86State *env)
+{
+env->features[FEAT_1_EDX] |= CPUID_APIC;
+}
+
+bool cpu_has_x2apic_feature(CPUX86State *env)
+{
+return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC;
+}
+
 bool cpu_is_bsp(X86CPU *cpu)
 {
 return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index a61cd6d99d..a426d0f7f1 100644
--- a/target/i386/cpu.c
+++ b/targe

[PATCH v4 4/5] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-05-22 Thread Bui Quang Minh
As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 94d52f4205..dcaa733972 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4046,17 +4046,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   && x86_iommu_ir_supported(x86_iommu) ?
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
-if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (!kvm_enable_x2apic()) {
-error_setg(errp, "eim=on requires support on the KVM side"
- "(X2APIC_API, first shipped in v4.7)");
-return false;
-}
-}
 
 /* Currently only address widths supported are 39 and 48 bits */
 if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
-- 
2.25.1




[PATCH v4 1/5] i386/tcg: implement x2APIC registers MSR access

2023-05-22 Thread Bui Quang Minh
This commit refactors apic_mem_read/write to support both MMIO access in
xAPIC and MSR access in x2APIC.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 79 ++--
 hw/intc/trace-events |  4 +-
 include/hw/i386/apic.h   |  3 ++
 target/i386/cpu.h|  3 ++
 target/i386/tcg/sysemu/misc_helper.c | 27 ++
 5 files changed, 86 insertions(+), 30 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index ac3d47d231..cb8c20de93 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,16 +643,11 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+uint64_t apic_register_read(int index)
 {
 DeviceState *dev;
 APICCommonState *s;
-uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+uint64_t val;
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -653,7 +655,6 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -720,7 +721,23 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 val = 0;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+return val;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint32_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+val = (uint32_t)apic_register_read(index);
+
 return val;
 }
 
@@ -737,27 +754,10 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+void apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -765,7 +765,7 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -843,6 +843,29 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 }
 
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/* MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa. */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
 static void apic_pre_save(APICCommonState *s)
 {
 apic_sync_vapic(s, SYNC_FROM_VAPIC);
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 5c6094c457..9e1549fd1c 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -14,8 +14,8 @@ cpu_get_apic_base(uint64_t val) "0x%016"PRIx64
 # apic.c
 apic_local_deliver(int vector, uint32_t lvt) "vector %d delivery mode %d"
 apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, 
uint8_t vector_num, uint8_t trigger_mode) "dest %d dest_mode %d delivery_mode 
%d vector %d trigger_mode %d"
-apic_mem_readl(uint64_t addr, uint32_t val)  "0x%"PRIx64" = 0x%

[PATCH v4 2/5] apic: add support for x2APIC mode

2023-05-22 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and
x2APIC mode register access are supported.

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Bui Quang Minh 
---
 hw/i386/x86.c   |   8 +-
 hw/intc/apic.c  | 266 
 hw/intc/apic_common.c   |   9 ++
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|   8 +-
 6 files changed, 231 insertions(+), 70 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index a88a126123..8b70f0a6ea 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -132,11 +132,11 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * Can we support APIC ID 255 or higher?
  *
  * Under Xen: yes.
- * With userspace emulated lapic: no
+ * With userspace emulated lapic: checked later in apic_common_set_id.
  * With KVM's in-kernel lapic: only if X2APIC API is enabled.
  */
 if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -146,6 +146,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index cb8c20de93..9f741794a7 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -31,15 +31,15 @@
 #include "hw/i386/apic-msidef.h"
 #include "qapi/error.h"
 #include "qom/object.h"
-
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
+#include "tcg/helper-tcg.h"
 
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +49,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,7 +211,7 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for(__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
 for(__j = 0; __j < 32; __j++) {\
@@ -226,7 +238,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for(i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +288,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
-  uint8_t vector_num, uint8_t trigger_mode)
+static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode,
+ uint8_t delivery_mode, uint8_t vector_num,
+ uint8_t trigger_mode)
 {
-uint32_t deliver_bitmask[MAX_APIC_WORDS];
+uint32_t *deliver_bitmask = g_mallo

[PATCH v4 0/5] Support x2APIC mode with TCG accelerator

2023-05-22 Thread Bui Quang Minh
Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

When booting my own build of Linux 6.3.0-rc2, the kernel successfully boots
with x2APIC enabled and can enumerate the CPU with APIC ID 257.

Using Intel IOMMU

qemu/build/qemu-system-x86_64 \
  -smp 2,maxcpus=260 \
  -cpu qemu64,x2apic=on \
  -machine q35 \
  -device intel-iommu,intremap=on,eim=on \
  -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
  -m 2G \
  -kernel $KERNEL_DIR \
  -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
  -drive file=$IMAGE_DIR,format=raw \
  -nographic \
  -s

Using AMD IOMMU

qemu/build/qemu-system-x86_64 \
  -smp 2,maxcpus=260 \
  -cpu qemu64,x2apic=on \
  -machine q35 \
  -device amd-iommu,intremap=on,xtsup=on \
  -device qemu64-x86_64-cpu,x2apic=on,core-id=257,socket-id=0,thread-id=0 \
  -m 2G \
  -kernel $KERNEL_DIR \
  -append "nokaslr console=ttyS0 root=/dev/sda earlyprintk=serial 
net.ifnames=0" \
  -drive file=$IMAGE_DIR,format=raw \
  -nographic \
  -s

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)
 
if ((str = getenv("TEST_DEVICE")))
no_test_device = !atol(str);
+   no_test_device = true;
 
if ((str = getenv("MEMLIMIT")))
fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic 

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0
  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0 
  FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching to
x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; in order to support these, we
need to figure out how to allocate and manage different MMIO regions for
each CPU. This can be an improvement in the future.

  FAIL: nmi-after-sti
  FAIL: multiple nmi

These errors are in the way we handle CPU_INTERRUPT_NMI in core TCG.

  FAIL: TMCCT should stay at zero

This error is related to APIC timer which should be addressed in separate
patch.

Version 4 changes,
- Patch 5:
  + Instead of replacing IVHD type 0x10 with type 0x11, export both types
  for backward compatibility with old guest operating system
  + Flip the xtsup feature check condition in amdvi_int_remap_ga for
  readability

Version 3 changes,
- Patch 2:
  + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
  + Make physical destination mode IPI which has destination id 0xffffffff
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (5):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended Interrupt Mode when using userspace APIC
  amd_iommu: report x2APIC support to the operating system

 hw/i386/acpi-build.c | 127 +
 hw/i386/amd_iommu.c  |  21 +-
 hw/i386/amd_iommu.h  |  16 +-
 hw/i386/intel_iommu.c|  11 -
 hw/i386/x86.c|   8 +-
 hw/intc/apic.c   | 395 +--
 hw/intc/apic_common.c|  16 +-
 hw/intc/trace-events |   4 +-
 include/h

Re: [REPOST PATCH v3 5/5] amd_iommu: report x2APIC support to the operating system

2023-05-15 Thread Bui Quang Minh

On 5/15/23 03:44, Michael S. Tsirkin wrote:

On Sun, May 14, 2023 at 03:55:11PM +0700, Bui Quang Minh wrote:

On 5/12/23 21:39, Michael S. Tsirkin wrote:

On Tue, Apr 11, 2023 at 09:24:40PM +0700, Bui Quang Minh wrote:

This commit adds XTSup configuration to let user choose to whether enable
this feature or not. When XTSup is enabled, additional bytes in IRTE with
enabled guest virtual VAPIC are used to support 32-bit destination id.

Additionally, this commit changes to use IVHD type 0x11 in ACPI table for
feature report to operating system. This is because Linux does not use
XTSup in IOMMU Feature Reporting field of IVHD type 0x10 but only use XTSup
bit in EFR Register Image of IVHD 0x11 to indicate x2APIC support (see
init_iommu_one in linux/drivers/iommu/amd/init.c)

Signed-off-by: Bui Quang Minh 


I'm concerned that switching to type 11 will break some older guests.
It would be better if we could export both type 10 and type 11
ivhd. A question however would be how does this interact
with older guests. For example:
https://lists.linuxfoundation.org/pipermail/iommu/2016-January/015310.html
it looks like linux before 2016 only expected one ivhd entry?


Exporting both type 0x10 and 0x11 looks reasonable to me. Before the above
commit, I see that Linux still loops through multiple IVHDs but only handles
the one with type 0x10. On newer kernels, it will choose to handle the type that
appears last corresponding to the first devid, which is weird in my opinion.
+static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
+{
+   u8 *base = (u8 *)ivrs;
+   struct ivhd_header *ivhd = (struct ivhd_header *)
+   (base + IVRS_HEADER_LENGTH);
+   u8 last_type = ivhd->type;
+   u16 devid = ivhd->devid;
+
+   while (((u8 *)ivhd - base < ivrs->length) &&
+  (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
+   u8 *p = (u8 *) ivhd;
+
+   if (ivhd->devid == devid)
+   last_type = ivhd->type;
+   ivhd = (struct ivhd_header *)(p + ivhd->length);
+   }
+
+   return last_type;
+}


Yes I don't get the logic here either.
Talk to kernel devs who wrote this?

commit 8c7142f56fedfc6824b5bca56fee1f443e01746b
Author: Suravee Suthikulpanit 
Date:   Fri Apr 1 09:05:59 2016 -0400

 iommu/amd: Use the most comprehensive IVHD type that the driver can support
 
 The IVRS in more recent AMD system usually contains multiple

 IVHD block types (e.g. 0x10, 0x11, and 0x40) for each IOMMU.
 The newer IVHD types provide more information (e.g. new features
 specified in the IOMMU spec), while maintain compatibility with
 the older IVHD type.
 
 Having multiple IVHD type allows older IOMMU drivers to still function

 (e.g. using the older IVHD type 0x10) while the newer IOMMU driver can use
 the newer IVHD types (e.g. 0x11 and 0x40). Therefore, the IOMMU driver
 should only make use of the newest IVHD type that it can support.
 
 This patch adds new logic to determine the highest level of IVHD type

 it can support, and use it throughout to initialize the driver.
 This requires adding another pass to the IVRS parsing to determine
 appropriate IVHD type (see function get_highest_supported_ivhd_type())
 before parsing the contents.
 
 [Vincent: fix the build error of IVHD_DEV_ACPI_HID flag not found]
 
 Signed-off-by: Wan Zongshun 

 Signed-off-by: Suravee Suthikulpanit 
 Signed-off-by: Joerg Roedel 


I've sent an email to talk to kernel developers about this function. Here 
is the link to the email: 
https://lore.kernel.org/all/e8a87c2b-a29a-ccf9-49c6-3cfceaa20...@gmail.com/




Re: [REPOST PATCH v3 5/5] amd_iommu: report x2APIC support to the operating system

2023-05-14 Thread Bui Quang Minh

On 5/12/23 21:39, Michael S. Tsirkin wrote:

On Tue, Apr 11, 2023 at 09:24:40PM +0700, Bui Quang Minh wrote:

This commit adds XTSup configuration to let user choose to whether enable
this feature or not. When XTSup is enabled, additional bytes in IRTE with
enabled guest virtual VAPIC are used to support 32-bit destination id.

Additionally, this commit changes to use IVHD type 0x11 in ACPI table for
feature report to operating system. This is because Linux does not use
XTSup in IOMMU Feature Reporting field of IVHD type 0x10 but only use XTSup
bit in EFR Register Image of IVHD 0x11 to indicate x2APIC support (see
init_iommu_one in linux/drivers/iommu/amd/init.c)

Signed-off-by: Bui Quang Minh 


I'm concerned that switching to type 11 will break some older guests.
It would be better if we could export both type 10 and type 11
ivhd. A question however would be how does this interact
with older guests. For example:
https://lists.linuxfoundation.org/pipermail/iommu/2016-January/015310.html
it looks like linux before 2016 only expected one ivhd entry?


Exporting both type 0x10 and 0x11 looks reasonable to me. Before the above
commit, I see that Linux still loops through multiple IVHDs but only
handles the one with type 0x10. On newer kernels, it will choose to handle
the type that appears last corresponding to the first devid, which is weird
in my opinion.


+static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
+{
+   u8 *base = (u8 *)ivrs;
+   struct ivhd_header *ivhd = (struct ivhd_header *)
+   (base + IVRS_HEADER_LENGTH);
+   u8 last_type = ivhd->type;
+   u16 devid = ivhd->devid;
+
+   while (((u8 *)ivhd - base < ivrs->length) &&
+  (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
+   u8 *p = (u8 *) ivhd;
+
+   if (ivhd->devid == devid)
+   last_type = ivhd->type;
+   ivhd = (struct ivhd_header *)(p + ivhd->length);
+   }
+
+   return last_type;
+}

So when exposing type 0x10 followed by 0x11, an old kernel only parses
0x10 and does not support x2APIC, while a new kernel parses 0x11 and supports
x2APIC. I will expose both types in the next version.



Some research and testing here would be beneficial.
Similarly for windows guests.

Thanks!




---
  hw/i386/acpi-build.c | 28 ++--
  hw/i386/amd_iommu.c  | 21 +++--
  hw/i386/amd_iommu.h  | 16 +++-
  3 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ec857a117e..72d6bb2892 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2339,7 +2339,7 @@ static void
  build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
  const char *oem_table_id)
  {
-int ivhd_table_len = 24;
+int ivhd_table_len = 40;
  AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
  GArray *ivhd_blob = g_array_new(false, true, 1);
  AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
@@ -2349,18 +2349,19 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
  /* IVinfo - IO virtualization information common to all
   * IOMMU units in a system
   */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
  /* reserved */
  build_append_int_noprefix(table_data, 0, 8);
  
-/* IVHD definition - type 10h */

-build_append_int_noprefix(table_data, 0x10, 1);
+/* IVHD definition - type 11h */
+build_append_int_noprefix(table_data, 0x11, 1);
  /* virtualization flags */
  build_append_int_noprefix(table_data,
   (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */


btw this should have been iotlbsup?


- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
+ (1UL << 4),  /* iotblSup */
   1);
  
  /*


hmm why are you removing these other flags?


According to the AMD IOMMU specification, bits 6 and 7 are reserved in
type 0x11; in type 0x10 they are PrefSup and PPRSup respectively, so I
removed those flags when changing to type 0x11. In type 0x11, I believe
these features are reported via the EFR Register Image below.





@@ -2404,13 +2405,12 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
  build_append_int_noprefix(table_data, 0, 2);
  /* IOMMU info */
  build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting 

Re: [REPOST PATCH v3 0/5] Support x2APIC mode with TCG accelerator

2023-05-12 Thread Bui Quang Minh

On 4/11/23 21:24, Bui Quang Minh wrote:

[Reposting due to broken threading in previous post]

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)
  
 if ((str = getenv("TEST_DEVICE")))

 no_test_device = !atol(str);
+   no_test_device = true;
  
 if ((str = getenv("MEMLIMIT")))

 fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

   FAIL: apic_disable: *0xfee00030: 50014
   FAIL: apic_disable: *0xfee00080: f0
   FAIL: apic_disable: *0xfee00030: 50014
   FAIL: apic_disable: *0xfee00080: f0
   FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching
to x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; in order to support these we
need to figure out how to allocate and manage different MMIO regions for
each CPU. This can be an improvement in the future.

   FAIL: nmi-after-sti
   FAIL: multiple nmi

These errors are in the way we handle CPU_INTERRUPT_NMI in core TCG.

   FAIL: TMCCT should stay at zero

This error is related to APIC timer which should be addressed in separate
patch.

Version 3 changes,
- Patch 2:
   + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
   + Make physical destination mode IPI which has destination id 0xffffffff
   a broadcast to xAPIC CPUs
   + Make cluster address 0xf in cluster model of xAPIC logical destination
   mode a broadcast to all clusters
   + Create new extended_log_dest to store APIC_LDR information in x2APIC
   instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (5):
   i386/tcg: implement x2APIC registers MSR access
   apic: add support for x2APIC mode
   apic, i386/tcg: add x2apic transitions
   intel_iommu: allow Extended Interrupt Mode when using userspace APIC
   amd_iommu: report x2APIC support to the operating system

  hw/i386/acpi-build.c |  28 +-
  hw/i386/amd_iommu.c  |  21 +-
  hw/i386/amd_iommu.h  |  16 +-
  hw/i386/intel_iommu.c|  11 -
  hw/i386/x86.c|   8 +-
  hw/intc/apic.c   | 395 +--
  hw/intc/apic_common.c|  16 +-
  hw/intc/trace-events |   4 +-
  include/hw/i386/apic.h   |   6 +-
  include/hw/i386/apic_internal.h  |   7 +-
  target/i386/cpu-sysemu.c |  18 +-
  target/i386/cpu.c|   5 +-
  target/i386/cpu.h|   9 +
  target/i386/tcg/sysemu/misc_helper.c |  31 +++
  14 files changed, 436 insertions(+), 139 deletions(-)


Hello everyone, I just want to politely ping here. Could you spend some 
time to review the series?


Thank you very much,
Quang Minh.



Re: [REPOST PATCH v3 0/5] Support x2APIC mode with TCG accelerator

2023-04-21 Thread Bui Quang Minh

On 4/21/23 14:57, Michael S. Tsirkin wrote:

On Tue, Apr 11, 2023 at 09:24:35PM +0700, Bui Quang Minh wrote:

[Reposting due to broken threading in previous post]

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch



Series:

Reviewed-by: Michael S. Tsirkin 

any acks from tcg maintainers?


Thank you for your review. There is no ack from tcg maintainers yet.
Quang Minh.



[REPOST PATCH v3 4/5] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-04-11 Thread Bui Quang Minh
As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a62896759c..fd7c16b852 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4045,17 +4045,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   && x86_iommu_ir_supported(x86_iommu) ?
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
-if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (!kvm_enable_x2apic()) {
-error_setg(errp, "eim=on requires support on the KVM side"
- "(X2APIC_API, first shipped in v4.7)");
-return false;
-}
-}
 
 /* Currently only address widths supported are 39 and 48 bits */
 if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
-- 
2.25.1




[REPOST PATCH v3 5/5] amd_iommu: report x2APIC support to the operating system

2023-04-11 Thread Bui Quang Minh
This commit adds an XTSup configuration option to let the user choose whether
to enable this feature or not. When XTSup is enabled, additional bytes in IRTE
with enabled guest virtual VAPIC are used to support 32-bit destination id.

Additionally, this commit changes to use IVHD type 0x11 in the ACPI table for
feature reporting to the operating system. This is because Linux does not use
XTSup in the IOMMU Feature Reporting field of IVHD type 0x10 but only uses the
XTSup bit in the EFR Register Image of IVHD type 0x11 to indicate x2APIC
support (see init_iommu_one in linux/drivers/iommu/amd/init.c).

Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 28 ++--
 hw/i386/amd_iommu.c  | 21 +++--
 hw/i386/amd_iommu.h  | 16 +++-
 3 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ec857a117e..72d6bb2892 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2339,7 +2339,7 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
+int ivhd_table_len = 40;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
@@ -2349,18 +2349,19 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
+/* IVHD definition - type 11h */
+build_append_int_noprefix(table_data, 0x11, 1);
 /* virtualization flags */
 build_append_int_noprefix(table_data,
  (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
+ (1UL << 4),  /* iotblSup */
  1);
 
 /*
@@ -2404,13 +2405,12 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(table_data, 0, 2);
 /* IOMMU info */
 build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
+/* IOMMU Attributes */
+build_append_int_noprefix(table_data, 0, 4);
+/* EFR Register Image */
+build_append_int_noprefix(table_data, s->efr_reg, 8);
+/* EFR Register Image 2 */
+build_append_int_noprefix(table_data, 0, 8);
 
 /* IVHD entries as found above */
 g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index bcd016f5c5..5dfa93d945 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -31,6 +31,7 @@
 #include "hw/i386/apic_internal.h"
 #include "trace.h"
 #include "hw/i386/apic-msidef.h"
+#include "hw/qdev-properties.h"
 
 /* used AMD-Vi MMIO registers */
 const char *amdvi_mmio_low[] = {
@@ -1155,7 +1156,12 @@ static int amdvi_int_remap_ga(AMDVIState *iommu,
 irq->vector = irte.hi.fields.vector;
 irq->dest_mode = irte.lo.fields_remap.dm;
 irq->redir_hint = irte.lo.fields_remap.rq_eoi;
-irq->dest = irte.lo.fields_remap.destination;
+if (!iommu->xtsup) {
+irq->dest = irte.lo.fields_remap.destination & 0xff;
+} else {
+irq->dest = irte.lo.fields_remap.destination |
+(irte.hi.fields.destination_hi << 24);
+}
 
 return 0;
 }
@@ -1503,10 +1509,15 @@ static void amdvi_init(AMDVIState *s)
 s->enabled = false;
 s->ats_enabled = false;
 s->cmdbuf_enabled = false;
+s->efr_reg = AMDVI_DEFAULT_EXT_FEATURES;
+
+if (s->xtsup) {
+s->efr_reg |= AMDVI_FEATURE_XT;
+}
 
 /* reset MMIO */
 memset(s->mmior, 0, AMDVI_MMIO_SIZE);
-amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
+amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, s->efr_reg,
 0xffef, 0);
 amdvi_set_quad(s

[REPOST PATCH v3 3/5] apic, i386/tcg: add x2apic transitions

2023-04-11 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 50 
 hw/intc/apic_common.c|  7 ++--
 target/i386/cpu-sysemu.c | 10 ++
 target/i386/cpu.c|  5 +--
 target/i386/cpu.h|  6 
 target/i386/tcg/sysemu/misc_helper.c |  4 +++
 6 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 0f2f29e679..716fe865d7 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -309,8 +309,41 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
+static void apic_set_base_check(APICCommonState *s, uint64_t val)
+{
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD)
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
+apic_set_base_check(s, val);
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -319,6 +352,23 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d95914066e..396f828be8 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -43,11 +43,8 @@ void cpu_set_apic_base(DeviceState *dev, uint64_t val)
 if (dev) {
 APICCommonState *s = APIC_COMMON(dev);
 APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
-/* switching to x2APIC, reset possibly modified xAPIC ID */
-if (!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
-(val & MSR_IA32_APICBASE_EXTD)) {
-s->id = s->initial_apic_id;
-}
+/* Reset possibly modified xAPIC ID */
+s->id = s->initial_apic_id;
 info->set_base(s, val);
 }
 }
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index a9ff10c517..f6bbe33372 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -235,6 +235,16 @@ void cpu_clear_apic_feature(CPUX86State *env)
 env->features[FEAT_1_EDX] &= ~CPUID_APIC;
 }
 
+void cpu_set_apic_feature(CPUX86State *env)
+{
+env->features[FEAT_1_EDX] |= CPUID_APIC;
+}
+
+bool cpu_has_x2apic_feature(CPUX86State *env)
+{
+return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC;
+}
+
 bool cpu_is_bsp(X86CPU *cpu)
 {
 return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 6576287e5b..6847b2ae02 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -627,12 +627,

[REPOST PATCH v3 2/5] apic: add support for x2APIC mode

2023-04-11 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and
x2APIC mode register access are supported.

Signed-off-by: Bui Quang Minh 
---
Version 3 changes:
- Allow APIC ID > 255 only when x2APIC feature is supported on CPU
- Make physical destination mode IPI which has destination id 0xffffffff
a broadcast to xAPIC CPUs
- Make cluster address 0xf in cluster model of xAPIC logical destination
mode a broadcast to all clusters
- Create new extended_log_dest to store APIC_LDR information in x2APIC
instead of extending log_dest for backward compatibility in vmstate

 hw/i386/x86.c   |   8 +-
 hw/intc/apic.c  | 266 
 hw/intc/apic_common.c   |   9 ++
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|   8 +-
 6 files changed, 231 insertions(+), 70 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index a88a126123..8b70f0a6ea 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -132,11 +132,11 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * Can we support APIC ID 255 or higher?
  *
  * Under Xen: yes.
- * With userspace emulated lapic: no
+ * With userspace emulated lapic: checked later in apic_common_set_id.
  * With KVM's in-kernel lapic: only if X2APIC API is enabled.
  */
 if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -146,6 +146,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 61b494b20a..0f2f29e679 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -31,15 +31,15 @@
 #include "hw/i386/apic-msidef.h"
 #include "qapi/error.h"
 #include "qom/object.h"
-
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
+#include "tcg/helper-tcg.h"
 
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +49,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,7 +211,7 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for(__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
 for(__j = 0; __j < 32; __j++) {\
@@ -226,7 +238,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for(i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +288,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint

[REPOST PATCH v3 1/5] i386/tcg: implement x2APIC registers MSR access

2023-04-11 Thread Bui Quang Minh
This commit refactors apic_mem_read/write to support both MMIO access in
xAPIC and MSR access in x2APIC.

Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 79 ++--
 hw/intc/trace-events |  4 +-
 include/hw/i386/apic.h   |  3 ++
 target/i386/cpu.h|  3 ++
 target/i386/tcg/sysemu/misc_helper.c | 27 ++
 5 files changed, 86 insertions(+), 30 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 20b5a94073..61b494b20a 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,16 +643,11 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+uint64_t apic_register_read(int index)
 {
 DeviceState *dev;
 APICCommonState *s;
-uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+uint64_t val;
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -653,7 +655,6 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -720,7 +721,23 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 val = 0;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+return val;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint32_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+val = (uint32_t)apic_register_read(index);
+
 return val;
 }
 
@@ -737,27 +754,10 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+void apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -765,7 +765,7 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -843,6 +843,29 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 }
 
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/* MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa. */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
 static void apic_pre_save(APICCommonState *s)
 {
 apic_sync_vapic(s, SYNC_FROM_VAPIC);
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 50cadfb996..9d4e7a67be 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -14,8 +14,8 @@ cpu_get_apic_base(uint64_t val) "0x%016"PRIx64
 # apic.c
 apic_local_deliver(int vector, uint32_t lvt) "vector %d delivery mode %d"
 apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, 
uint8_t vector_num, uint8_t trigger_mode) "dest %d dest_mode %d delivery_mode 
%d vector %d trigger_mode %d"
-apic_mem_readl(uint64_t addr, uint32_t val)  "0x%"PRIx64" = 0x%08x"
-apic_mem_writel(uint64_

[REPOST PATCH v3 0/5] Support x2APIC mode with TCG accelerator

2023-04-11 Thread Bui Quang Minh
[Reposting due to broken threading in previous post]

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)
 
if ((str = getenv("TEST_DEVICE")))
no_test_device = !atol(str);
+   no_test_device = true;
 
if ((str = getenv("MEMLIMIT")))
fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic 

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0
  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0 
  FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching
to x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; in order to support these we
need to figure out how to allocate and manage different MMIO regions for
each CPU. This can be an improvement in the future.

  FAIL: nmi-after-sti
  FAIL: multiple nmi

These errors are in the way we handle CPU_INTERRUPT_NMI in core TCG.

  FAIL: TMCCT should stay at zero

This error is related to APIC timer which should be addressed in separate
patch.

Version 3 changes,
- Patch 2:
  + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
  + Make physical destination mode IPI which has destination id 0xffffffff
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (5):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended Interrupt Mode when using userspace APIC
  amd_iommu: report x2APIC support to the operating system

 hw/i386/acpi-build.c |  28 +-
 hw/i386/amd_iommu.c  |  21 +-
 hw/i386/amd_iommu.h  |  16 +-
 hw/i386/intel_iommu.c|  11 -
 hw/i386/x86.c|   8 +-
 hw/intc/apic.c   | 395 +--
 hw/intc/apic_common.c|  16 +-
 hw/intc/trace-events |   4 +-
 include/hw/i386/apic.h   |   6 +-
 include/hw/i386/apic_internal.h  |   7 +-
 target/i386/cpu-sysemu.c |  18 +-
 target/i386/cpu.c|   5 +-
 target/i386/cpu.h|   9 +
 target/i386/tcg/sysemu/misc_helper.c |  31 +++
 14 files changed, 436 insertions(+), 139 deletions(-)

-- 
2.25.1




Re: [PATCH v3 0/5] Support x2APIC mode with TCG accelerator

2023-04-11 Thread Bui Quang Minh

On 4/11/23 14:09, Michael S. Tsirkin wrote:

On Sun, Apr 09, 2023 at 09:40:22PM +0700, Bui Quang Minh wrote:

Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

Testing the emulated userspace APIC with kvm-unit-tests, disable test
device with this patch



Threading's broken with this posting.


Oh, thank you, I will repost this series with correct threading.
Quang Minh.



[PATCH v3 4/5] intel_iommu: allow Extended Interrupt Mode when using userspace APIC

2023-04-09 Thread Bui Quang Minh
As userspace APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Signed-off-by: Bui Quang Minh 
---
 hw/i386/intel_iommu.c | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a62896759c..fd7c16b852 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4045,17 +4045,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
   && x86_iommu_ir_supported(x86_iommu) ?
   ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 }
-if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-if (!kvm_irqchip_is_split()) {
-error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-return false;
-}
-if (!kvm_enable_x2apic()) {
-error_setg(errp, "eim=on requires support on the KVM side"
- "(X2APIC_API, first shipped in v4.7)");
-return false;
-}
-}
 
 /* Currently only address widths supported are 39 and 48 bits */
 if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
-- 
2.25.1




[PATCH v3 3/5] apic, i386/tcg: add x2apic transitions

2023-04-09 Thread Bui Quang Minh
This commit adds support for x2APIC transitions when writing to
MSR_IA32_APICBASE register and finally adds CPUID_EXT_X2APIC to
TCG_EXT_FEATURES.

Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 50 
 hw/intc/apic_common.c|  7 ++--
 target/i386/cpu-sysemu.c | 10 ++
 target/i386/cpu.c|  5 +--
 target/i386/cpu.h|  6 
 target/i386/tcg/sysemu/misc_helper.c |  4 +++
 6 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 0f2f29e679..716fe865d7 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -309,8 +309,41 @@ bool is_x2apic_mode(DeviceState *dev)
 return s->apicbase & MSR_IA32_APICBASE_EXTD;
 }
 
+static void apic_set_base_check(APICCommonState *s, uint64_t val)
+{
+/* Enable x2apic when x2apic is not supported by CPU */
+if (!cpu_has_x2apic_feature(&s->cpu->env) &&
+val & MSR_IA32_APICBASE_EXTD)
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/*
+ * Transition into invalid state
+ * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+ * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+ */
+if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from disabled mode to x2APIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+/* Invalid transition from x2APIC to xAPIC */
+if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_ENABLE) &&
+!(val & MSR_IA32_APICBASE_EXTD))
+raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
+apic_set_base_check(s, val);
+
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cannot be enabled again */
@@ -319,6 +352,23 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
 cpu_clear_apic_feature(&s->cpu->env);
 s->spurious_vec &= ~APIC_SV_ENABLE;
 }
+
+/* Transition from disabled mode to xAPIC */
+if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+(val & MSR_IA32_APICBASE_ENABLE)) {
+s->apicbase |= MSR_IA32_APICBASE_ENABLE;
+cpu_set_apic_feature(&s->cpu->env);
+}
+
+/* Transition from xAPIC to x2APIC */
+if (cpu_has_x2apic_feature(&s->cpu->env) &&
+!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+(val & MSR_IA32_APICBASE_EXTD)) {
+s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+s->log_dest = ((s->initial_apic_id & 0x0) << 16) |
+  (1 << (s->initial_apic_id & 0xf));
+}
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d95914066e..396f828be8 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -43,11 +43,8 @@ void cpu_set_apic_base(DeviceState *dev, uint64_t val)
 if (dev) {
 APICCommonState *s = APIC_COMMON(dev);
 APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
-/* switching to x2APIC, reset possibly modified xAPIC ID */
-if (!(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
-(val & MSR_IA32_APICBASE_EXTD)) {
-s->id = s->initial_apic_id;
-}
+/* Reset possibly modified xAPIC ID */
+s->id = s->initial_apic_id;
 info->set_base(s, val);
 }
 }
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index a9ff10c517..f6bbe33372 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -235,6 +235,16 @@ void cpu_clear_apic_feature(CPUX86State *env)
 env->features[FEAT_1_EDX] &= ~CPUID_APIC;
 }
 
+void cpu_set_apic_feature(CPUX86State *env)
+{
+env->features[FEAT_1_EDX] |= CPUID_APIC;
+}
+
+bool cpu_has_x2apic_feature(CPUX86State *env)
+{
+return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC;
+}
+
 bool cpu_is_bsp(X86CPU *cpu)
 {
 return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 6576287e5b..6847b2ae02 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -627,12 +627,

[PATCH v3 5/5] amd_iommu: report x2APIC support to the operating system

2023-04-09 Thread Bui Quang Minh
This commit adds an XTSup configuration option to let the user choose whether
to enable this feature. When XTSup is enabled, additional bytes in the IRTE
with enabled guest virtual VAPIC are used to support a 32-bit destination ID.

Additionally, this commit switches to IVHD type 0x11 in the ACPI table for
reporting features to the operating system. This is because Linux does not use
XTSup in the IOMMU Feature Reporting field of IVHD type 0x10 but only uses the
XTSup bit in the EFR Register Image of IVHD type 0x11 to indicate x2APIC
support (see init_iommu_one in linux/drivers/iommu/amd/init.c).

Signed-off-by: Bui Quang Minh 
---
 hw/i386/acpi-build.c | 28 ++--
 hw/i386/amd_iommu.c  | 21 +++--
 hw/i386/amd_iommu.h  | 16 +++-
 3 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ec857a117e..72d6bb2892 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2339,7 +2339,7 @@ static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
 const char *oem_table_id)
 {
-int ivhd_table_len = 24;
+int ivhd_table_len = 40;
 AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
 GArray *ivhd_blob = g_array_new(false, true, 1);
 AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
@@ -2349,18 +2349,19 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 /* IVinfo - IO virtualization information common to all
  * IOMMU units in a system
  */
-build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+build_append_int_noprefix(table_data,
+ (1UL << 0) | /* EFRSup */
+ (40UL << 8), /* PASize */
+ 4);
 /* reserved */
 build_append_int_noprefix(table_data, 0, 8);
 
-/* IVHD definition - type 10h */
-build_append_int_noprefix(table_data, 0x10, 1);
+/* IVHD definition - type 11h */
+build_append_int_noprefix(table_data, 0x11, 1);
 /* virtualization flags */
 build_append_int_noprefix(table_data,
  (1UL << 0) | /* HtTunEn  */
- (1UL << 4) | /* iotblSup */
- (1UL << 6) | /* PrefSup  */
- (1UL << 7),  /* PPRSup   */
+ (1UL << 4),  /* iotblSup */
  1);
 
 /*
@@ -2404,13 +2405,12 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 build_append_int_noprefix(table_data, 0, 2);
 /* IOMMU info */
 build_append_int_noprefix(table_data, 0, 2);
-/* IOMMU Feature Reporting */
-build_append_int_noprefix(table_data,
- (48UL << 30) | /* HATS   */
- (48UL << 28) | /* GATS   */
- (1UL << 2)   | /* GTSup  */
- (1UL << 6),/* GASup  */
- 4);
+/* IOMMU Attributes */
+build_append_int_noprefix(table_data, 0, 4);
+/* EFR Register Image */
+build_append_int_noprefix(table_data, s->efr_reg, 8);
+/* EFR Register Image 2 */
+build_append_int_noprefix(table_data, 0, 8);
 
 /* IVHD entries as found above */
 g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index bcd016f5c5..5dfa93d945 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -31,6 +31,7 @@
 #include "hw/i386/apic_internal.h"
 #include "trace.h"
 #include "hw/i386/apic-msidef.h"
+#include "hw/qdev-properties.h"
 
 /* used AMD-Vi MMIO registers */
 const char *amdvi_mmio_low[] = {
@@ -1155,7 +1156,12 @@ static int amdvi_int_remap_ga(AMDVIState *iommu,
 irq->vector = irte.hi.fields.vector;
 irq->dest_mode = irte.lo.fields_remap.dm;
 irq->redir_hint = irte.lo.fields_remap.rq_eoi;
-irq->dest = irte.lo.fields_remap.destination;
+if (!iommu->xtsup) {
+irq->dest = irte.lo.fields_remap.destination & 0xff;
+} else {
+irq->dest = irte.lo.fields_remap.destination |
+(irte.hi.fields.destination_hi << 24);
+}
 
 return 0;
 }
@@ -1503,10 +1509,15 @@ static void amdvi_init(AMDVIState *s)
 s->enabled = false;
 s->ats_enabled = false;
 s->cmdbuf_enabled = false;
+s->efr_reg = AMDVI_DEFAULT_EXT_FEATURES;
+
+if (s->xtsup) {
+s->efr_reg |= AMDVI_FEATURE_XT;
+}
 
 /* reset MMIO */
 memset(s->mmior, 0, AMDVI_MMIO_SIZE);
-amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
+amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, s->efr_reg,
 0xffef, 0);
 amdvi_set_quad(s

[PATCH v3 1/5] i386/tcg: implement x2APIC registers MSR access

2023-04-09 Thread Bui Quang Minh
This commit refactors apic_mem_read/write to support both MMIO access in
xAPIC and MSR access in x2APIC.

Signed-off-by: Bui Quang Minh 
---
 hw/intc/apic.c   | 79 ++--
 hw/intc/trace-events |  4 +-
 include/hw/i386/apic.h   |  3 ++
 target/i386/cpu.h|  3 ++
 target/i386/tcg/sysemu/misc_helper.c | 27 ++
 5 files changed, 86 insertions(+), 30 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 20b5a94073..61b494b20a 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+APICCommonState *s = APIC(dev);
+
+return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
 s->apicbase = (val & 0xf000) |
@@ -636,16 +643,11 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+uint64_t apic_register_read(int index)
 {
 DeviceState *dev;
 APICCommonState *s;
-uint32_t val;
-int index;
-
-if (size < 4) {
-return 0;
-}
+uint64_t val;
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -653,7 +655,6 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 }
 s = APIC(dev);
 
-index = (addr >> 4) & 0xff;
 switch(index) {
 case 0x02: /* id */
 val = s->id << 24;
@@ -720,7 +721,23 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 val = 0;
 break;
 }
-trace_apic_mem_readl(addr, val);
+
+trace_apic_register_read(index, val);
+return val;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+uint32_t val;
+int index;
+
+if (size < 4) {
+return 0;
+}
+
+index = (addr >> 4) & 0xff;
+val = (uint32_t)apic_register_read(index);
+
 return val;
 }
 
@@ -737,27 +754,10 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+void apic_register_write(int index, uint64_t val)
 {
 DeviceState *dev;
 APICCommonState *s;
-int index = (addr >> 4) & 0xff;
-
-if (size < 4) {
-return;
-}
-
-if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
-MSIMessage msi = { .address = addr, .data = val };
-apic_send_msi(&msi);
-return;
-}
 
 dev = cpu_get_current_apic();
 if (!dev) {
@@ -765,7 +765,7 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 s = APIC(dev);
 
-trace_apic_mem_writel(addr, val);
+trace_apic_register_write(index, val);
 
 switch(index) {
 case 0x02:
@@ -843,6 +843,29 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 }
 
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+   unsigned size)
+{
+int index = (addr >> 4) & 0xff;
+
+if (size < 4) {
+return;
+}
+
+if (addr > 0xfff || !index) {
+/* MSI and MMIO APIC are at the same memory location,
+ * but actually not on the global bus: MSI is on PCI bus
+ * APIC is connected directly to the CPU.
+ * Mapping them on the global bus happens to work because
+ * MSI registers are reserved in APIC MMIO and vice versa. */
+MSIMessage msi = { .address = addr, .data = val };
+apic_send_msi(&msi);
+return;
+}
+
+apic_register_write(index, val);
+}
+
 static void apic_pre_save(APICCommonState *s)
 {
 apic_sync_vapic(s, SYNC_FROM_VAPIC);
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 50cadfb996..9d4e7a67be 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -14,8 +14,8 @@ cpu_get_apic_base(uint64_t val) "0x%016"PRIx64
 # apic.c
 apic_local_deliver(int vector, uint32_t lvt) "vector %d delivery mode %d"
 apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, 
uint8_t vector_num, uint8_t trigger_mode) "dest %d dest_mode %d delivery_mode 
%d vector %d trigger_mode %d"
-apic_mem_readl(uint64_t addr, uint32_t val)  "0x%"PRIx64" = 0x%08x"
-apic_mem_writel(uint64_

[PATCH v3 2/5] apic: add support for x2APIC mode

2023-04-09 Thread Bui Quang Minh
This commit extends the APIC ID to 32 bits and removes the 255 max APIC
ID limit in the userspace APIC. The array that manages local APICs is now
dynamically allocated based on the max APIC ID of the created x86 machine.
Also, the new x2APIC IPI destination determination scheme, self IPI and x2APIC
mode register access are supported.

Signed-off-by: Bui Quang Minh 
---
Version 3 changes:
- Allow APIC ID > 255 only when x2APIC feature is supported on CPU
- Make physical destination mode IPI which has destination id 0xffffffff
a broadcast to xAPIC CPUs
- Make cluster address 0xf in cluster model of xAPIC logical destination
mode a broadcast to all clusters
- Create new extended_log_dest to store APIC_LDR information in x2APIC
instead of extending log_dest for backward compatibility in vmstate

 hw/i386/x86.c   |   8 +-
 hw/intc/apic.c  | 266 
 hw/intc/apic_common.c   |   9 ++
 include/hw/i386/apic.h  |   3 +-
 include/hw/i386/apic_internal.h |   7 +-
 target/i386/cpu-sysemu.c|   8 +-
 6 files changed, 231 insertions(+), 70 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index a88a126123..8b70f0a6ea 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -132,11 +132,11 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  * Can we support APIC ID 255 or higher?
  *
  * Under Xen: yes.
- * With userspace emulated lapic: no
+ * With userspace emulated lapic: checked later in apic_common_set_id.
  * With KVM's in-kernel lapic: only if X2APIC API is enabled.
  */
 if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
-(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
 error_report("current -smp configuration requires kernel "
  "irqchip and X2APIC API support.");
 exit(EXIT_FAILURE);
@@ -146,6 +146,10 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 kvm_set_max_apic_id(x86ms->apic_id_limit);
 }
 
+if (!kvm_irqchip_in_kernel()) {
+apic_set_max_apic_id(x86ms->apic_id_limit);
+}
+
 possible_cpus = mc->possible_cpu_arch_ids(ms);
 for (i = 0; i < ms->smp.cpus; i++) {
 x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 61b494b20a..0f2f29e679 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -31,15 +31,15 @@
 #include "hw/i386/apic-msidef.h"
 #include "qapi/error.h"
 #include "qom/object.h"
-
-#define MAX_APICS 255
-#define MAX_APIC_WORDS 8
+#include "tcg/helper-tcg.h"
 
 #define SYNC_FROM_VAPIC 0x1
 #define SYNC_TO_VAPIC   0x2
 #define SYNC_ISR_IRR_TO_VAPIC   0x4
 
-static APICCommonState *local_apics[MAX_APICS + 1];
+static APICCommonState **local_apics;
+static uint32_t max_apics;
+static uint32_t max_apic_words;
 
 #define TYPE_APIC "apic"
 /*This is reusing the APICCommonState typedef from APIC_COMMON */
@@ -49,7 +49,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-  uint8_t dest, uint8_t dest_mode);
+  uint32_t dest, uint8_t dest_mode);
+
+void apic_set_max_apic_id(uint32_t max_apic_id)
+{
+int word_size = 32;
+
+/* round up the max apic id to next multiple of words */
+max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1);
+
+local_apics = g_malloc0(sizeof(*local_apics) * max_apics);
+max_apic_words = max_apics >> 5;
+}
+
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -199,7 +211,7 @@ static void apic_external_nmi(APICCommonState *s)
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j;\
-for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+for(__i = 0; __i < max_apic_words; __i++) {\
 uint32_t __mask = deliver_bitmask[__i];\
 if (__mask) {\
 for(__j = 0; __j < 32; __j++) {\
@@ -226,7 +238,7 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
 {
 int i, d;
 d = -1;
-for(i = 0; i < MAX_APIC_WORDS; i++) {
+for(i = 0; i < max_apic_words; i++) {
 if (deliver_bitmask[i]) {
 d = i * 32 + apic_ffs_bit(deliver_bitmask[i]);
 break;
@@ -276,16 +288,18 @@ static void apic_bus_deliver(const uint32_t 
*deliver_bitmask,
  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint

[PATCH v3 0/5] Support x2APIC mode with TCG accelerator

2023-04-09 Thread Bui Quang Minh
Hi everyone,

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. Intel iommu
and AMD iommu are adjusted to support x2APIC interrupt remapping. With this
series, we can now boot Linux kernel into x2APIC mode with TCG accelerator
using either Intel or AMD iommu.

To test the emulated userspace APIC with kvm-unit-tests, disable the test
device with this patch:

diff --git a/lib/x86/fwcfg.c b/lib/x86/fwcfg.c
index 1734afb..f56fe1c 100644
--- a/lib/x86/fwcfg.c
+++ b/lib/x86/fwcfg.c
@@ -27,6 +27,7 @@ static void read_cfg_override(void)
 
if ((str = getenv("TEST_DEVICE")))
no_test_device = !atol(str);
+   no_test_device = true;
 
if ((str = getenv("MEMLIMIT")))
fw_override[FW_CFG_MAX_RAM] = atol(str) * 1024 * 1024;

~ env QEMU=/home/minh/Desktop/oss/qemu/build/qemu-system-x86_64 ACCEL=tcg \
./run_tests.sh -v -g apic 

TESTNAME=apic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/apic.flat -smp 2
-cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split FAIL
apic-split (54 tests, 8 unexpected failures, 1 skipped)
TESTNAME=ioapic-split TIMEOUT=90s ACCEL=tcg ./x86/run x86/ioapic.flat -smp
1 -cpu qemu64 -machine kernel_irqchip=split PASS ioapic-split (19 tests)
TESTNAME=x2apic TIMEOUT=30 ACCEL=tcg ./x86/run x86/apic.flat -smp 2 -cpu
qemu64,+x2apic,+tsc-deadline FAIL x2apic (54 tests, 8 unexpected failures,
1 skipped) TESTNAME=xapic TIMEOUT=60 ACCEL=tcg ./x86/run x86/apic.flat -smp
2 -cpu qemu64,-x2apic,+tsc-deadline -machine pit=off FAIL xapic (43 tests,
6 unexpected failures, 2 skipped)

  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0
  FAIL: apic_disable: *0xfee00030: 50014
  FAIL: apic_disable: *0xfee00080: f0 
  FAIL: apicbase: relocate apic

These errors occur because we don't disable the MMIO region when switching to
x2APIC and don't support relocating the MMIO region yet. This is a problem
because the MMIO region is the same for all CPUs; in order to support these we
need to figure out how to allocate and manage different MMIO regions for
each CPU. This can be an improvement in the future.

  FAIL: nmi-after-sti
  FAIL: multiple nmi

These errors are caused by the way we handle CPU_INTERRUPT_NMI in core TCG.

  FAIL: TMCCT should stay at zero

This error is related to APIC timer which should be addressed in separate
patch.

Version 3 changes,
- Patch 2:
  + Allow APIC ID > 255 only when x2APIC feature is supported on CPU
  + Make physical destination mode IPI which has destination id 0xffffffff
  a broadcast to xAPIC CPUs
  + Make cluster address 0xf in cluster model of xAPIC logical destination
  mode a broadcast to all clusters
  + Create new extended_log_dest to store APIC_LDR information in x2APIC
  instead of extending log_dest for backward compatibility in vmstate

Version 2 changes,
- Add support for APIC ID larger than 255
- Adjust AMD iommu for x2APIC support
- Reorganize and split patch 1,2 into patch 1,2,3 in version 2

Thanks,
Quang Minh.

Bui Quang Minh (5):
  i386/tcg: implement x2APIC registers MSR access
  apic: add support for x2APIC mode
  apic, i386/tcg: add x2apic transitions
  intel_iommu: allow Extended Interrupt Mode when using userspace APIC
  amd_iommu: report x2APIC support to the operating system

 hw/i386/acpi-build.c |  28 +-
 hw/i386/amd_iommu.c  |  21 +-
 hw/i386/amd_iommu.h  |  16 +-
 hw/i386/intel_iommu.c|  11 -
 hw/i386/x86.c|   8 +-
 hw/intc/apic.c   | 395 +--
 hw/intc/apic_common.c|  16 +-
 hw/intc/trace-events |   4 +-
 include/hw/i386/apic.h   |   6 +-
 include/hw/i386/apic_internal.h  |   7 +-
 target/i386/cpu-sysemu.c |  18 +-
 target/i386/cpu.c|   5 +-
 target/i386/cpu.h|   9 +
 target/i386/tcg/sysemu/misc_helper.c |  31 +++
 14 files changed, 436 insertions(+), 139 deletions(-)

-- 
2.25.1




  1   2   >