[Xen-devel] [Resend Fix PATCH] Qemu/Xen: Fix early freeing MSIX MMIO memory region

2015-10-02 Thread Lan Tianyu
MSIX MMIO memory region is added to the pt device's object as a property.
When the pt device is unplugged, all properties are deleted, and the
memory region's object is needed at that point (see
object_finalize_child_property()).
But the current code frees the MSIX MMIO memory region in xen_pt_msix_delete()
before deleting the pt device's properties, which causes a segmentation fault.
The bug can be reproduced by hotplugging the device frequently.

This patch fixes the issue by moving the MSIX MMIO memory region into
struct XenPCIPassthroughState so that it is freed together with the pt device's object.
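
For context, the lifetime rule at play here: a MemoryRegion initialised with an
owner becomes a QOM child property of that owner, so its storage must outlive
the owner object. Below is a standalone toy model in plain C (not QEMU code,
all names made up) of how freeing the struct that embeds such a child before
the owner is finalized turns finalization into a use-after-free.

/* Standalone toy model of the bug (not QEMU code; names are made up): an
 * "owner" keeps pointers to child objects, the way a QOM device object keeps
 * its MemoryRegion child properties. Freeing the struct that embeds a child
 * before the owner is finalized makes finalization touch freed memory. */
#include <stdio.h>
#include <stdlib.h>

struct child { const char *name; };
struct owner { struct child *children[4]; int nr; };
struct msix  { struct child mmio; };   /* child embedded in a bigger struct */

static void owner_add_child(struct owner *o, struct child *c)
{
    o->children[o->nr++] = c;
}

static void owner_finalize(struct owner *o)
{
    for (int i = 0; i < o->nr; i++)    /* use-after-free if a child was freed */
        printf("finalizing child %s\n", o->children[i]->name);
}

int main(void)
{
    struct owner dev = { .nr = 0 };
    struct msix *m = malloc(sizeof(*m));

    m->mmio.name = "msix-mmio";
    owner_add_child(&dev, &m->mmio);

    free(m);                /* the early free (xen_pt_msix_delete() analogue) */
    owner_finalize(&dev);   /* finalization analogue: touches freed memory    */
    return 0;
}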

Signed-off-by: Lan Tianyu 
---
Cc Xen devel maillist

 hw/xen/xen_pt.c |4 ++--
 hw/xen/xen_pt.h |2 +-
 hw/xen/xen_pt_msi.c |6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 2b54f52..0c11069 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -587,11 +587,11 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
 };
 
 bar = xen_pt_bar_from_region(s, mr);
-if (bar == -1 && (!s->msix || &s->msix->mmio != mr)) {
+if (bar == -1 && (!s->msix || &s->msix_mmio != mr)) {
 return;
 }
 
-if (s->msix && &s->msix->mmio == mr) {
+if (s->msix && &s->msix_mmio == mr) {
 if (adding) {
 s->msix->mmio_base_addr = sec->offset_within_address_space;
 rc = xen_pt_msix_update_remap(s, s->msix->bar_index);
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index 3bc22eb..3569c2c 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -199,7 +199,6 @@ typedef struct XenPTMSIX {
 uint64_t table_base;
 uint32_t table_offset_adjust; /* page align mmap */
 uint64_t mmio_base_addr;
-MemoryRegion mmio;
 void *phys_iomem_base;
 XenPTMSIXEntry msix_entry[0];
 } XenPTMSIX;
@@ -222,6 +221,7 @@ struct XenPCIPassthroughState {
 
 MemoryRegion bar[PCI_NUM_REGIONS - 1];
 MemoryRegion rom;
+MemoryRegion msix_mmio;
 
 MemoryListener memory_listener;
 MemoryListener io_listener;
diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index e3d7194..ae39ab3 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -558,7 +558,7 @@ int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t base)
 msix->msix_entry[i].pirq = XEN_PT_UNASSIGNED_PIRQ;
 }
 
-memory_region_init_io(&msix->mmio, OBJECT(s), &pci_msix_ops,
+memory_region_init_io(&s->msix_mmio, OBJECT(s), &pci_msix_ops,
   s, "xen-pci-pt-msix",
   (total_entries * PCI_MSIX_ENTRY_SIZE
+ XC_PAGE_SIZE - 1)
@@ -599,7 +599,7 @@ int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t base)
msix->phys_iomem_base);
 
 memory_region_add_subregion_overlap(&s->bar[bar_index], table_off,
-&msix->mmio,
+&s->msix_mmio,
 2); /* Priority: pci default + 1 */
 
 return 0;
@@ -626,7 +626,7 @@ void xen_pt_msix_delete(XenPCIPassthroughState *s)
+ msix->table_offset_adjust);
 }
 
-memory_region_del_subregion(&s->bar[msix->bar_index], &msix->mmio);
+memory_region_del_subregion(&s->bar[msix->bar_index], &s->msix_mmio);
 
 g_free(s->msix);
 s->msix = NULL;
-- 
1.7.9.5




[Xen-devel] [PATCH] Qemu/Xen: Fix early freeing MSIX MMIO memory region

2015-10-11 Thread Lan Tianyu

msix->mmio is added to the XenPCIPassthroughState's object as a property.
When that object is finalized, object_finalize_child_property() is called
for it, which calls object_property_del_all(), which tries to delete
msix->mmio and accesses msix->mmio's object. But the whole msix struct has
already been freed by xen_pt_msix_delete(), so this causes a segmentation
fault once msix->mmio has been overwritten.

This patch fixes the issue by unparenting and freeing msix->mmio only when
the device object itself is finalized.

Signed-off-by: Lan Tianyu 
---
 hw/xen/xen_pt.c |8 ++++++++
 hw/xen/xen_pt.h |1 +
 hw/xen/xen_pt_config_init.c |2 +-
 hw/xen/xen_pt_msi.c |   13 ++++++++++++-
 4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 2b54f52..aa96288 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -938,10 +938,18 @@ static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data)
 dc->props = xen_pci_passthrough_properties;
 };
 
+static void xen_pci_passthrough_finalize(Object *obj)
+{
+XenPCIPassthroughState *s = XEN_PT_DEVICE(obj);
+
+xen_pt_msix_delete(s);
+}
+
 static const TypeInfo xen_pci_passthrough_info = {
 .name = TYPE_XEN_PT_DEVICE,
 .parent = TYPE_PCI_DEVICE,
 .instance_size = sizeof(XenPCIPassthroughState),
+.instance_finalize = xen_pci_passthrough_finalize,
 .class_init = xen_pci_passthrough_class_init,
 };
 
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index 3bc22eb..c545280 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -305,6 +305,7 @@ void xen_pt_msi_disable(XenPCIPassthroughState *s);
 
 int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t base);
 void xen_pt_msix_delete(XenPCIPassthroughState *s);
+void xen_pt_msix_unmap(XenPCIPassthroughState *s);
 int xen_pt_msix_update(XenPCIPassthroughState *s);
 int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index);
 void xen_pt_msix_disable(XenPCIPassthroughState *s);
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 4a5bc11..0efee11 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -2079,7 +2079,7 @@ void xen_pt_config_delete(XenPCIPassthroughState *s)
 
 /* free MSI/MSI-X info table */
 if (s->msix) {
-xen_pt_msix_delete(s);
+xen_pt_msix_unmap(s);
 }
 g_free(s->msi);
 
diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index e3d7194..82de2bc 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -610,7 +610,7 @@ error_out:
 return rc;
 }
 
-void xen_pt_msix_delete(XenPCIPassthroughState *s)
+void xen_pt_msix_unmap(XenPCIPassthroughState *s)
 {
 XenPTMSIX *msix = s->msix;
 
@@ -627,6 +627,17 @@ void xen_pt_msix_delete(XenPCIPassthroughState *s)
 }
 
 memory_region_del_subregion(&s->bar[msix->bar_index], &msix->mmio);
+}
+
+void xen_pt_msix_delete(XenPCIPassthroughState *s)
+{
+XenPTMSIX *msix = s->msix;
+
+if (!msix) {
+return;
+}
+
+object_unparent(OBJECT(&msix->mmio));
 
 g_free(s->msix);
 s->msix = NULL;
-- 
1.7.9.5




[Xen-devel] Xen Q35 & virtual VTD support

2016-05-09 Thread Lan, Tianyu

Hi All: 
We are researching how to add virtual VT-d support for Xen HVM
guests. Current QEMU has basic virtual VT-d support for Q35. I'd like to
confirm whether Xen supports Q35 or not. Can we reuse it for Xen? Thanks.

The motivations for adding virtual VT-d support to Xen are:
1) Shared Virtual Memory (SVM)
2) Increasing max VCPUs beyond 255 (this feature relies on the virtual VT-d
interrupt remapping function.)



Re: [Xen-devel] Xen Q35 & virtual VTD support

2016-05-10 Thread Lan, Tianyu



On 5/10/2016 6:11 PM, Stefano Stabellini wrote:

Hello Tianyu,

I am CC'ing Anthony who should have a better idea about this. Also
please use my kernel.org email address for future correspondence.



OK. I get it.


What do you mean by reusing Q35 for Xen? If you mean using QEMU to
emulate a Q35 based machine for HVM guests, I think that should be OK.



From the xl code, I find it passes pc, xenpv or xenfv as the "-machine" param
to Qemu, but never Q35. I also tried changing the code to select Q35 in xl,
but the guest didn't boot up. So I want to check whether Xen supports Q35 in
the current code or not.

If yes, we can reuse Qemu virtual VTD for Xen with minor changes.



Thanks,

Stefano


On Mon, 9 May 2016, Lan, Tianyu wrote:

Hi All: 
We are researching how to add virtual VTD support for Xen HVM
guest. Current qemu has a basic virtual VTD support for Q35. I'd like to
confirm whether Xen supports Q35 or not. Can we reuse it for Xen? Thanks.

The motivations of adding virtual VTD support for Xen prepare for
1) Shared Virtual Memory (SVM)
2) Increase max VCPUs > 255 (The feature relies on virtual VTD irq
remapping function.)






Re: [Xen-devel] Xen Q35 & virtual VTD support

2016-05-10 Thread Lan, Tianyu



On 5/10/2016 10:52 PM, Anthony PERARD wrote:

On Tue, May 10, 2016 at 10:31:38PM +0800, Lan, Tianyu wrote:

On 5/10/2016 6:11 PM, Stefano Stabellini wrote:

Hello Tianyu,

What do you mean by reusing Q35 for Xen? If you mean using QEMU to
emulate a Q35 based machine for HVM guests, I think that should be OK.



From the xl code, I find it passes pc, xenpv or xenfv as the "-machine" param
to Qemu, but never Q35. I also tried changing the code to select Q35 in xl,
but the guest didn't boot up. So I want to check whether Xen supports Q35 in
the current code or not.


No, Xen does not support Q35.

But that is possible in the future, I did some work on it in the past, you
can find it here if you are curious:

http://xenbits.xen.org/gitweb/?p=people/aperard/xen-unstable.git;a=shortlog;h=refs/heads/machine-q35-wip

I'm not sure if it's going to help you.



Great. Thanks a lot for your help. BTW, do you plan to upstream
these patches?







[Xen-devel] [PATCH] Xen/timer: Disable watchdog during dumping timer queues

2016-09-13 Thread Lan Tianyu
On a machine with a large number of cpus, dump_timerq() lasts several seconds,
which may exceed the watchdog timeout and cause a Xen hypervisor reboot.
This patch disables the watchdog while dumping the timer queues to fix the
issue.

Signed-off-by: Lan Tianyu 
---
 xen/common/timer.c |4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/xen/common/timer.c b/xen/common/timer.c
index 29a60a9..2d9d828 100644
--- a/xen/common/timer.c
+++ b/xen/common/timer.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include <xen/watchdog.h>
 #include 
 #include 
 #include 
@@ -524,6 +525,7 @@ static void dump_timerq(unsigned char key)
 s_time_t   now = NOW();
 inti, j;
 
+watchdog_disable();
 printk("Dumping timer queues:\n");
 
 for_each_online_cpu( i )
@@ -538,6 +540,8 @@ static void dump_timerq(unsigned char key)
 dump_timer(t, now);
 spin_unlock_irqrestore(&ts->lock, flags);
 }
+
+watchdog_enable();
 }
 
 static void migrate_timers_from_cpu(unsigned int old_cpu)
-- 
1.7.1




Re: [Xen-devel] [PATCH] Xen/timer: Disable watchdog during dumping timer queues

2016-09-15 Thread Lan, Tianyu

On 9/13/2016 11:25 PM, Jan Beulich wrote:

Wait - what is do_invalid_op() doing on the stack? I don't think it
belongs there, and hence I wonder whether the keypress
happened after some already fatal event (in which case all bets
are off anyway).


It is not clear why do_invalid_op() is on the stack. There is no other fatal
event. The issue disappears when watchdog_timeout is set to 10s.


> Another solution is to schedule a tasklet to run keyhandler in timer
> handler and invoke process_pending_softirqs() in the dump_timerq().
> This also works but it requires to rework keyhandler mechanism.
>
> Disable watchdog seems to be simpler and I found dump_registers() also
> used the same way to deal with the issue.

That's true. Just that on large machines it defaults to the
alternative model, for which I'm not sure it actually needs the
watchdog disabled (as data for a single CPU shouldn't exceed
the threshold).



It seems unnecessary to disable the watchdog in the alternative model,
since dumping a single cpu's status will not take a long time.


For the issue in the dump timer info handler, is disabling the watchdog
acceptable to you, or do you have other suggestions to resolve the issue?

I also found other places that dump a lot of logs and disable the watchdog
(e.g. run_all_keyhandlers(), debugtrace_dump(), debugtrace_toggle() and so
on). This seems to be a common solution.







Jan


> Here is my draft patch of reworking keyhandler.




Re: [Xen-devel] Xen virtual IOMMU high level design doc

2016-09-15 Thread Lan, Tianyu

Hi Andrew:
Sorry to bother you. To make sure we are going in the right direction, it's
better to get feedback from you before we go a step further. Could you
have a look? Thanks.

On 8/17/2016 8:05 PM, Lan, Tianyu wrote:

Hi All:
 The following is our Xen vIOMMU high level design for detailed
discussion. Please have a look. Your comments are very much appreciated.
This design doesn't cover the changes needed when the root port is moved
into the hypervisor. We may design that later.


Content:
===

1. Motivation of vIOMMU
1.1 Enable more than 255 vcpus
1.2 Support VFIO-based user space driver
1.3 Support guest Shared Virtual Memory (SVM)
2. Xen vIOMMU Architecture
2.1 2nd level translation overview
2.2 Interrupt remapping overview
3. Xen hypervisor
3.1 New vIOMMU hypercall interface
3.2 2nd level translation
3.3 Interrupt remapping
3.4 1st level translation
3.5 Implementation consideration
4. Qemu
4.1 Qemu vIOMMU framework
4.2 Dummy xen-vIOMMU driver
4.3 Q35 vs. i440x
4.4 Report vIOMMU to hvmloader


1 Motivation for Xen vIOMMU
===

1.1 Enable more than 255 vcpu support
HPC virtualization requires support for more than 255 vcpus in a single VM
to meet parallel computing requirements. Supporting more than 255 vcpus
requires the interrupt remapping capability to be present on the vIOMMU to
deliver interrupts to vcpus with IDs > 255; otherwise a Linux guest fails to
boot up with more than 255 vcpus.


1.2 Support VFIO-based user space driver (e.g. DPDK) in the guest
It relies on the 2nd level translation capability (IOVA->GPA) on
vIOMMU. pIOMMU 2nd level becomes a shadowing structure of
vIOMMU to isolate DMA requests initiated by user space driver.


1.3 Support guest SVM (Shared Virtual Memory)
It relies on the 1st level translation table capability (GVA->GPA) on
vIOMMU. pIOMMU needs to enable both 1st level and 2nd level translation
in nested mode (GVA->GPA->HPA) for passthrough device. IGD passthrough
is the main usage today (to support OpenCL 2.0 SVM feature). In the
future SVM might be used by other I/O devices too.

2. Xen vIOMMU Architecture



* vIOMMU will be inside the Xen hypervisor for the following factors:
1) Avoid round trips between Qemu and the Xen hypervisor
2) Ease of integration with the rest of the hypervisor
3) HVMlite/PVH doesn't use Qemu
* Dummy xen-vIOMMU in Qemu as a wrapper of the new hypercall to create
/destroy the vIOMMU in the hypervisor and deal with virtual PCI devices' 2nd
level translation.

2.1 2nd level translation overview
For virtual PCI devices, the dummy xen-vIOMMU does the translation in
Qemu via the new hypercall.

For physical PCI devices, the vIOMMU in the hypervisor shadows the IO page
table from IOVA->GPA to IOVA->HPA and loads the page table into the physical IOMMU.

The following diagram shows the 2nd level translation architecture.
Qemu
                                   +----------------+
                                   |    Virtual     |
                                   |   PCI device   |
                                   +-------+--------+
                                           | DMA
                                           v
  +--------------------+  Request  +----------------+
  |                    |<----------+                |
  |  Dummy xen vIOMMU  | Target GPA|  Memory region |
  |                    +---------->+                |
  +---------+----------+           +-------+--------+
            |                              |
            | Hypercall                    |
------------+------------------------------+------------
Hypervisor  |                              |
            v                              |
     +------+------+                       |
     |   vIOMMU    |                       |
     +------+------+                       |
            |                              |
            v                              |
     +------+------+                       |
     | IOMMU driver|                       |
     +------+------+                       |
            |                              |
------------+------------------------------+------------
HW          v                              v
     +------+------+                +------+------+
     |    IOMMU    +--------------->+   Memory    |
     +-------------+                +-------------+

Re: [Xen-devel] [PATCH] Xen/timer: Disable watchdog during dumping timer queues

2016-09-19 Thread Lan, Tianyu



On 9/15/2016 10:32 PM, Jan Beulich wrote:

On 15.09.16 at 16:16,  wrote:

On 9/13/2016 11:25 PM, Jan Beulich wrote:

Wait - what is do_invalid_op() doing on the stack? I don't think it
belongs there, and hence I wonder whether the keypress
happened after some already fatal event (in which case all bets
are off anyway).


Not clear why do_invalid_op() on the stack. There is no other fatal
event. The issue disappears when set watchdog_timeout to 10s.


Another solution is to schedule a tasklet to run keyhandler in timer
handler and invoke process_pending_softirqs() in the dump_timerq().
This also works but it requires to rework keyhandler mechanism.

Disable watchdog seems to be simpler and I found dump_registers() also
used the same way to deal with the issue.

That's true. Just that on large machines it defaults to the
alternative model, for which I'm not sure it actually needs the
watchdog disabled (as data for a single CPU shouldn't exceed
the threshold).



It seems not to be necessary to disable watchdog in alternative model
since dumping a single cpu's status will not last a long time.


For the issue in the dump timer info handler, disabling watchdog is ok
for you or you have other suggestions to resolve the issue?


Well, without a clear understanding of why the issue occurs (for
which I need to refer you back to the questionable stack dump)
I'm hesitant to agree to this step, yet ...


After some research, I found that do_invalid_op() in the stack dump is
caused by run_in_exception_handler(__ns16550_poll) in ns16550_poll()
rather than by a fatal event. The timeout issue still exists when
__ns16550_poll() is run directly from ns16550_poll().





I also found other places where dump a lot of logs disable watchdog.
(E,G run_all_keyhandlers(), debugtrace_dump() debugtrace_toggle() and so
on). This seems a common solution.


... I'm also not entirely against it considering the various other
examples. I.e. as almost always: As long as the need for the
change can be properly explained, I won't stand in the way of
getting it in.

Jan





Re: [Xen-devel] [PATCH] Xen/timer: Disable watchdog during dumping timer queues

2016-09-20 Thread Lan, Tianyu

On 9/19/2016 10:46 PM, Jan Beulich wrote:

Well, without a clear understanding of why the issue occurs (for
>> which I need to refer you back to the questionable stack dump)
>> I'm hesitant to agree to this step, yet ...

>
> After some researches, I found do_invalid_op() on the stack dump is
> caused by run_in_exception_handler(__ns16550_poll) in the ns16550_poll()
> rather than fatal event. The timeout issue still exists when run
> __ns16550_poll() directly in the ns16550_poll().

Well, I then still don't see why e.g. dump_domains() doesn't also need
it.


After testing, dump_domains() also has this issue after I create two VMs
with 128 vcpus each.


Earlier you did say:

  Keyhandler may run in the timer handler and the following log shows
  calltrace. The timer subsystem run all expired timers' handler
  before programing next timer event. If keyhandler runs longer than
  timeout, there will be no chance to configure timer before triggering
  watchdog and hypervisor rebooting.

The fact that using debug keys may adversely affect the rest of the
system is known. And the nesting of process_pending_softirqs()
inside do_softirq() should, from looking at them, work fine. So I
continue to have trouble seeing the specific reason for the problem
you say you observe.


The precondition for process_pending_softirqs() working in the debug key
handler is that a timer interrupt arrives on time and nmi_timer_fn() can
run to update nmi_timer_ticks before the watchdog times out.

When a timer interrupt arrives, timer_softirq_action() will run all
expired timer handlers before programming the next timer interrupt via
reprogram_timer(). If a timer handler runs too long, e.g. >5s (the
watchdog timeout defaults to 5s), no timer interrupt will arrive within
5s and nmi_timer_fn() won't be called either.
Does this make sense to you?
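
To make the ordering concrete, here is a small standalone sketch in plain C
(not Xen code; the 8-second dump, WATCHDOG_TIMEOUT_S and all names are made
up) of how a single slow handler inside the timer softirq leaves the watchdog
tick counter untouched until it is too late:

/* Toy model: the timer softirq runs every expired handler first and only
 * then programs the next timer interrupt, so nothing updates the watchdog
 * tick counter while a slow handler (like a timer-queue dump) is running. */
#include <stdio.h>

#define WATCHDOG_TIMEOUT_S 5

static int nmi_timer_ticks;            /* normally bumped every second */

static void slow_dump_handler(void)    /* stands in for dump_timerq() */
{
    int seconds_spent = 8;             /* pretend the dump takes 8 seconds */

    if (seconds_spent > WATCHDOG_TIMEOUT_S && nmi_timer_ticks == 0)
        printf("watchdog would fire: no tick for %d seconds\n", seconds_spent);
}

static void timer_softirq_action(void)
{
    slow_dump_handler();               /* all expired handlers run first... */
    /* ...and only here would the next timer interrupt be programmed, so the
     * watchdog tick handler had no chance to run in between. */
}

int main(void)
{
    timer_softirq_action();
    return 0;
}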



And as a separate note - dump_registers() is quite an exception
among the key handlers, and that's for a good reason (as the
comment there says). So I continue to be hesitant to see this
spread to other key handlers.

Jan





Re: [Xen-devel] [PATCH] Xen/timer: Disable watchdog during dumping timer queues

2016-09-20 Thread Lan Tianyu
On 2016年09月20日 23:36, Jan Beulich wrote:
>> The precondition of process_pending_softirq() working in the debug key
>> > handler is that timer interrupt arrives on time and nmi_timer_fn() can
>> > run to update nmi_timer_ticks before watchdog timeout.
> Precondition?

process_pending_softirqs() in the debug key handler mainly deals with the
timer softirq to update nmi_timer_ticks in order to avoid the NMI watchdog.
If no timer interrupt arrives for a long time,
process_pending_softirqs() here is meaningless and the NMI watchdog will
still time out.

> 
>> > When a timer interrupt arrives, timer_softirq_action() will run all
>> > expired timer handlers before programing next timer interrupt via
>> > reprogram_timer(). If a timer handler runs too long E,G >5s(Time for
>> > watchdog timeout is default to be 5s.), this will cause no timer
>> > interrupt arriving within 5s and nmi_timer_fn() also won't be called.
>> > Does this make sense to you?
> Partly. I continue to think that the sequence
> 
> some keyhandler
>   timer interrupt
> keyhandler continues
> keyhandler calls process_pending_softirq()
> 

The question about your sequence is why there would be a timer interrupt
before the next timer interrupt has been programmed.

Actually the sequence in this case is:
timer interrupt
run key handlers in the timer handler
program the next timer interrupt
...



> should, among other things, result in timer_softirq_action() to get
> run. And I don't see the _timer_ handler running for to long here,
> only a key handler.

A key handler may run for a long time (e.g. >5s) on a machine with a large
number of cpus or with huge VMs. If the keyhandler doesn't run for a long
time, timer_softirq_action() would also not be necessary, since the default
timeout is 5s and the nmi timer's interval is 1s.


> Are you perhaps instead suffering from the
> nested instance of timer_softirq_action() not being able to acquire
> its lock?

No, the serial port continues printing timer info right up until the watchdog timeout.


-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [PATCH] Xen/timer: Disable watchdog during dumping timer queues

2016-09-22 Thread Lan Tianyu



On 9/21/2016 5:25 PM, Jan Beulich wrote:

On 21.09.16 at 03:54,  wrote:

On 2016年09月20日 23:36, Jan Beulich wrote:

The precondition of process_pending_softirq() working in the debug key

handler is that timer interrupt arrives on time and nmi_timer_fn() can
run to update nmi_timer_ticks before watchdog timeout.

Precondition?


Process_pending_softirq() in debug key handler is mainly to deal with
timer softirq to update nmi_timer_ticks in order to avoid NMI watchdog.
If there is no timer interrupt arriving for long time,
process_pending_softirq() here is meaningless and NMI watchdog still
will be timeout.


Oh, right. Still I continue to be unconvinced that disabling the
watchdog is the right answer (not running timers for a long time
has other undesirable consequence), or if it is, then it being
needed in only this one key handler. So perhaps you should
really consider submitting your generic key handler adjustment
as an alternative.



Disabling the watchdog is a common solution for this kind of issue in the
current code, and so I chose it. I also proposed another solution in a
previous mail: always run the keyhandler in a tasklet and insert
process_pending_softirqs() in the keyhandler.


But please also answer the earlier question, which you did strip
from your reply:


Which btw raises another question: Why are you in polling mode in
the first place? Do you have a UART without working interrupt?




I found there was no interrupt with the Xen ns16550 driver, while the
Linux kernel's serial driver can receive interrupts.



Jan







Re: [Xen-devel] [PATCH] Xen/timer: Disable watchdog during dumping timer queues

2016-09-22 Thread Lan, Tianyu

On 9/22/2016 10:26 PM, Jan Beulich wrote:

But please also answer the earlier question, which you did strip
>> from your reply:
>>

>>> Which btw raises another question: Why are you in polling mode in
>>> the first place? Do you have a UART without working interrupt?

>
> I found there was no interrupt with the Xen ns16550 driver, while the
> Linux kernel's serial driver can receive interrupts.

And do you know the reason? Is it perhaps a PCI plug in card, and
you don't specify the IRQ on the command line? Or the kernel
doesn't provide the necessary information (from ACPI) for Xen to
set up that IRQ?


No, I am not familiar with serial devices. But it shows as an ACPI device in
the Linux sysfs node, and the serial drivers use IRQ 4 for their interrupt on
both Linux and Xen.



[Xen-devel] [Resend PATCH 1/2] Xen/Keyhandler: Make keyhandler always run in tasklet

2016-09-29 Thread Lan Tianyu
Keyhandler may run for a long time in a timer handler on a large machine
with a lot of physical cpus (e.g. the keyhandler for dumping timer info) when
the serial port driver works in poll mode. When a timer interrupt arrives, the
timer subsystem runs all timer handlers before programming the next timer
interrupt. So if a timer handler runs longer than the watchdog timeout, the
watchdog's timer handler is blocked from feeding the watchdog and the Xen
hypervisor panics. This patch fixes the issue by always scheduling a tasklet
to run the keyhandler, to avoid the timer handler running too long.

Signed-off-by: Lan Tianyu 
---
 xen/common/keyhandler.c |8 +++++---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index 16de6e8..fce52d2 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -75,7 +75,9 @@ static struct keyhandler {
 
 static void keypress_action(unsigned long unused)
 {
-handle_keypress(keypress_key, NULL);
+console_start_log_everything();
+key_table[keypress_key].fn(keypress_key);
+console_end_log_everything();
 }
 
 static DECLARE_TASKLET(keypress_tasklet, keypress_action, 0);
@@ -87,10 +89,10 @@ void handle_keypress(unsigned char key, struct cpu_user_regs *regs)
 if ( key >= ARRAY_SIZE(key_table) || !(h = &key_table[key])->fn )
 return;
 
-if ( !in_irq() || h->irq_callback )
+if ( h->irq_callback )
 {
 console_start_log_everything();
-h->irq_callback ? h->irq_fn(key, regs) : h->fn(key);
+h->irq_fn(key, regs);
 console_end_log_everything();
 }
 else
-- 
1.7.1




[Xen-devel] [Resend PATCH 0/2] Xen: Fix Xen hypervisor panic during dumping timer info on huge machine.

2016-09-29 Thread Lan Tianyu
Resending because the patchset seems to have missed the xen-devel mailing list.

This patchset fixes triggering the NMI watchdog while dumping timer info
on huge machines with a large number of physical cpus. For details please see
the change log of Patch 1.

Previous discussion:
https://patchwork.kernel.org/patch/9328449/


Lan Tianyu (2):
  Xen/Keyhandler: Make keyhandler always run in tasklet
  Xen/timer: Process softirq during dumping timer info

 xen/common/keyhandler.c |8 +---
 xen/common/timer.c  |1 +
 2 files changed, 6 insertions(+), 3 deletions(-)





[Xen-devel] [Resend PATCH 2/2] Xen/timer: Process softirq during dumping timer info

2016-09-29 Thread Lan Tianyu
Dumping timer info may run for a long time on a huge machine with
a lot of physical cpus. To avoid triggering the NMI watchdog, add
process_pending_softirqs() to the loop that dumps the timer info.

Signed-off-by: Lan Tianyu 
---
 xen/common/timer.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/xen/common/timer.c b/xen/common/timer.c
index 29a60a9..ab6bca0 100644
--- a/xen/common/timer.c
+++ b/xen/common/timer.c
@@ -530,6 +530,7 @@ static void dump_timerq(unsigned char key)
 {
 ts = &per_cpu(timers, i);
 
+process_pending_softirqs();
 printk("CPU%02d:\n", i);
 spin_lock_irqsave(&ts->lock, flags);
 for ( j = 1; j <= GET_HEAP_SIZE(ts->heap); j++ )
-- 
1.7.1




Re: [Xen-devel] [Resend PATCH 1/2] Xen/Keyhandler: Make keyhandler always run in tasklet

2016-10-07 Thread Lan Tianyu
Hi Konrad:
Thanks for your review.

On 2016年10月01日 02:07, Konrad Rzeszutek Wilk wrote:
> On Fri, Sep 30, 2016 at 10:19:05AM +0800, Lan Tianyu wrote:
>> Keyhandler may run for a long time in a timer handler on the large machine
> 
> I am bit lost.
> 
> You say 'timer handler' which will imply that there is some form
> of 'init_timer' and 'set_timer' that would call the handle_keypress
> function?
> But I am not seeing it?
> 
> Or are you saying that when 'dump_timerq' is invoked?
> If so please say that.


When the serial port driver works in poll mode, it sets up a regular
timer to deal with all input keys, and the keyhandler (e.g. dump_timerq())
will run in that timer handler.

> 
>> with a lot of physical cpus(E,G keyhandler for dumping timer info) when 
>> serial
> 
> s/E,G/e.g.g/
> 
>> port driver works in the poll mode. When timer interrupt arrives, timer 
>> subsystem
> 
> s/poll mode/poll mode (via the exception mechanism)/
> 
>> runs all timer handlers before programming next timer interrupt. So if timer 
>> handler
>> runs longer than time for watchdog timeout, the timer handler of watchdog 
>> will be
> 
> Ah, so this is if a guest has set a timer and we are executing it. Or we have
> many of them to go through.

I meant that the serial port timer handler, which calls the keyhandler,
will run for a long time; no APIC timer interrupt will arrive to trigger the
timer softirq and feed the watchdog during this period, because there is no
chance to program the timer interrupt before all timer handlers have
completed in this case.

>
>> blocked to feed watchdog and xen hypervisor panics. This patch is to fix the 
>> issue
>> via always scheduling a tasklet to run keyhandler to avoid timer handler 
>> running
>> too long.
> 
> You say "timer handler" again. But the timer handlers are executed via
> timer_softirq_action (which is a softirq, aka triggered by IPI).

In this case, the APIC timer interrupt handler apic_timer_interrupt()
raises the timer softirq, and all expired timer handlers run in the timer softirq.

> 
> And the tasklet will mean that that it gets to be executed _after_ the
> do_softirq is done (as softirq.h puts the low numbered ones first, such
> as the TIMER_SOFTIRQ)?
> 
> So what I think you are saying is that you do not want the 
> 'timer_softirq_action'
> to be preempted by the 'dump_timerq' (or any other ones) which will
> trip the watchdog timeout. 

I want to make sure the serial port timer handler doesn't run for a long
time and doesn't affect feeding the watchdog.

> 
> If that is the case please put something to that affect in the
> commit description.
> 
> That begs one question that should be probably answered in the commit
> description:
> 
> Why can't the dump_timerq or any other keyhandler poke the watchdog
> (expose nmi_timer_fn and call that?)

Do you mean to feed the NMI watchdog in the keyhandler directly?

> 
>>
>> Signed-off-by: Lan Tianyu 
> 
> Otherwise the mechanical parts of the patch look good.
> 
>> ---
>>  xen/common/keyhandler.c |8 +---
>>  1 files changed, 5 insertions(+), 3 deletions(-)
>>
>> diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
>> index 16de6e8..fce52d2 100644
>> --- a/xen/common/keyhandler.c
>> +++ b/xen/common/keyhandler.c
>> @@ -75,7 +75,9 @@ static struct keyhandler {
>>  
>>  static void keypress_action(unsigned long unused)
>>  {
>> -handle_keypress(keypress_key, NULL);
>> +console_start_log_everything();
>> +key_table[keypress_key].fn(keypress_key);
>> +console_end_log_everything();
>>  }
>>  
>>  static DECLARE_TASKLET(keypress_tasklet, keypress_action, 0);
>> @@ -87,10 +89,10 @@ void handle_keypress(unsigned char key, struct cpu_user_regs *regs)
>>  if ( key >= ARRAY_SIZE(key_table) || !(h = &key_table[key])->fn )
>>  return;
>>  
>> -if ( !in_irq() || h->irq_callback )
>> +if ( h->irq_callback )
>>  {
>>  console_start_log_everything();
>> -h->irq_callback ? h->irq_fn(key, regs) : h->fn(key);
>> +h->irq_fn(key, regs);
>>  console_end_log_everything();
>>  }
>>  else
>> -- 
>> 1.7.1
>>


-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [Resend PATCH 1/2] Xen/Keyhandler: Make keyhandler always run in tasklet

2016-10-07 Thread Lan Tianyu
On 2016年10月06日 20:52, Jan Beulich wrote:
 On 30.09.16 at 04:19,  wrote:
>> @@ -87,10 +89,10 @@ void handle_keypress(unsigned char key, struct 
>> cpu_user_regs *regs)
>>  if ( key >= ARRAY_SIZE(key_table) || !(h = &key_table[key])->fn )
>>  return;
>>  
>> -if ( !in_irq() || h->irq_callback )
>> +if ( h->irq_callback )
> 
> Please make subject/description reflect this: You don't _always_
> force the use of the tasklet.

OK. I also find that register_irq_keyhandler() isn't called anywhere in the
current code, which means nothing uses irq_callback. Can we remove it?

> 
> And then I don't think we want the debugkey sysctl get processed
> asynchronously - the sysctl should complete only when the key has
> been fully handled, in order to not interfere with a subsequent one
> (namely the one retrieving the log buffer).

We may introduce a new parameter for handle_keypress() to specify
whether it should schedule a tasklet to run the keyhandler or not. For the
sysctl case, it should be the latter.

-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [Resend PATCH 2/2] Xen/timer: Process softirq during dumping timer info

2016-10-07 Thread Lan Tianyu
On 2016年10月06日 20:56, Jan Beulich wrote:
 On 30.09.16 at 04:19,  wrote:
>> --- a/xen/common/timer.c
>> +++ b/xen/common/timer.c
>> @@ -530,6 +530,7 @@ static void dump_timerq(unsigned char key)
>>  {
>>  ts = &per_cpu(timers, i);
>>  
>> +process_pending_softirqs();
>>  printk("CPU%02d:\n", i);
>>  spin_lock_irqsave(&ts->lock, flags);
>>  for ( j = 1; j <= GET_HEAP_SIZE(ts->heap); j++ )
> 
> Hmm - is that enough when there are many timers on one CPU? But
> well, adding something inside the lock region would of course make
> things quite a bit harder, so I guess this has to be enough for now.
> 

Yes, it's hard to add process_pending_softirqs() under the lock, just as
you said. I searched for init_timer() and there are 28 callers. Printing 28
lines of timer info should only take a brief time.

-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [Resend PATCH 1/2] Xen/Keyhandler: Make keyhandler always run in tasklet

2016-10-10 Thread Lan Tianyu
On 2016年10月10日 21:55, Konrad Rzeszutek Wilk wrote:
> On Sat, Oct 08, 2016 at 11:26:44AM +0800, Lan Tianyu wrote:
>> On 2016年10月06日 20:52, Jan Beulich wrote:
>>>>>> On 30.09.16 at 04:19,  wrote:
>>>> @@ -87,10 +89,10 @@ void handle_keypress(unsigned char key, struct 
>>>> cpu_user_regs *regs)
>>>>  if ( key >= ARRAY_SIZE(key_table) || !(h = &key_table[key])->fn )
>>>>  return;
>>>>  
>>>> -if ( !in_irq() || h->irq_callback )
>>>> +if ( h->irq_callback )
>>>
>>> Please make subject/description reflect this: You don't _always_
>>> force the use of the tasklet.
>>
>> Ok. I also find register_irq_keyhandler() isn't called anywhere in
>> current code and that means none uses irq_callback. Can we remove it?
> 
> But it is. See IRQ_KEYHANDLER

Oh. Yes. Thanks for your information.

-- 
Best regards
Tianyu Lan



Re: [Xen-devel] Xen virtual IOMMU high level design doc

2016-10-10 Thread Lan Tianyu
On 2016年10月06日 02:36, Konrad Rzeszutek Wilk wrote:
>>> 3.3 Interrupt remapping
>>> > > Interrupts from virtual devices and physical devices will be delivered
>>> > > to vlapic from vIOAPIC and vMSI. It needs to add interrupt remapping
>>> > > hooks in the vmsi_deliver() and ioapic_deliver() to find target vlapic
>>> > > according interrupt remapping table. The following diagram shows the 
>>> > > logic.
>>> > > 
> Uh? Missing diagram?

Sorry, this is a stale statement. The diagram was moved to 2.2 Interrupt
remapping overview.

> 
>>> 4.3 Q35 vs i440x
>>> > > VT-D is introduced since Q35 chipset. Previous concern was that IOMMU
> s/since/with/
>>> > > driver has assumption that VTD only exists on Q35 and newer chipset and
>>> > > we have to enable Q35 first.
>>> > > 
>>> > > Consulted with Linux/Windows IOMMU driver experts and get that these
>>> > > drivers doesn't have such assumption. So we may skip Q35 implementation
>>> > > and can emulate vIOMMU on I440x chipset. KVM already have vIOMMU support
>>> > > with virtual PCI device's DMA translation and interrupt remapping. We
>>> > > are using KVM to do experiment of adding vIOMMU on the I440x and test
>>> > > Linux/Windows guest. Will report back when have some results.
> Any results?

We have booted a Win8 guest with virtual VT-d on an emulated i440x
platform on Xen, and the guest uses the virtual VT-d to enable the interrupt
remapping function.

-- 
Best regards
Tianyu Lan



[Xen-devel] [PATCH v2 0/2] Xen: Fix Xen hypervisor panic during dumping timer info on huge machine.

2016-10-12 Thread Lan Tianyu
This patchset fixes triggering the NMI watchdog while dumping timer info
on huge machines with a large number of physical cpus. For details please see
the change log of Patch 1.

Previous discussion:
https://patchwork.kernel.org/patch/9328449/

Changes since V1:
Add an "async" param for handle_keypress() to identify
whether to run a non-IRQ keyhandler in a tasklet or not. This is to
avoid processing the debugkey sysctl asynchronously.


Lan Tianyu (2):
  Xen/Keyhandler: Rework process of nonirq keyhandler
  Xen/timer: Process softirq during dumping timer info

 xen/common/keyhandler.c  |8 +---
 xen/common/sysctl.c  |2 +-
 xen/common/timer.c   |1 +
 xen/drivers/char/console.c   |2 +-
 xen/include/xen/keyhandler.h |4 +++-
 5 files changed, 11 insertions(+), 6 deletions(-)




[Xen-devel] [PATCH v2 1/2] Xen/Keyhandler: Rework process of nonirq keyhandler

2016-10-12 Thread Lan Tianyu

Keyhandler may run for a long time in the serial port driver's
timer handler on a large machine with a lot of physical
cpus (e.g. dump_timerq()) when the serial port driver works in
poll mode (via the exception mechanism).

If a timer handler runs for a long time, it blocks nmi_timer_fn()
from feeding the NMI watchdog and causes a Xen hypervisor panic. Inserting
process_pending_softirqs() in the timer handler does not help: when a timer
interrupt arrives, the timer subsystem calls all expired timer handlers
before programming the next timer interrupt, so no timer interrupt
arrives to trigger the timer softirq while a timer handler is running.

This patch fixes the issue by making non-IRQ keyhandlers run in a
tasklet when a debug key is received from the serial port.

Signed-off-by: Lan Tianyu 
---
 xen/common/keyhandler.c  |8 +++++---
 xen/common/sysctl.c  |2 +-
 xen/drivers/char/console.c   |2 +-
 xen/include/xen/keyhandler.h |4 +++-
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index 16de6e8..3d50041 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -75,19 +75,21 @@ static struct keyhandler {
 
 static void keypress_action(unsigned long unused)
 {
-handle_keypress(keypress_key, NULL);
+console_start_log_everything();
+key_table[keypress_key].fn(keypress_key);
+console_end_log_everything();
 }
 
 static DECLARE_TASKLET(keypress_tasklet, keypress_action, 0);
 
-void handle_keypress(unsigned char key, struct cpu_user_regs *regs)
+void handle_keypress(unsigned char key, struct cpu_user_regs *regs, bool async)
 {
 struct keyhandler *h;
 
 if ( key >= ARRAY_SIZE(key_table) || !(h = &key_table[key])->fn )
 return;
 
-if ( !in_irq() || h->irq_callback )
+if ( h->irq_callback || !async )
 {
 console_start_log_everything();
 h->irq_callback ? h->irq_fn(key, regs) : h->fn(key);
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 8aea6ef..1eb7bad 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -136,7 +136,7 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
 {
 if ( copy_from_guest_offset(&c, op->u.debug_keys.keys, i, 1) )
 goto out;
-handle_keypress(c, guest_cpu_user_regs());
+handle_keypress(c, guest_cpu_user_regs(), false);
 }
 ret = 0;
 copyback = 0;
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 55ae31a..184b523 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -347,7 +347,7 @@ static void switch_serial_input(void)
 static void __serial_rx(char c, struct cpu_user_regs *regs)
 {
 if ( xen_rx )
-return handle_keypress(c, regs);
+return handle_keypress(c, regs, true);
 
 /* Deliver input to guest buffer, unless it is already full. */
 if ( (serial_rx_prod-serial_rx_cons) != SERIAL_RX_SIZE )
diff --git a/xen/include/xen/keyhandler.h b/xen/include/xen/keyhandler.h
index 06c05c8..e9595bd 100644
--- a/xen/include/xen/keyhandler.h
+++ b/xen/include/xen/keyhandler.h
@@ -46,7 +46,9 @@ void register_irq_keyhandler(unsigned char key,
  bool_t diagnostic);
 
 /* Inject a keypress into the key-handling subsystem. */
-extern void handle_keypress(unsigned char key, struct cpu_user_regs *regs);
+extern void handle_keypress(unsigned char key,
+   struct cpu_user_regs *regs,
+   bool async);
 
 /* Scratch space is available for use of any keyhandler. */
 extern char keyhandler_scratch[1024];
-- 
1.7.1




[Xen-devel] [PATCH v2 2/2] Xen/timer: Process softirq during dumping timer info

2016-10-12 Thread Lan Tianyu
Dumping timer info may run for a long time on a huge machine with
a lot of physical cpus. To avoid triggering the NMI watchdog, add
process_pending_softirqs() to the loop that dumps the timer info.

Reviewed-by: Konrad Rzeszutek Wilk 
Signed-off-by: Lan Tianyu 
---
 xen/common/timer.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/xen/common/timer.c b/xen/common/timer.c
index 29a60a9..ab6bca0 100644
--- a/xen/common/timer.c
+++ b/xen/common/timer.c
@@ -530,6 +530,7 @@ static void dump_timerq(unsigned char key)
 {
 ts = &per_cpu(timers, i);
 
+process_pending_softirqs();
 printk("CPU%02d:\n", i);
 spin_lock_irqsave(&ts->lock, flags);
 for ( j = 1; j <= GET_HEAP_SIZE(ts->heap); j++ )
-- 
1.7.1




Re: [Xen-devel] [PATCH v2 0/2] Xen: Fix Xen hypervisor panic during dumping timer info on huge machine.

2016-10-12 Thread Lan Tianyu
On 2016年10月12日 16:09, Jan Beulich wrote:
>>>> On 12.10.16 at 17:44,  wrote:
>> This patchset is to fix triggering NMI watchdog during dump timer info
>> on the huge machine with a mount of physical cpus. Detail please see
>> change log of Patch 1.
>>
>> Previous discussion:
>> https://patchwork.kernel.org/patch/9328449/ 
>>
>> Change since V1:
>> Add "async" param for handle_keypress() to identify
>> whether run nonirq keyhandler in tasklet or not. This is to
>> avoid processing debugkey sysctl asynchronously.
>>
>>
>> Lan Tianyu (2):
>>   Xen/Keyhandler: Rework process of nonirq keyhandler
>>   Xen/timer: Process softirq during dumping timer info
> 
> This second patch went in already a few days ago.
> 

Oh, sorry for the noise. I didn't notice that.

> Also, any reason you send to the list twice (once @lists.xen.org,
> and another time to @lists.xenproject.org)?

Sometimes I found my patches weren't able to reach xen-devel, and so I
sent to both the xen.org and xenproject.org mailing lists. I will double check.

> 
> Jan
> 


-- 
Best regards
Tianyu Lan



[Xen-devel] Discussion about virtual iommu support for Xen guest

2016-05-26 Thread Lan Tianyu
Hi All:
We are trying to push virtual IOMMU support for Xen guests, and there are
some features blocked on it.

Motivation:
---
1) Add SVM (Shared Virtual Memory) support for Xen guests
To support iGFX pass-through for SVM enabled devices, virtual IOMMU support
is required to emulate the related registers and intercept/handle the guest
SVM configuration in the VMM.

2) Increase max vcpu support for one VM.

So far, the max vcpu count for a Xen HVM guest is 128. HPC (High Performance
Computing) cloud computing requires support for more vcpus in a single
VM. The usage model is to create just one VM on a machine, with the
same number of vcpus as there are logical cpus on the host, and pin each vcpu
to a logical cpu in order to get good compute performance.

Intel Xeon Phi KNL (Knights Landing) is dedicated to the HPC market and
supports 288 logical cpus. So we hope a VM can support 288 vcpus
to meet the HPC requirement.

The current Linux kernel requires IR (interrupt remapping) when the max APIC
ID is > 255, because without IR interrupts can only be delivered to cpus
0~255. IR in a VM relies on virtual IOMMU support.
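
For reference, the 255 limit comes from the xAPIC-format MSI address, which
only carries an 8-bit destination APIC ID in address bits 19:12. The tiny
standalone sketch below (illustrative only, not kernel or Xen code) shows why
an ID above 255 cannot be encoded without interrupt remapping:

/* The xAPIC MSI address is 0xFEExxxxx with the destination APIC ID in
 * bits 19:12, i.e. only 8 bits: APIC IDs above 255 cannot be expressed. */
#include <stdint.h>
#include <stdio.h>

static uint32_t msi_addr_xapic(uint8_t dest_apic_id)
{
    return 0xFEE00000u | ((uint32_t)dest_apic_id << 12);
}

int main(void)
{
    printf("dest 255 -> 0x%08x\n", (unsigned)msi_addr_xapic(255));          /* fits  */
    printf("dest 256 -> 0x%08x\n", (unsigned)msi_addr_xapic((uint8_t)256)); /* wraps */
    return 0;
}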

KVM Virtual iommu support status

Currently, Qemu has a basic virtual IOMMU to do address translation for
virtual devices, and it only works for the Q35 machine type. KVM reuses it
and Red Hat is adding IR to support more than 255 vcpus.

How to add virtual iommu for Xen?
-
The first idea that came to my mind is to reuse the Qemu virtual IOMMU, but
Xen doesn't support Q35 so far. Enabling Q35 for Xen seems not to be a short
term task. Anthony did some related work before.

I'd like to see your comments about how to implement virtual iommu for Xen.

1) Reuse Qemu virtual iommu or write a separate one for Xen?
2) Enable Q35 for Xen to reuse Qemu virtual iommu?

Your comments are very much appreciated. Thanks a lot.
-- 
Best regards
Tianyu Lan



Re: [Xen-devel] Discussion about virtual iommu support for Xen guest

2016-05-26 Thread Lan Tianyu
On 2016年05月26日 16:42, Dong, Eddie wrote:
> If enabling virtual Q35 solves the problem, it has the advantage: When more 
> and more virtual IOMMU feature comes (likely), we can reuse the KVM code for 
> Xen.
> How big is the effort for virtual Q35?

I think most of the effort is to rebuild all the ACPI tables for Q35 and add
Q35 support in hvmloader. My concern is about the new ACPI tables'
compatibility, especially with Windows guests.

-- 
Best regards
Tianyu Lan

> 
> Thx Eddie
> 




Re: [Xen-devel] Discussion about virtual iommu support for Xen guest

2016-05-27 Thread Lan Tianyu
On 2016年05月26日 19:35, Andrew Cooper wrote:
> On 26/05/16 09:29, Lan Tianyu wrote:
> 
> To be viable going forwards, any solution must work with PVH/HVMLite as
> much as HVM.  This alone negates qemu as a viable option.
> 
> From a design point of view, having Xen needing to delegate to qemu to
> inject an interrupt into a guest seems backwards.
>

Sorry, I am not familiar with HVMlite. HVMlite doesn't use Qemu, so the
Qemu virtual IOMMU can't work for it. We have to rewrite the virtual
IOMMU in Xen, right?

> 
> A whole lot of this would be easier to reason about if/when we get a
> basic root port implementation in Xen, which is necessary for HVMLite,
> and which will make the interaction with qemu rather more clean.  It is
> probably worth coordinating work in this area.

The virtual IOMMU should also be under the basic root port in Xen, right?

> 
> As for the individual issue of 288vcpu support, there are already issues
> with 64vcpu guests at the moment. While it is certainly fine to remove
> the hard limit at 255 vcpus, there is a lot of other work required to
> even get 128vcpu guests stable.


Could you give some pointers to these issues? We are enabling support for
more vcpus, and it can basically boot up 255 vcpus without IR support. It
would be very helpful to learn about the known issues.

We will also add more tests for 128 vcpus to our regular testing to find
related bugs. Increasing the max vcpu count to 255 should be a good start.





> 
> ~Andrew
> 


-- 
Best regards
Tianyu Lan



Re: [Xen-devel] Discussion about virtual iommu support for Xen guest

2016-06-02 Thread Lan, Tianyu

On 5/27/2016 4:19 PM, Lan Tianyu wrote:

On 2016年05月26日 19:35, Andrew Cooper wrote:

On 26/05/16 09:29, Lan Tianyu wrote:

To be viable going forwards, any solution must work with PVH/HVMLite as
much as HVM.  This alone negates qemu as a viable option.

From a design point of view, having Xen needing to delegate to qemu to
inject an interrupt into a guest seems backwards.



Sorry, I am not familiar with HVMlite. HVMlite doesn't use Qemu and
the qemu virtual iommu can't work for it. We have to rewrite virtual
iommu in the Xen, right?



A whole lot of this would be easier to reason about if/when we get a
basic root port implementation in Xen, which is necessary for HVMLite,
and which will make the interaction with qemu rather more clean.  It is
probably worth coordinating work in this area.


The virtual iommu also should be under basic root port in Xen, right?



As for the individual issue of 288vcpu support, there are already issues
with 64vcpu guests at the moment. While it is certainly fine to remove
the hard limit at 255 vcpus, there is a lot of other work required to
even get 128vcpu guests stable.



Could you give some points to these issues? We are enabling more vcpus
support and it can boot up 255 vcpus without IR support basically. It's
very helpful to learn about known issues.

We will also add more tests for 128 vcpus into our regular test to find
related bugs. Increasing max vcpu to 255 should be a good start.


Hi Andrew:
Could you give more input about the issues with 64 vcpus and what needs to
be done to make 128-vcpu guests stable? We hope to do something to
improve them.

What's the progress of the PCI host bridge in Xen? In your opinion, we should
do that first, right? Thanks.










~Andrew








Re: [Xen-devel] Discussion about virtual iommu support for Xen guest

2016-06-03 Thread Lan, Tianyu



On 6/3/2016 7:17 PM, Tian, Kevin wrote:

From: Andrew Cooper [mailto:andrew.coop...@citrix.com]
Sent: Friday, June 03, 2016 2:59 AM

On 02/06/16 16:03, Lan, Tianyu wrote:

On 5/27/2016 4:19 PM, Lan Tianyu wrote:

On 2016年05月26日 19:35, Andrew Cooper wrote:

On 26/05/16 09:29, Lan Tianyu wrote:

To be viable going forwards, any solution must work with PVH/HVMLite as
much as HVM.  This alone negates qemu as a viable option.

From a design point of view, having Xen needing to delegate to qemu to
inject an interrupt into a guest seems backwards.



Sorry, I am not familiar with HVMlite. HVMlite doesn't use Qemu and
the qemu virtual iommu can't work for it. We have to rewrite virtual
iommu in the Xen, right?



A whole lot of this would be easier to reason about if/when we get a
basic root port implementation in Xen, which is necessary for HVMLite,
and which will make the interaction with qemu rather more clean.  It is
probably worth coordinating work in this area.


The virtual iommu also should be under basic root port in Xen, right?



As for the individual issue of 288vcpu support, there are already
issues
with 64vcpu guests at the moment. While it is certainly fine to remove
the hard limit at 255 vcpus, there is a lot of other work required to
even get 128vcpu guests stable.



Could you give some points to these issues? We are enabling more vcpus
support and it can boot up 255 vcpus without IR support basically. It's
very helpful to learn about known issues.

We will also add more tests for 128 vcpus into our regular test to find
related bugs. Increasing max vcpu to 255 should be a good start.


Hi Andrew:
Could you give more inputs about issues with 64 vcpus and what needs to
be done to make 128vcpu guest stable? We hope to do somethings to
improve them.

What's progress of PCI host bridge in Xen? From your opinion, we should
do that first, right? Thanks.


Very sorry for the delay.

There are multiple interacting issues here.  On the one side, it would
be useful if we could have a central point of coordination on
PVH/HVMLite work.  Roger - as the person who last did HVMLite work,
would you mind organising that?

For the qemu/xen interaction, the current state is woeful and a tangled
mess.  I wish to ensure that we don't make any development decisions
which makes the situation worse.

In your case, the two motivations are quite different I would recommend
dealing with them independently.

IIRC, the issue with more than 255 cpus and interrupt remapping is that
you can only use x2apic mode with more than 255 cpus, and IOAPIC RTEs
can't be programmed to generate x2apic interrupts?  In principle, if you
don't have an IOAPIC, are there any other issues to be considered?  What
happens if you configure the LAPICs in x2apic mode, but have the IOAPIC
deliver xapic interrupts?


The key is the APIC ID. There is no modification to the existing PCI MSI and
IOAPIC with the introduction of x2apic. PCI MSI/IOAPIC can only send an
interrupt message containing an 8-bit APIC ID, which cannot address >255
cpus. Interrupt remapping supports a 32-bit APIC ID, so it is necessary to
enable >255 cpus with x2apic mode.

If the LAPIC is in x2apic mode while interrupt remapping is disabled, the
IOAPIC cannot deliver interrupts to all cpus in the system if #cpus > 255.


Another key factor: the Linux kernel disables x2apic mode when the max APIC
ID is > 255 if there is no interrupt remapping function, for the reason Kevin
gave. So booting up >255 cpus relies on interrupt remapping.



Re: [Xen-devel] Discussion about virtual iommu support for Xen guest

2016-08-02 Thread Lan, Tianyu

On 5/27/2016 4:19 PM, Lan Tianyu wrote:

> As for the individual issue of 288vcpu support, there are already issues
> with 64vcpu guests at the moment. While it is certainly fine to remove
> the hard limit at 255 vcpus, there is a lot of other work required to
> even get 128vcpu guests stable.


Could you give some points to these issues? We are enabling more vcpus
support and it can boot up 255 vcpus without IR support basically. It's
very helpful to learn about known issues.


Hi Andrew:
We are designing vIOMMU support for Xen. Increasing the vcpu count
from 128 to 255 can also be implemented in parallel, since it doesn't
need vIOMMU support. From your previous comment "there is a lot of other
work required to even get 128vcpu guests stable", you have some concerns
about the stability of 128 vcpus. I wonder what we need to do before
starting the work of increasing the vcpu number from 128 to 255?



[Xen-devel] Xen virtual IOMMU high level design doc

2016-08-17 Thread Lan, Tianyu

Hi All:
 The following is our Xen vIOMMU high level design for detailed
discussion. Please have a look. Your comments are very much appreciated.
This design doesn't cover the changes needed when the root port is moved
into the hypervisor. We may design that later.


Content:
===
1. Motivation of vIOMMU
1.1 Enable more than 255 vcpus
1.2 Support VFIO-based user space driver
1.3 Support guest Shared Virtual Memory (SVM)
2. Xen vIOMMU Architecture
2.1 2nd level translation overview
2.2 Interrupt remapping overview
3. Xen hypervisor
3.1 New vIOMMU hypercall interface
3.2 2nd level translation
3.3 Interrupt remapping
3.4 1st level translation
3.5 Implementation consideration
4. Qemu
4.1 Qemu vIOMMU framework
4.2 Dummy xen-vIOMMU driver
4.3 Q35 vs. i440x
4.4 Report vIOMMU to hvmloader


1 Motivation for Xen vIOMMU
===
1.1 Enable more than 255 vcpu support
HPC virtualization requires support for more than 255 vcpus in a single VM
to meet parallel computing requirements. Supporting more than 255 vcpus
requires the interrupt remapping capability to be present on the vIOMMU to
deliver interrupts to vcpus with IDs > 255; otherwise a Linux guest fails to
boot up with more than 255 vcpus.


1.2 Support VFIO-based user space driver (e.g. DPDK) in the guest
It relies on the 2nd level translation capability (IOVA->GPA) on
vIOMMU. pIOMMU 2nd level becomes a shadowing structure of
vIOMMU to isolate DMA requests initiated by user space driver.


1.3 Support guest SVM (Shared Virtual Memory)
It relies on the 1st level translation table capability (GVA->GPA) on
vIOMMU. pIOMMU needs to enable both 1st level and 2nd level translation
in nested mode (GVA->GPA->HPA) for passthrough device. IGD passthrough
is the main usage today (to support OpenCL 2.0 SVM feature). In the
future SVM might be used by other I/O devices too.

2. Xen vIOMMU Architecture


* vIOMMU will be inside Xen hypervisor for following factors
1) Avoid round trips between Qemu and Xen hypervisor
2) Ease of integration with the rest of the hypervisor
3) HVMlite/PVH doesn't use Qemu
* Dummy xen-vIOMMU in Qemu as a wrapper around the new hypercall to create
/destroy the vIOMMU in the hypervisor and deal with virtual PCI devices' 2nd
level translation.

2.1 2nd level translation overview
For a virtual PCI device, the dummy xen-vIOMMU does the translation in
Qemu via the new hypercall.

For a physical PCI device, the vIOMMU in the hypervisor shadows the IO page
table from IOVA->GPA to IOVA->HPA and loads the shadow page table into the
physical IOMMU.

The following diagram shows the 2nd level translation architecture.
+-+
|Qemu++   |
|| Virtual|   |
||   PCI device   |   |
|||   |
|++   |
||DMA |
|V|
|  ++   Request  ++   |
|  |+<---+|   |
|  |  Dummy xen vIOMMU  | Target GPA |  Memory region |   |
|  |+--->+|   |
|  +-+--++---++   |
||   ||
||Hypercall  ||
+++
|Hypervisor  |   ||
||   ||
|v   ||
| +--+--+||
| |   vIOMMU|||
| +--+--+||
||   ||
|v   ||
| +--+--+||
| | IOMMU driver|||
| +--+--+||
||   ||
+++
|HW  v   V|
| +--+--+ +-+ |
| |   IOMMU +>+  Memory | |
| +--+--+ +-+ |
|^|
|||
| +--+--+   

Re: [Xen-devel] Xen virtual IOMMU high level design doc

2016-08-17 Thread Lan, Tianyu



On 8/17/2016 8:42 PM, Paul Durrant wrote:

-Original Message-
From: Xen-devel [mailto:xen-devel-boun...@lists.xen.org] On Behalf Of
Lan, Tianyu
Sent: 17 August 2016 13:06
To: Jan Beulich; Kevin Tian; Andrew Cooper; yang.zhang...@gmail.com; Jun
Nakajima; Stefano Stabellini
Cc: Anthony Perard; xuqu...@huawei.com; xen-
de...@lists.xensource.com; Ian Jackson; Roger Pau Monne
Subject: [Xen-devel] Xen virtual IOMMU high level design doc

Hi All:
  The following is our Xen vIOMMU high level design for detail
discussion. Please have a look. Very appreciate for your comments.
This design doesn't cover changes when root port is moved to hypervisor.
We may design it later.


Content:
===
1. Motivation of vIOMMU
1.1 Enable more than 255 vcpus
1.2 Support VFIO-based user space driver
1.3 Support guest Shared Virtual Memory (SVM)
2. Xen vIOMMU Architecture
2.1 2th level translation overview
2.2 Interrupt remapping overview
3. Xen hypervisor
3.1 New vIOMMU hypercall interface


Would it not have been better to build on the previously discussed (and mostly 
agreed) PV IOMMU interface? (See 
https://lists.xenproject.org/archives/html/xen-devel/2016-02/msg01428.html). An 
RFC implementation series was also posted 
(https://lists.xenproject.org/archives/html/xen-devel/2016-02/msg01441.html).

  Paul



Hi Paul:
Thanks for your input. I glanced at the patchset; it introduces the
hypercall "HYPERVISOR_iommu_op". The hypercall only works for the PV IOMMU
now. We may abstract it and make it work for both the PV and virtual IOMMU.



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Xen virtual IOMMU high level design doc

2016-08-31 Thread Lan Tianyu
Hi Jan:
Sorry for the late response. Thanks a lot for your comments.

On 2016-08-25 19:11, Jan Beulich wrote:
 On 17.08.16 at 14:05,  wrote:
>> 1 Motivation for Xen vIOMMU
>> 
>> ===
>> 1.1 Enable more than 255 vcpu support
>> HPC virtualization requires more than 255 vcpus support in a single VM
>> to meet parallel computing requirement. More than 255 vcpus support
>> requires interrupt remapping capability present on vIOMMU to deliver
>> interrupt to #vcpu >255 Otherwise Linux guest fails to boot up with >255
>> vcpus if interrupt remapping is absent.
> 
> I continue to question this as a valid motivation at this point in
> time, for the reasons Andrew has been explaining.

If we want to support a Linux guest with >255 vcpus, interrupt remapping
is necessary.

The Linux commit introducing x2apic and IR mode states that IR is a
prerequisite for enabling x2apic mode in the CPU:
https://lwn.net/Articles/289881/

So far we are not sure about the behavior of other OSes. We may check the
Windows guest behavior on KVM later; there is still a bug when running a
Windows guest with the IR function on KVM.


> 
>> 2. Xen vIOMMU Architecture
>> 
>> 
>>
>> * vIOMMU will be inside Xen hypervisor for following factors
>>  1) Avoid round trips between Qemu and Xen hypervisor
>>  2) Ease of integration with the rest of the hypervisor
>>  3) HVMlite/PVH doesn't use Qemu
>> * Dummy xen-vIOMMU in Qemu as a wrapper of new hypercall to create
>> /destory vIOMMU in hypervisor and deal with virtual PCI device's 2th
>> level translation.
> 
> How does the create/destroy part of this match up with 3) right
> ahead of it?

The create/destroy hypercalls will work for both hvm and hvmlite.
The assumption is that hvmlite has a toolstack (e.g. libxl) which can call
the new hypercalls to create or destroy the virtual IOMMU in the hypervisor.

> 
>> 3 Xen hypervisor
>> ==
>>
>> 3.1 New hypercall XEN_SYSCTL_viommu_op
>> 1) Definition of "struct xen_sysctl_viommu_op" as new hypercall parameter.
>>
>> struct xen_sysctl_viommu_op {
>>  u32 cmd;
>>  u32 domid;
>>  union {
>>  struct {
>>  u32 capabilities;
>>  } query_capabilities;
>>  struct {
>>  u32 capabilities;
>>  u64 base_address;
>>  } create_iommu;
>>  struct {
>>  u8  bus;
>>  u8  devfn;
> 
> Please can we avoid introducing any new interfaces without segment/
> domain value, even if for now it'll be always zero?

Sure. Will add segment field.

> 
>>  u64 iova;
>>  u64 translated_addr;
>>  u64 addr_mask; /* Translation page size */
>>  IOMMUAccessFlags permisson; 
>>  } 2th_level_translation;
> 
> I suppose "translated_addr" is an output here, but for the following
> fields this already isn't clear. Please add IN and OUT annotations for
> clarity.
> 
> Also, may I suggest to name this "l2_translation"? (But there are
> other implementation specific things to be considered here, which
> I guess don't belong into a design doc discussion.)

How about this?
struct {
    /* IN parameters. */
    u8  segment;
    u8  bus;
    u8  devfn;
    u64 iova;
    /* OUT parameters. */
    u64 translated_addr;
    u64 addr_mask; /* Translation page size */
    IOMMUAccessFlags permission;
} l2_translation;

> 
>> };
>>
>> typedef enum {
>>  IOMMU_NONE = 0,
>>  IOMMU_RO   = 1,
>>  IOMMU_WO   = 2,
>>  IOMMU_RW   = 3,
>> } IOMMUAccessFlags;
>>
>>
>> Definition of VIOMMU subops:
>> #define XEN_SYSCTL_viommu_query_capability   0
>> #define XEN_SYSCTL_viommu_create 1
>> #define XEN_SYSCTL_viommu_destroy2
>> #define XEN_SYSCTL_viommu_dma_translation_for_vpdev  3
>>
>> Definition of VIOMMU capabilities
>> #define XEN_VIOMMU_CAPABILITY_1nd_level_translation  (1 << 0)
>> #define XEN_VIOMMU_CAPABILITY_2nd_level_translation  (1 << 1)
> 
> l1 and l2 respectively again, please.

Will update.

> 
>> 3.3 Interrupt remapping
>> Interrupts from virtual devices and physical devices will be delivered
>> to vlapic from vIOAPIC and vMSI. It needs to add interrupt remapping
>> hooks in the vmsi_deliver() and ioapic_deliver() to find target vlapic
>> according interrupt remapping table. The following diagram shows the logic.
> 
> Missing diagram or stale sentence?

Sorry. It's stale sentence and moved the diagram to 2.2 Interrupt
remapping overview.

> 
>> 3.5 Implementation consideration
>> Linux Intel IOMMU driver will fail to be loaded without 2th level
>> translation support even if interrupt remapping and 1th le

Re: [Xen-devel] Xen virtual IOMMU high level design doc

2016-08-31 Thread Lan Tianyu
On 2016-08-31 20:02, Jan Beulich wrote:
 On 31.08.16 at 10:39,  wrote:
>> > On 2016-08-25 19:11, Jan Beulich wrote:
>> > On 17.08.16 at 14:05,  wrote:
 >>> 1 Motivation for Xen vIOMMU
 >>> 
 >>> ===
 >>> 1.1 Enable more than 255 vcpu support
 >>> HPC virtualization requires more than 255 vcpus support in a single VM
 >>> to meet parallel computing requirement. More than 255 vcpus support
 >>> requires interrupt remapping capability present on vIOMMU to deliver
 >>> interrupt to #vcpu >255 Otherwise Linux guest fails to boot up with 
 >>> >255
 >>> vcpus if interrupt remapping is absent.
>>> >> 
>>> >> I continue to question this as a valid motivation at this point in
>>> >> time, for the reasons Andrew has been explaining.
>> > 
>> > If we want to support Linux guest with >255 vcpus, interrupt remapping
>> > is necessary.
> I don't understand why you keep repeating this, without adding
> _why_ you think there is a demand for such guests and _what_
> your plans are to eliminate Andrew's concerns.
> 

The motivation for such a huge VM is HPC (high-performance computing)
cloud service, which requires high performance parallel computing.
We just create a single VM on one machine and expose more than 255 pcpus
to the VM in order to ensure high performance parallel computing in the VM.
Each vcpu is pinned to a pcpu.

For performance, we achieved good results (>95% of native performance for
the stream, dgemm and sgemm benchmarks in the VM) after some tuning and
optimizations. We presented these at this year's Xen summit.

For stability, Andrew found some issues with a huge VM with the watchdog
enabled which caused a hypervisor reboot. We will reproduce and fix them.

-- 
Best regards
Tianyu Lan

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 1/2] Xen/Keyhandler: Rework process of nonirq keyhandler

2016-10-12 Thread Lan, Tianyu



On 10/12/2016 9:19 PM, Jan Beulich wrote:

On 12.10.16 at 09:58,  wrote:

--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -347,7 +347,7 @@ static void switch_serial_input(void)
 static void __serial_rx(char c, struct cpu_user_regs *regs)
 {
 if ( xen_rx )
-return handle_keypress(c, regs);
+return handle_keypress(c, regs, true);


I think it would be nice to pass true here only when in polling mode,
unless you know or can deduce that a similar problem also exists
in IRQ mode. Perhaps you could simply move the !in_irq() here?


That's a good idea. Thanks.


(Of course the new function parameter would then want to be renamed.)


Since the issue happens when handle_keypress() runs in a timer handler,
how about naming the new parameter "intimer"? __serial_rx() is called in a
timer handler or an interrupt handler. Or do you have another suggestion?




Jan



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 0/2] Xen: Fix Xen hypervisor panic during dumping timer info on huge machine.

2016-10-12 Thread Lan, Tianyu



On 10/12/2016 7:08 PM, Ian Jackson wrote:

Wei Liu writes ("Re: [PATCH v2 0/2] Xen: Fix Xen hypervisor panic during dumping 
timer info on huge machine."):

On Wed, Oct 12, 2016 at 04:20:02PM +0800, Lan Tianyu wrote:

On 2016-10-12 16:09, Jan Beulich wrote:

Also, any reason you send to the list twice (once @lists.xen.org,
and another time to @lists.xenproject.org)?


Sometimes I found my patches weren't able to reach xen-devel, so I sent
them to both the xen.org and xenproject.org mailing lists. I will double check.


Both addresses should work. There are glitches from time to time though.
So do report to us if this happens again.


I don't think either address is likely to work differently or
separately to the other.  So please just send to one, and if it
doesn't work, please report it and we will try to fix it.


Ok. I get it.



Thanks,
Ian.



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 1/2] Xen/Keyhandler: Rework process of nonirq keyhandler

2016-10-12 Thread Lan Tianyu
On 2016-10-13 00:03, Jan Beulich wrote:
 On 12.10.16 at 16:30,  wrote:
>>
>> Since the issue happens when handle_keypress() runs in a timer handler,
>> how about to name new parameter "intimer"? __serial_rx() is called in a 
>> timer handler or interrupt handler. Or do you have other suggestion?
> 
> I think "intimer" can be confusing (to be mixed up with timer interrupt).
> How about "force_tasklet"?

OK. I will update.
-- 
Best regards
Tianyu Lan

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH V3] Xen/Keyhandler: Rework process of nonirq keyhandler

2016-10-12 Thread Lan Tianyu

A keyhandler may run for a long time in the serial port driver's
timer handler on a large machine with a lot of physical
cpus (e.g. dump_timerq()) when the serial port driver works in
poll mode (via the exception mechanism).

If a timer handler runs for a long time, it blocks nmi_timer_fn()
from feeding the NMI watchdog and causes a Xen hypervisor panic. Inserting
process_pending_softirqs() in the timer handler does not help: when a timer
interrupt arrives, the timer subsystem calls all expired timer handlers
before programming the next timer interrupt, so no timer interrupt
arrives to trigger the timer softirq while a timer handler is running.

This patch fixes the issue by making non-irq keyhandlers run in a
tasklet when a debug key is received from the serial port.

Signed-off-by: Lan Tianyu 
---
 xen/common/keyhandler.c  |8 +---
 xen/common/sysctl.c  |2 +-
 xen/drivers/char/console.c   |2 +-
 xen/include/xen/keyhandler.h |4 +++-
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index 16de6e8..005ef99 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -75,19 +75,21 @@ static struct keyhandler {
 
 static void keypress_action(unsigned long unused)
 {
-handle_keypress(keypress_key, NULL);
+console_start_log_everything();
+key_table[keypress_key].fn(keypress_key);
+console_end_log_everything();
 }
 
 static DECLARE_TASKLET(keypress_tasklet, keypress_action, 0);
 
-void handle_keypress(unsigned char key, struct cpu_user_regs *regs)
+void handle_keypress(unsigned char key, struct cpu_user_regs *regs, bool force_tasklet)
 {
 struct keyhandler *h;
 
 if ( key >= ARRAY_SIZE(key_table) || !(h = &key_table[key])->fn )
 return;
 
-if ( !in_irq() || h->irq_callback )
+if ( h->irq_callback || !force_tasklet )
 {
 console_start_log_everything();
 h->irq_callback ? h->irq_fn(key, regs) : h->fn(key);
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 8aea6ef..1eb7bad 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -136,7 +136,7 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
 {
 if ( copy_from_guest_offset(&c, op->u.debug_keys.keys, i, 1) )
 goto out;
-handle_keypress(c, guest_cpu_user_regs());
+handle_keypress(c, guest_cpu_user_regs(), false);
 }
 ret = 0;
 copyback = 0;
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 55ae31a..b0f74ce 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -347,7 +347,7 @@ static void switch_serial_input(void)
 static void __serial_rx(char c, struct cpu_user_regs *regs)
 {
 if ( xen_rx )
-return handle_keypress(c, regs);
+return handle_keypress(c, regs, !in_irq());
 
 /* Deliver input to guest buffer, unless it is already full. */
 if ( (serial_rx_prod-serial_rx_cons) != SERIAL_RX_SIZE )
diff --git a/xen/include/xen/keyhandler.h b/xen/include/xen/keyhandler.h
index 06c05c8..e9595bd 100644
--- a/xen/include/xen/keyhandler.h
+++ b/xen/include/xen/keyhandler.h
@@ -46,7 +46,9 @@ void register_irq_keyhandler(unsigned char key,
  bool_t diagnostic);
 
 /* Inject a keypress into the key-handling subsystem. */
-extern void handle_keypress(unsigned char key, struct cpu_user_regs *regs);
+extern void handle_keypress(unsigned char key,
+   struct cpu_user_regs *regs,
+   bool async);
 
 /* Scratch space is available for use of any keyhandler. */
 extern char keyhandler_scratch[1024];
-- 
1.7.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] Xen virtual IOMMU high level design doc V2

2016-10-18 Thread Lan Tianyu

Change since V1:
1) Update motivation for Xen vIOMMU - 288 vcpus support part
2) Change definition of struct xen_sysctl_viommu_op
3) Update "3.5 Implementation consideration" to explain why we need to
enable l2 translation first.
4) Update "4.3 Q35 vs I440x" - Linux/Windows VTD drivers can work on
the emulated I440 chipset.
5) Remove stale statement in the "3.3 Interrupt remapping"

Content:
===
1. Motivation of vIOMMU
1.1 Enable more than 255 vcpus
1.2 Support VFIO-based user space driver
1.3 Support guest Shared Virtual Memory (SVM)
2. Xen vIOMMU Architecture
2.1 l2 translation overview
2.2 Interrupt remapping overview
3. Xen hypervisor
3.1 New vIOMMU hypercall interface
3.2 l2 translation
3.3 Interrupt remapping
3.4 l1 translation
3.5 Implementation consideration
4. Qemu
4.1 Qemu vIOMMU framework
4.2 Dummy xen-vIOMMU driver
4.3 Q35 vs. i440x
4.4 Report vIOMMU to hvmloader


1 Motivation for Xen vIOMMU
===
1.1 Enable more than 255 vcpu support
HPC cloud service requires VM provides high performance parallel
computing and we hope to create a huge VM with >255 vcpu on one machine
to meet such requirement.Ping each vcpus on separated pcpus. More than
255 vcpus support requires X2APIC and Linux disables X2APIC mode if
there is no interrupt remapping function which is present by vIOMMU.
Interrupt remapping function helps to deliver interrupt to #vcpu >255.
So we need to add vIOMMU before enabling >255 vcpus.

1.2 Support VFIO-based user space driver (e.g. DPDK) in the guest
It relies on the l2 translation capability (IOVA->GPA) on
vIOMMU. pIOMMU l2 becomes a shadowing structure of
vIOMMU to isolate DMA requests initiated by user space driver.


1.3 Support guest SVM (Shared Virtual Memory)
It relies on the l1 translation table capability (GVA->GPA) on
vIOMMU. pIOMMU needs to enable both l1 and l2 translation in nested
mode (GVA->GPA->HPA) for passthrough device. IGD passthrough
is the main usage today (to support OpenCL 2.0 SVM feature). In the
future SVM might be used by other I/O devices too.

2. Xen vIOMMU Architecture


* vIOMMU will be inside Xen hypervisor for following factors
1) Avoid round trips between Qemu and Xen hypervisor
2) Ease of integration with the rest of the hypervisor
3) HVMlite/PVH doesn't use Qemu
* Dummy xen-vIOMMU in Qemu as a wrapper around the new hypercall to create
/destroy the vIOMMU in the hypervisor and deal with virtual PCI devices' l2
translation.

2.1 l2 translation overview
For a virtual PCI device, the dummy xen-vIOMMU does the translation in
Qemu via the new hypercall.

For a physical PCI device, the vIOMMU in the hypervisor shadows the IO page
table from IOVA->GPA to IOVA->HPA and loads the shadow page table into the
physical IOMMU.
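
To illustrate the shadowing idea, here is a minimal sketch only (not Xen's
actual IOMMU code): gpa_to_hpa() stands in for the real p2m lookup, and the
flat single-level table is a simplification of the multi-level VT-d
structures.

#include <stdint.h>
#include <stddef.h>

#define PAGE_SHIFT 12

/* Hypothetical flat l2 IO page table: entry i maps IOVA frame i. */
struct io_pgtable {
    uint64_t entry[512];           /* page frame number, 0 = not present */
};

/* Placeholder for the hypervisor's p2m lookup (GPA -> HPA). */
extern uint64_t gpa_to_hpa(uint64_t gpa);

/*
 * Build the shadow table that is loaded into the physical IOMMU: for
 * each guest entry mapping IOVA -> GPA, write a shadow entry mapping
 * the same IOVA to the corresponding HPA.
 */
static void shadow_l2_table(const struct io_pgtable *guest,
                            struct io_pgtable *shadow)
{
    for (size_t i = 0; i < 512; i++) {
        uint64_t gpa = guest->entry[i] << PAGE_SHIFT;

        shadow->entry[i] = guest->entry[i]
                           ? gpa_to_hpa(gpa) >> PAGE_SHIFT
                           : 0;
    }
}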

The following diagram shows l2 translation architecture.
+-+
|Qemu++   |
|| Virtual|   |
||   PCI device   |   |
|||   |
|++   |
||DMA |
|V|
|  ++   Request  ++   |
|  |+<---+|   |
|  |  Dummy xen vIOMMU  | Target GPA |  Memory region |   |
|  |+--->+|   |
|  +-+--++---++   |
||   ||
||Hypercall  ||
+++
|Hypervisor  |   ||
||   ||
|v   ||
| +--+--+||
| |   vIOMMU|||
| +--+--+||
||   ||
|v   ||
| +--+--+||
| | IOMMU driver|||
| +--+--+||
||   ||
+++
|HW  v   V|
| +--+--+ +-+ |
| |   IOMMU +-

Re: [Xen-devel] Xen virtual IOMMU high level design doc V2

2016-10-20 Thread Lan Tianyu

Hi Andrew:
Thanks for your review.

On 2016-10-19 03:17, Andrew Cooper wrote:

On 18/10/16 15:14, Lan Tianyu wrote:

Change since V1:
1) Update motivation for Xen vIOMMU - 288 vcpus support part
2) Change definition of struct xen_sysctl_viommu_op
3) Update "3.5 Implementation consideration" to explain why we
needs to enable l2 translation first.
4) Update "4.3 Q35 vs I440x" - Linux/Windows VTD drivers can work
on the emulated I440 chipset.
5) Remove stale statement in the "3.3 Interrupt remapping"

Content:
===

1. Motivation of vIOMMU
1.1 Enable more than 255 vcpus
1.2 Support VFIO-based user space driver
1.3 Support guest Shared Virtual Memory (SVM)
2. Xen vIOMMU Architecture
2.1 l2 translation overview
2.2 Interrupt remapping overview
3. Xen hypervisor
3.1 New vIOMMU hypercall interface
3.2 l2 translation
3.3 Interrupt remapping
3.4 l1 translation
3.5 Implementation consideration
4. Qemu
4.1 Qemu vIOMMU framework
4.2 Dummy xen-vIOMMU driver
4.3 Q35 vs. i440x
4.4 Report vIOMMU to hvmloader


1 Motivation for Xen vIOMMU
===

1.1 Enable more than 255 vcpu support
HPC cloud service requires VM provides high performance parallel
computing and we hope to create a huge VM with >255 vcpu on one machine
to meet such requirement.Ping each vcpus on separated pcpus. More than


Pin ?



Sorry, it's a typo.


Also, grammatically speaking, I think you mean "each vcpu to separate
pcpus".



Yes.




255 vcpus support requires X2APIC and Linux disables X2APIC mode if
there is no interrupt remapping function which is present by vIOMMU.
Interrupt remapping function helps to deliver interrupt to #vcpu >255.


This is only a requirement for xapic interrupt sources.  x2apic
interrupt sources already deliver correctly.


The key is the APIC ID. There is no modification to existing PCI MSI and
IOAPIC with the introduction of x2apic. PCI MSI/IOAPIC can only send
interrupt message containing 8bit APIC ID, which cannot address >255
cpus. Interrupt remapping supports 32bit APIC ID so it's necessary to
enable >255 cpus with x2apic mode.

If LAPIC is in x2apic while interrupt remapping is disabled, IOAPIC
cannot deliver interrupts to all cpus in the system if #cpu > 255.







1.3 Support guest SVM (Shared Virtual Memory)
It relies on the l1 translation table capability (GVA->GPA) on
vIOMMU. pIOMMU needs to enable both l1 and l2 translation in nested
mode (GVA->GPA->HPA) for passthrough device. IGD passthrough
is the main usage today (to support OpenCL 2.0 SVM feature). In the
future SVM might be used by other I/O devices too.


As an aside, how is IGD intending to support SVM?  Will it be with PCIe
ATS/PASID, or something rather more magic as IGD is on the same piece of
silicon?


IGD on Skylake supports PCIe PASID.






2. Xen vIOMMU Architecture



* vIOMMU will be inside Xen hypervisor for following factors
1) Avoid round trips between Qemu and Xen hypervisor
2) Ease of integration with the rest of the hypervisor
3) HVMlite/PVH doesn't use Qemu
* Dummy xen-vIOMMU in Qemu as a wrapper of new hypercall to create
/destory vIOMMU in hypervisor and deal with virtual PCI device's l2
translation.

2.1 l2 translation overview
For Virtual PCI device, dummy xen-vIOMMU does translation in the
Qemu via new hypercall.

For physical PCI device, vIOMMU in hypervisor shadows IO page table from
IOVA->GPA to IOVA->HPA and load page table to physical IOMMU.

The following diagram shows l2 translation architecture.


Which scenario is this?  Is this the passthrough case where the Qemu
Virtual PCI device is a shadow of the real PCI device in hardware?



No, this covers both the traditional virtual PCI device emulated by Qemu
and the passthrough PCI device.



+-+
|Qemu++   |
|| Virtual|   |
||   PCI device   |   |
|||   |
|++   |
||DMA |
|V|
|  ++   Request  ++   |
|  |+<---+|   |
|  |  Dummy xen vIOMMU  | Target GPA |  Memory region |   |
|  |+--->+|   |
|  +-+--++---++   |
||   ||
||Hypercall

Re: [Xen-devel] Xen virtual IOMMU high level design doc V2

2016-10-20 Thread Lan, Tianyu


On 10/19/2016 4:26 AM, Konrad Rzeszutek Wilk wrote:

On Tue, Oct 18, 2016 at 10:14:16PM +0800, Lan Tianyu wrote:



1 Motivation for Xen vIOMMU
===
1.1 Enable more than 255 vcpu support
HPC cloud service requires VM provides high performance parallel
computing and we hope to create a huge VM with >255 vcpu on one machine
to meet such requirement.Ping each vcpus on separated pcpus. More than
255 vcpus support requires X2APIC and Linux disables X2APIC mode if
there is no interrupt remapping function which is present by vIOMMU.
Interrupt remapping function helps to deliver interrupt to #vcpu >255.
So we need to add vIOMMU before enabling >255 vcpus.


What about Windows? Does it care about this?


From our test, a win8 guest crashes when booting up with 288 vcpus without
IR, and it can boot up with IR.



3.2 l2 translation
1) For virtual PCI device
Xen dummy xen-vIOMMU in Qemu translates IOVA to target GPA via new
hypercall when DMA operation happens.

2) For physical PCI device
DMA operations go though physical IOMMU directly and IO page table for
IOVA->HPA should be loaded into physical IOMMU. When guest updates
l2 Page-table pointer field, it provides IO page table for
IOVA->GPA. vIOMMU needs to shadow l2 translation table, translate
GPA->HPA and update shadow page table(IOVA->HPA) pointer to l2
Page-table pointer to context entry of physical IOMMU.

Now all PCI devices in same hvm domain share one IO page table
(GPA->HPA) in physical IOMMU driver of Xen. To support l2
translation of vIOMMU, IOMMU driver need to support multiple address
spaces per device entry. Using existing IO page table(GPA->HPA)
defaultly and switch to shadow IO page table(IOVA->HPA) when l2


defaultly?


I mean the GPA->HPA mapping will be set in the assigned device's context
entry of the pIOMMU when the VM is created, just like the current code works.






3.3 Interrupt remapping
Interrupts from virtual devices and physical devices will be delivered
to vlapic from vIOAPIC and vMSI. It needs to add interrupt remapping
hooks in the vmsi_deliver() and ioapic_deliver() to find target vlapic
according interrupt remapping table.


3.4 l1 translation
When nested translation is enabled, any address generated by l1
translation is used as the input address for nesting with l2
translation. Physical IOMMU needs to enable both l1 and l2 translation
in nested translation mode(GVA->GPA->HPA) for passthrough
device.

VT-d context entry points to guest l1 translation table which
will be nest-translated by l2 translation table and so it
can be directly linked to context entry of physical IOMMU.


I think this means that the shared_ept will be disabled?


The shared_ept(GPA->HPA mapping) is used to do nested translation
for any output from l1 translation(GVA->GPA).




___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH] Xen: Force non-irq keyhandler to be run in tasklet when receive a debugkey from serial port

2016-10-21 Thread Lan Tianyu
__serial_rx() runs in either an irq handler or a timer handler, and non-irq
keyhandlers should not run in these contexts. So always force non-irq
keyhandlers to run in a tasklet when a debug key is received from the serial port.

Signed-off-by: Lan Tianyu 
---
 xen/drivers/char/console.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index b0f74ce..184b523 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -347,7 +347,7 @@ static void switch_serial_input(void)
 static void __serial_rx(char c, struct cpu_user_regs *regs)
 {
 if ( xen_rx )
-return handle_keypress(c, regs, !in_irq());
+return handle_keypress(c, regs, true);
 
 /* Deliver input to guest buffer, unless it is already full. */
 if ( (serial_rx_prod-serial_rx_cons) != SERIAL_RX_SIZE )
-- 
2.9.3


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 2/2] Xen/timer: Process softirq during dumping timer info

2016-10-21 Thread Lan, Tianyu



On 10/22/2016 1:27 AM, Wei Liu wrote:

On Wed, Oct 12, 2016 at 03:58:24PM +0800, Lan Tianyu wrote:

Dumping timer info may run for a long time on the huge machine with
a lot of physical cpus. To avoid triggering NMI watchdog, add
process_pending_softirqs() in the loop of dumping timer info.

Reviewed-by: Konrad Rzeszutek Wilk 
Signed-off-by: Lan Tianyu 
---
 xen/common/timer.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/xen/common/timer.c b/xen/common/timer.c
index 29a60a9..ab6bca0 100644
--- a/xen/common/timer.c
+++ b/xen/common/timer.c
@@ -530,6 +530,7 @@ static void dump_timerq(unsigned char key)
 {
 ts = &per_cpu(timers, i);

+process_pending_softirqs();


This is causing issues in ARM (x86 has a similar issue):

Oct 20 01:43:31.410010 (XEN) Xen call trace:
Oct 20 01:43:31.410048 (XEN)[<00233920>] process_pending_softirqs+0x34/0x5c 
(PC)
Oct 20 01:43:31.417990 (XEN)[<00237c6c>] timer.c#dump_timerq+0x9c/0x1fc (LR)
Oct 20 01:43:31.418030 (XEN)[<00218658>] handle_keypress+0xc0/0xf4
Oct 20 01:43:31.426001 (XEN)[<002490c8>] console.c#__serial_rx+0x4c/0x9c
Oct 20 01:43:31.433970 (XEN)[<00249b74>] console.c#serial_rx+0xcc/0xe4
Oct 20 01:43:31.434007 (XEN)[<0024b6ec>] serial_rx_interrupt+0xcc/0xf8
Oct 20 01:43:31.441964 (XEN)[<0024ae54>] 
exynos4210-uart.c#exynos4210_uart_interrupt+0xf8/0x160
Oct 20 01:43:31.450001 (XEN)[<00256338>] do_IRQ+0x1a0/0x228
Oct 20 01:43:31.450040 (XEN)[<00254074>] gic_interrupt+0x58/0xfc
Oct 20 01:43:31.457985 (XEN)[<00260f98>] do_trap_irq+0x24/0x38
Oct 20 01:43:31.458022 (XEN)[<00264970>] entry.o#return_from_trap+0/0x4
Oct 20 01:43:31.466010 (XEN)[<0030a240>] 0030a240
Oct 20 01:43:31.466044 (XEN)
Oct 20 01:43:31.466066 (XEN)
Oct 20 01:43:31.466099 (XEN) 
Oct 20 01:43:31.473998 (XEN) Panic on CPU 0:
Oct 20 01:43:31.474029 (XEN) Assertion '!in_irq() && local_irq_is_enabled()' 
failed at softirq.c:57
Oct 20 01:43:31.481982 (XEN) 

See
http://logs.test-lab.xenproject.org/osstest/logs/101571/test-armhf-armhf-libvirt/serial-arndale-bluewater.log

I've reverted this patch in staging.

Wei.


dump_timerq() and other non-irq keyhandlers should not run in irq context;
I have sent out a fix patch:


https://lists.xen.org/archives/html/xen-devel/2016-10/msg01391.html



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Xen virtual IOMMU high level design doc V2

2016-10-22 Thread Lan, Tianyu

On 10/21/2016 4:36 AM, Andrew Cooper wrote:







255 vcpus support requires X2APIC and Linux disables X2APIC mode if
there is no interrupt remapping function which is present by vIOMMU.
Interrupt remapping function helps to deliver interrupt to #vcpu >255.


This is only a requirement for xapic interrupt sources.  x2apic
interrupt sources already deliver correctly.


The key is the APIC ID. There is no modification to existing PCI MSI and
IOAPIC with the introduction of x2apic. PCI MSI/IOAPIC can only send
interrupt message containing 8bit APIC ID, which cannot address >255
cpus. Interrupt remapping supports 32bit APIC ID so it's necessary to
enable >255 cpus with x2apic mode.

If LAPIC is in x2apic while interrupt remapping is disabled, IOAPIC
cannot deliver interrupts to all cpus in the system if #cpu > 255.


After spending a long time reading up on this, my first observation is
that it is very difficult to find consistent information concerning the
expected content of MSI address/data fields for x86 hardware.  Having
said that, this has been very educational.

It is now clear that any MSI message can either specify an 8 bit APIC ID
directly, or request for the message to be remapped.  Apologies for my
earlier confusion.


Never mind; I will describe this in more detail in the next version.





3 Xen hypervisor
==


3.1 New hypercall XEN_SYSCTL_viommu_op
This hypercall should also support pv IOMMU which is still under RFC
review. Here only covers non-pv part.

1) Definition of "struct xen_sysctl_viommu_op" as new hypercall
parameter.


Why did you choose sysctl?  As these are per-domain, domctl would be a
more logical choice.  However, neither of these should be usable by
Qemu, and we are trying to split out "normal qemu operations" into dmops
which can be safely deprivileged.



Do you know what the status of dmop is now? I just found some discussions
about the design on the mailing list. May we use domctl first and move to
dmop when it's ready?


I believe Paul is looking into respin the series early in the 4.9 dev
cycle.  I expect it won't take long until they are submitted.


Ok, I got it. Thanks for the information.








Definition of VIOMMU subops:
#define XEN_SYSCTL_viommu_query_capability0
#define XEN_SYSCTL_viommu_create1
#define XEN_SYSCTL_viommu_destroy2
#define XEN_SYSCTL_viommu_dma_translation_for_vpdev 3

Definition of VIOMMU capabilities
#define XEN_VIOMMU_CAPABILITY_l1_translation(1 << 0)
#define XEN_VIOMMU_CAPABILITY_l2_translation(1 << 1)
#define XEN_VIOMMU_CAPABILITY_interrupt_remapping(1 << 2)


How are vIOMMUs going to be modelled to guests?  On real hardware, they
all seem to end associated with a PCI device of some sort, even if it is
just the LPC bridge.



This design just considers one vIOMMU that has all PCI devices under its
specified PCI segment. The "INCLUDE_PCI_ALL" bit of the DRHD structure is
set for the vIOMMU.


Even if the first implementation only supports a single vIOMMU, please
design the interface to cope with multiple.  It will save someone having
to go and break the API/ABI in the future when support for multiple
vIOMMUs is needed.


OK, got it.







How do we deal with multiple vIOMMUs in a single guest?


For multi-vIOMMU, we need to add a new field in struct iommu_op to
designate the device scope of the vIOMMUs if they are under the same PCI
segment. This also requires changes to the DMAR table.






2) Design for subops
- XEN_SYSCTL_viommu_query_capability
   Get vIOMMU capabilities(l1/l2 translation and interrupt
remapping).

- XEN_SYSCTL_viommu_create
  Create vIOMMU in Xen hypervisor with dom_id, capabilities and reg
base address.

- XEN_SYSCTL_viommu_destroy
  Destory vIOMMU in Xen hypervisor with dom_id as parameters.

- XEN_SYSCTL_viommu_dma_translation_for_vpdev
  Translate IOVA to GPA for specified virtual PCI device with
dom id,
PCI device's bdf and IOVA and xen hypervisor returns translated GPA,
address mask and access permission.


3.2 l2 translation
1) For virtual PCI device
Xen dummy xen-vIOMMU in Qemu translates IOVA to target GPA via new
hypercall when DMA operation happens.

2) For physical PCI device
DMA operations go though physical IOMMU directly and IO page table for
IOVA->HPA should be loaded into physical IOMMU. When guest updates
l2 Page-table pointer field, it provides IO page table for
IOVA->GPA. vIOMMU needs to shadow l2 translation table, translate
GPA->HPA and update shadow page table(IOVA->HPA) pointer to l2
Page-table pointer to context entry of physical IOMMU.


How are you proposing to do this shadowing?  Do we need to trap and
emulate all writes to the vIOMMU pagetables, or is there a better way to
know when the mappings need invalidating?


No, we don't need to trap all writes to the IO page table.
From VTD spec 6.1, "Reporting the Caching Mode as Set for the
virtual hardware requires the guest software to explicitly issue
invalidatio

Re: [Xen-devel] [PATCH] Xen: Force non-irq keyhandler to be run in tasklet when receive a debugkey from serial port

2016-10-24 Thread Lan, Tianyu

On 10/24/2016 8:19 AM, Konrad Rzeszutek Wilk wrote:

On Sat, Oct 22, 2016 at 07:23:03PM +0800, Lan Tianyu wrote:

__serial_rx() runs in either irq handler or timer handler and non-irq
keyhandler should not run in these contexts. So always force non-irq
keyhandler to run in tasklet when receive a debugkey from serial port


If the machine is hung with an IRQ handler being stuck, and
one does 'Ctrl-Ax3` followed by 'C' .. which would not be invoked
(as it is not an IRQ handler??


If serial port's interrupt still works in this case, the 'C'
keyhandler kexec_crash() will be invoked in a tasklet. This behavior was
changed by my patches if includes this patch.







Signed-off-by: Lan Tianyu 
---
 xen/drivers/char/console.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index b0f74ce..184b523 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -347,7 +347,7 @@ static void switch_serial_input(void)
 static void __serial_rx(char c, struct cpu_user_regs *regs)
 {
 if ( xen_rx )
-return handle_keypress(c, regs, !in_irq());
+return handle_keypress(c, regs, true);

 /* Deliver input to guest buffer, unless it is already full. */
 if ( (serial_rx_prod-serial_rx_cons) != SERIAL_RX_SIZE )
--
2.9.3



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] Xen: Force non-irq keyhandler to be run in tasklet when receive a debugkey from serial port

2016-10-24 Thread Lan, Tianyu



On 10/24/2016 6:53 PM, Jan Beulich wrote:

On 22.10.16 at 13:23,  wrote:

__serial_rx() runs in either irq handler or timer handler and non-irq
keyhandler should not run in these contexts. So always force non-irq
keyhandler to run in tasklet when receive a debugkey from serial port

Signed-off-by: Lan Tianyu 
---
 xen/drivers/char/console.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index b0f74ce..184b523 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -347,7 +347,7 @@ static void switch_serial_input(void)
 static void __serial_rx(char c, struct cpu_user_regs *regs)
 {
 if ( xen_rx )
-return handle_keypress(c, regs, !in_irq());
+return handle_keypress(c, regs, true);


Together with one of your earlier patches having got reverted, I
think we need to take a step back here instead of going back to
what was requested to be changed from v2 of the original patch.
In particular I assume that the problem you're trying to address is
not limited to dump_timerq() - at least dump_runq() should be as
problematic on many-CPU systems.


I think the issue here is that my previous patch, commit
610b4eda2c ("keyhandler: rework process of nonirq keyhandler"), makes
non-irq keyhandlers run in irq context. This is caused by the input parameter
"!in_irq()", which is false in irq context, so handle_keypress() runs the
keyhandler synchronously. This patch fixes the issue.




I think (and I vaguely recall possibly having said so during earlier
review) that dump functions the output of which depends on CPU
count should get modeled after dump_registers(), and it might be
worth abstracting this in keyhandler.c.


Yes, but this sounds like a new feature or framework rework rather than 
a fix patch.




In any case quite likely the
other patch of yours (which the one here basically modifies) may
then also want to be reverted.


I think the patch "timer: process softirq during dumping timer"
does the right thing. The issue is triggered by the previous patch.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] Xen: Force non-irq keyhandler to be run in tasklet when receive a debugkey from serial port

2016-10-24 Thread Lan, Tianyu



On 10/24/2016 9:38 PM, Konrad Rzeszutek Wilk wrote:

On Mon, Oct 24, 2016 at 09:29:53PM +0800, Lan, Tianyu wrote:

On 10/24/2016 8:19 AM, Konrad Rzeszutek Wilk wrote:

On Sat, Oct 22, 2016 at 07:23:03PM +0800, Lan Tianyu wrote:

__serial_rx() runs in either irq handler or timer handler and non-irq
keyhandler should not run in these contexts. So always force non-irq
keyhandler to run in tasklet when receive a debugkey from serial port


If the machine is hung with an IRQ handler being stuck, and
one does 'Ctrl-Ax3` followed by 'C' .. which would not be invoked
(as it is not an IRQ handler??


If serial port's interrupt still works in this case, the 'C'
keyhandler kexec_crash() will be invoked in a tasklet. This behavior was
changed by my patches if includes this patch.


Right, but the tasklet won't get to run at that point - as for example
the IRQ handler is stuck - so tasklets never get run? Or maybe
they do on another CPU?




If the serial interrupt handler works, the cpu receiving the serial port
interrupt should work normally. tasklet_schedule() in handle_keypress()
queues the keyhandler tasklet to that cpu, and the tasklet should also get
to run at that point.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] Xen: Force non-irq keyhandler to be run in tasklet when receive a debugkey from serial port

2016-10-24 Thread Lan, Tianyu



On 10/24/2016 9:54 PM, Jan Beulich wrote:

On 24.10.16 at 15:29,  wrote:

On 10/24/2016 8:19 AM, Konrad Rzeszutek Wilk wrote:

On Sat, Oct 22, 2016 at 07:23:03PM +0800, Lan Tianyu wrote:

__serial_rx() runs in either irq handler or timer handler and non-irq
keyhandler should not run in these contexts. So always force non-irq
keyhandler to run in tasklet when receive a debugkey from serial port


If the machine is hung with an IRQ handler being stuck, and
one does 'Ctrl-Ax3` followed by 'C' .. which would not be invoked
(as it is not an IRQ handler??


If serial port's interrupt still works in this case, the 'C'
keyhandler kexec_crash() will be invoked in a tasklet. This behavior was
changed by my patches if includes this patch.


Sorry. A typo. I meant the behavior wasn't changed by my patches.



As indicated already by Konrad's reply, this is not going to be
acceptable.






___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] Xen: Force non-irq keyhandler to be run in tasklet when receive a debugkey from serial port

2016-10-24 Thread Lan, Tianyu



On 10/24/2016 10:31 PM, Jan Beulich wrote:

On 24.10.16 at 16:15,  wrote:




On 10/24/2016 9:54 PM, Jan Beulich wrote:

On 24.10.16 at 15:29,  wrote:

On 10/24/2016 8:19 AM, Konrad Rzeszutek Wilk wrote:

On Sat, Oct 22, 2016 at 07:23:03PM +0800, Lan Tianyu wrote:

__serial_rx() runs in either irq handler or timer handler and non-irq
keyhandler should not run in these contexts. So always force non-irq
keyhandler to run in tasklet when receive a debugkey from serial port


If the machine is hung with an IRQ handler being stuck, and
one does 'Ctrl-Ax3` followed by 'C' .. which would not be invoked
(as it is not an IRQ handler??


If serial port's interrupt still works in this case, the 'C'
keyhandler kexec_crash() will be invoked in a tasklet. This behavior was
changed by my patches if includes this patch.


Sorry. A typo. I meant the behavior wasn't changed by my patches.


How was it not? The softirq machinery didn't get invoked in that case
prior to your patch, afaict.



Which softirq? You mean adding process_pending_softirqs() in dump_timerq()?

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] Xen: Force non-irq keyhandler to be run in tasklet when receive a debugkey from serial port

2016-10-24 Thread Lan, Tianyu



On 10/24/2016 10:28 PM, Jan Beulich wrote:

On 24.10.16 at 16:01,  wrote:

On 10/24/2016 6:53 PM, Jan Beulich wrote:

On 22.10.16 at 13:23,  wrote:

__serial_rx() runs in either irq handler or timer handler and non-irq
keyhandler should not run in these contexts. So always force non-irq
keyhandler to run in tasklet when receive a debugkey from serial port

Signed-off-by: Lan Tianyu 
---
 xen/drivers/char/console.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index b0f74ce..184b523 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -347,7 +347,7 @@ static void switch_serial_input(void)
 static void __serial_rx(char c, struct cpu_user_regs *regs)
 {
 if ( xen_rx )
-return handle_keypress(c, regs, !in_irq());
+return handle_keypress(c, regs, true);


Together with one of your earlier patches having got reverted, I
think we need to take a step back here instead of going back to
what was requested to be changed from v2 of the original patch.
In particular I assume that the problem you're trying to address is
not limited to dump_timerq() - at least dump_runq() should be as
problematic on many-CPU systems.


I think the issue here is that my previous patch commit
610b4eda2c("keyhandler: rework process of nonirq keyhandler") makes
non-irq keyhandler run in irq context. This is caused by input param
"!in_irq()" which is false in irq context. handle_keypress() runs
keyhandler synchronically. This patch fixes the issue.


Not really - your earlier patch only moved the !in_irq() check, i.e.
things continued to run in the same context they always did
_except_ for the one special case you cared about.


I suppose the special case you meant is running a keyhandler in a timer
handler. It's necessary to make any timer handler run in a short time,
otherwise it will trigger the watchdog problem.




Plus your
other patch fixed the respective issue only for one individual
handler, instead of generally.


So you think adding process_pending_softirqs() in the keyhandler isn't
general enough? But this is a common solution so far.





I think (and I vaguely recall possibly having said so during earlier
review) that dump functions the output of which depends on CPU
count should get modeled after dump_registers(), and it might be
worth abstracting this in keyhandler.c.


Yes, but this sounds like a new feature or framework rework rather than
a fix patch.


In a way, sure. It's a more extensive fix, which would avoid
someone else running into the same issue with another handler.


This seems like a big change, and a lot of dump functions need to be reworked, right?






In any case quite likely the
other patch of yours (which the one here basically modifies) may
then also want to be reverted.


I think patch "timer: process softirq during dumping timer"
does right thing. The issue is triggered by previous patch.


Well - the issue did not exist prior to both of your patches
going in, and I think it would have continued to exist if the
keyhandler rework patch alone had been reverted. (And I'm
afraid anyway that "previous" is ambiguous here, as the timer
handler change went in first.)

Jan



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Xen virtual IOMMU high level design doc V2

2016-10-26 Thread Lan, Tianyu

On 10/26/2016 5:36 PM, Jan Beulich wrote:

On 18.10.16 at 16:14,  wrote:

1.1 Enable more than 255 vcpu support
HPC cloud service requires VM provides high performance parallel
computing and we hope to create a huge VM with >255 vcpu on one machine
to meet such requirement.Ping each vcpus on separated pcpus. More than
255 vcpus support requires X2APIC and Linux disables X2APIC mode if
there is no interrupt remapping function which is present by vIOMMU.
Interrupt remapping function helps to deliver interrupt to #vcpu >255.
So we need to add vIOMMU before enabling >255 vcpus.


I continue to dislike this completely neglecting that we can't even
have >128 vCPU-s at present. Once again - there's other work to
be done prior to lack of vIOMMU becoming the limiting factor.



Yes, we can increase the vcpu limit from 128 to 255 first without vIOMMU
support. We have some draft patches to enable this. Andrew will also rework
the CPUID policy and change the rule for allocating vcpu APIC IDs, so we will
base the vcpu number increase on that. The VLAPIC also needs to be changed
to support >255 APIC IDs. These jobs can be implemented in parallel with the
vIOMMU work.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Xen virtual IOMMU high level design doc V2

2016-10-26 Thread Lan, Tianyu



On 10/26/2016 5:39 PM, Jan Beulich wrote:

On 22.10.16 at 09:32,  wrote:

On 10/21/2016 4:36 AM, Andrew Cooper wrote:

3.5 Implementation consideration
VT-d spec doesn't define a capability bit for the l2 translation.
Architecturally there is no way to tell guest that l2 translation
capability is not available. Linux Intel IOMMU driver thinks l2
translation is always available when VTD exits and fail to be loaded
without l2 translation support even if interrupt remapping and l1
translation are available. So it needs to enable l2 translation first
before other functions.


What then is the purpose of the nested translation support bit in the
extended capability register?


It's to translate output GPA from first level translation(IOVA->GPA)
to HPA.

Detail please see VTD spec - 3.8 Nested Translation
"When Nesting Enable (NESTE) field is 1 in extended-context-entries,
requests-with-PASID translated through first-level translation are also
subjected to nested second-level translation. Such extendedcontext-
entries contain both the pointer to the PASID-table (which contains the
pointer to the firstlevel translation structures), and the pointer to
the second-level translation structures."


I didn't phrase my question very well.  I understand what the nested
translation bit means, but I don't understand why we have a problem
signalling the presence or lack of nested translations to the guest.

In other words, why can't we hide l2 translation from the guest by
simply clearing the nested translation capability?


You mean to indicate no support for l2 translation via the nested
translation bit? But nested translation is a different function from l2
translation even from the guest's view, and nested translation only works
for requests with PASID (l1 translation).

Linux's Intel IOMMU driver enables l2 translation unconditionally and frees
the iommu instance when it fails to enable l2 translation.


In which cases the wording of your description is confusing: Instead of
"Linux Intel IOMMU driver thinks l2 translation is always available when
VTD exits and fail to be loaded without l2 translation support ..." how
about using something closer to what you've replied with last?



Sorry for my poor English. Will update.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Xen virtual IOMMU high level design doc V2

2016-10-28 Thread Lan Tianyu

On 2016-10-21 04:36, Andrew Cooper wrote:

>>

>>> u64 iova;
>>> /* Out parameters. */
>>> u64 translated_addr;
>>> u64 addr_mask; /* Translation page size */
>>> IOMMUAccessFlags permisson;

>>
>> How is this translation intended to be used?  How do you plan to avoid
>> race conditions where qemu requests a translation, receives one, the
>> guest invalidated the mapping, and then qemu tries to use its translated
>> address?
>>
>> There are only two ways I can see of doing this race-free.  One is to
>> implement a "memcpy with translation" hypercall, and the other is to
>> require the use of ATS in the vIOMMU, where the guest OS is required to
>> wait for a positive response from the vIOMMU before it can safely reuse
>> the mapping.
>>
>> The former behaves like real hardware in that an intermediate entity
>> performs the translation without interacting with the DMA source.  The
>> latter explicitly exposing the fact that caching is going on at the
>> endpoint to the OS.

>
> The former one seems to move DMA operation into hypervisor but Qemu
> vIOMMU framework just passes IOVA to dummy xen-vIOMMU without input
> data and access length. I will dig more to figure out solution.

Yes - that does in principle actually move the DMA out of Qemu.


Hi Andrew:

The first solution, "move the DMA out of Qemu": the Qemu vIOMMU framework
just gives the dummy xen-vIOMMU device model a chance to do the DMA
translation, while the DMA access operation itself is in the vIOMMU core
code. It's hard to move this out. There are a lot of places that call the
translation callback, and some of these are not for DMA access (e.g.
mapping guest memory in Qemu).

The second solution, "use ATS to sync the invalidation operation": this
requires enabling ATS for all virtual PCI devices. This is not easy to do.

The following is my proposal:
When the IOMMU driver invalidates the IOTLB, it also waits for the
invalidation to complete. We may use this to drain in-flight DMA operations.

The guest triggers an invalidation operation and traps into the vIOMMU in
the hypervisor to flush the cached data. After this, it should go to Qemu to
drain the in-flight DMA translations.

To do that, the dummy vIOMMU in Qemu registers the same MMIO region as the
vIOMMU's, and the emulation part of the invalidation operation returns
X86EMUL_UNHANDLEABLE after flushing the cache. The MMIO emulation part is
supposed to send an event to Qemu, and the dummy vIOMMU gets a chance to
start a thread to drain the in-flight DMA and then report the emulation as
done.

The guest polls the IVT (invalidate IOTLB) bit in the IOTLB invalidate
register until it's cleared. The dummy vIOMMU notifies the vIOMMU via a
hypercall that the drain operation has completed, the vIOMMU clears the IVT
bit, and the guest finishes the invalidation operation.
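
A rough sketch of the proposed flow, with entirely hypothetical function
names (the real code would live partly in Xen's vIOMMU MMIO handler and
partly in Qemu's dummy xen-vIOMMU; only X86EMUL_UNHANDLEABLE refers to an
existing Xen emulation return value, and its numeric value below is just a
stand-in):

#include <stdint.h>

/* Placeholder declarations so the sketch is self-contained. */
#define X86EMUL_UNHANDLEABLE 1            /* stand-in; use Xen's definition */
extern void viommu_flush_cached_mappings(void);
extern void viommu_set_ivt_bit(void);
extern void viommu_clear_ivt_bit(void);
extern void qemu_drain_in_flight_dma(void);
extern void hypercall_viommu_drain_done(void);   /* hypothetical new call */

/* Xen side: the guest writes the IOTLB invalidate register (IVT set). */
static int viommu_iotlb_inval_write(void)
{
    viommu_flush_cached_mappings();   /* 1. flush the vIOMMU's cached data   */
    viommu_set_ivt_bit();             /*    guest will poll IVT until clear  */
    return X86EMUL_UNHANDLEABLE;      /* 2. let the write travel on to Qemu  */
}

/* Qemu side: dummy xen-vIOMMU registered on the same MMIO range. */
static void dummy_viommu_mmio_write(void)
{
    qemu_drain_in_flight_dma();       /* 3. drain in-flight DMA (in a thread) */
    hypercall_viommu_drain_done();    /* 4. Xen clears IVT; the guest's poll
                                       *    of the register then completes    */
}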

--
Best regards
Tianyu Lan

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Xen virtual IOMMU high level design doc V2

2016-11-03 Thread Lan, Tianyu



On 10/26/2016 5:39 PM, Jan Beulich wrote:

On 22.10.16 at 09:32,  wrote:

On 10/21/2016 4:36 AM, Andrew Cooper wrote:

3.5 Implementation consideration
VT-d spec doesn't define a capability bit for the l2 translation.
Architecturally there is no way to tell guest that l2 translation
capability is not available. Linux Intel IOMMU driver thinks l2
translation is always available when VTD exits and fail to be loaded
without l2 translation support even if interrupt remapping and l1
translation are available. So it needs to enable l2 translation first
before other functions.


What then is the purpose of the nested translation support bit in the
extended capability register?


It's to translate output GPA from first level translation(IOVA->GPA)
to HPA.

Detail please see VTD spec - 3.8 Nested Translation
"When Nesting Enable (NESTE) field is 1 in extended-context-entries,
requests-with-PASID translated through first-level translation are also
subjected to nested second-level translation. Such extendedcontext-
entries contain both the pointer to the PASID-table (which contains the
pointer to the firstlevel translation structures), and the pointer to
the second-level translation structures."


I didn't phrase my question very well.  I understand what the nested
translation bit means, but I don't understand why we have a problem
signalling the presence or lack of nested translations to the guest.

In other words, why can't we hide l2 translation from the guest by
simply clearing the nested translation capability?


You mean to tell no support of l2 translation via nest translation bit?
But the nested translation is a different function with l2 translation
even from guest view and nested translation only works requests with
PASID (l1 translation).

Linux intel iommu driver enables l2 translation unconditionally and free
iommu instance when failed to enable l2 translation.


In which cases the wording of your description is confusing: Instead of
"Linux Intel IOMMU driver thinks l2 translation is always available when
VTD exits and fail to be loaded without l2 translation support ..." how
about using something closer to what you've replied with last?

Jan



Hi All:
I have some updates about the implementation dependency between l2
translation (DMA translation) and irq remapping.

I find there is a kernel parameter "intel_iommu=on" and a kconfig option
CONFIG_INTEL_IOMMU_DEFAULT_ON which control the DMA translation function.
When they aren't set, the DMA translation function will not be enabled by
the IOMMU driver even if some vIOMMU registers show the l2 translation
function as available. In the meantime, the irq remapping function can
still work to support >255 vcpus.

I checked that the RHEL, SLES, Oracle and Ubuntu distributions don't set
the kernel parameter or select the kconfig option. So we can emulate irq
remapping first, with some capability bits (e.g. SAGAW of the Capability
Register) of l2 translation, for >255 vcpus support without l2 translation
emulation.

Showing the l2 capability bits is to make sure the IOMMU driver probes the
ACPI DMAR tables successfully, because the IOMMU driver accesses these bits
while reading the ACPI tables.

If someone adds the "intel_iommu=on" kernel parameter manually, the IOMMU
driver will panic the guest because it can't enable the DMA remapping
function via the gcmd register and the "Translation Enable Status" bit in
the gsts register is never set by the vIOMMU. This shows the actual vIOMMU
status of no l2 translation emulation and warns the user not to enable l2
translation.




___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] Xen virtual IOMMU high level design doc V3

2016-11-17 Thread Lan Tianyu

Change since V2:
1) Update motivation for Xen vIOMMU - 288 vcpus support part
Add a description of the plan to increase vcpus from 128 to 255 and the
dependency between X2APIC and interrupt remapping.
2) Update 3.1 New vIOMMU hypercall interface
Change the vIOMMU hypercall from sysctl to dmop, add multi-vIOMMU
consideration and a drain in-flight DMA subcommand.
3) Update 3.5 implementation consideration
We found it's still safe to enable the interrupt remapping function before
adding l2 translation (DMA translation) to increase the vcpu number >255.
4) Update 3.2 l2 translation - virtual device part
Add a proposal to deal with the race between in-flight DMA and the
invalidation operation in the hypervisor.
5) Update 4.4 Report vIOMMU to hvmloader
Add the option of building the ACPI DMAR table in the toolstack for
discussion.

Change since V1:
1) Update motivation for Xen vIOMMU - 288 vcpus support part
2) Change definition of struct xen_sysctl_viommu_op
3) Update "3.5 Implementation consideration" to explain why we need to
enable l2 translation first.
4) Update "4.3 Q35 vs I440x" - Linux/Windows VTD drivers can work on
the emulated I440 chipset.
5) Remove stale statement in the "3.3 Interrupt remapping"

Content:
===
1. Motivation of vIOMMU
1.1 Enable more than 255 vcpus
1.2 Support VFIO-based user space driver
1.3 Support guest Shared Virtual Memory (SVM)
2. Xen vIOMMU Architecture
2.1 l2 translation overview
2.2 Interrupt remapping overview
3. Xen hypervisor
3.1 New vIOMMU hypercall interface
3.2 l2 translation
3.3 Interrupt remapping
3.4 l1 translation
3.5 Implementation consideration
4. Qemu
4.1 Qemu vIOMMU framework
4.2 Dummy xen-vIOMMU driver
4.3 Q35 vs. i440x
4.4 Report vIOMMU to hvmloader


Glossary:

l1 translation - first-level translation to remap a virtual address to an
intermediate (guest) physical address (GVA->GPA).
l2 translation - second-level translation to remap an intermediate (guest)
physical address to a machine (host) physical address (GPA->HPA).

1 Motivation for Xen vIOMMU

1.1 Enable more than 255 vcpu support
HPC cloud services require VMs that provide high performance parallel
computing, and we hope to create a huge VM with >255 vcpus on one machine
to meet such a requirement, pinning each vcpu to a separate pcpu.

Now an HVM guest can support 128 vcpus at most. We can increase the vcpu
number from 128 to 255 by changing some limitations and extending
vcpu-related data structures. This also requires changing the rule for
allocating a vcpu's APIC ID. The current rule is "(APIC ID) = (vcpu index) * 2";
we need to change it to "(APIC ID) = (vcpu index)". Andrew Cooper's CPUID
improvement work will cover this to improve the guest's cpu topology, and
we will build on it to increase the vcpu number from 128 to 255.
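
As a minimal illustration (hypothetical helper names, only restating the
two rules above):

/* Hypothetical helpers restating the two APIC ID allocation rules above. */
#define vcpu_to_apicid_current(vcpu_id)   ((vcpu_id) * 2)   /* today    */
#define vcpu_to_apicid_proposed(vcpu_id)  (vcpu_id)          /* proposed */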

To support >255 vcpus, x2APIC mode in the guest is necessary, because the
legacy APIC (xAPIC) only supports 8-bit APIC IDs and so at most 255 vcpus.
x2APIC mode supports 32-bit APIC IDs, and it requires the interrupt
remapping function of the vIOMMU.

The reason is that there is no modification to the existing PCI MSI and
IOAPIC with the introduction of x2APIC. PCI MSI/IOAPIC can only send
interrupt messages containing an 8-bit APIC ID, which cannot address >255
cpus. Interrupt remapping supports 32-bit APIC IDs, so it's necessary for
enabling >255 cpus with x2APIC mode.
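
To make the contrast concrete, a sketch of how a remappable-format MSI
address encodes a 16-bit interrupt index (IRTE handle) instead of an 8-bit
destination APIC ID (bit layout per the VT-d spec; macro names follow the
Linux driver):

#define MSI_ADDR_BASE_LO        0xfee00000
#define MSI_ADDR_IR_EXT_INT     (1 << 4)               /* remappable format */
#define MSI_ADDR_IR_SHV         (1 << 3)               /* subhandle valid   */
#define MSI_ADDR_IR_INDEX1(i)   (((i) & 0x7fff) << 5)  /* index bits 14:0   */
#define MSI_ADDR_IR_INDEX2(i)   (((i) >> 15) << 2)     /* index bit 15      */

/* Compose the MSI address for IRTE 'index'; the real destination APIC ID
 * lives in the IRTE, so it is no longer limited to 8 bits. */
static inline unsigned int msi_addr_remappable(unsigned int index)
{
    return MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | MSI_ADDR_IR_SHV |
           MSI_ADDR_IR_INDEX1(index) | MSI_ADDR_IR_INDEX2(index);
}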

Both Linux and Windows require interrupt remapping when the cpu number is >255.


1.2 Support VFIO-based user space driver (e.g. DPDK) in the guest
It relies on the l2 translation capability (IOVA->GPA) on
vIOMMU. pIOMMU l2 becomes a shadowing structure of
vIOMMU to isolate DMA requests initiated by user space driver.



1.3 Support guest SVM (Shared Virtual Memory)
It relies on the l1 translation table capability (GVA->GPA) on
vIOMMU. pIOMMU needs to enable both l1 and l2 translation in nested
mode (GVA->GPA->HPA) for passthrough device. IGD passthrough
is the main usage today (to support OpenCL 2.0 SVM feature). In the
future SVM might be used by other I/O devices too.



2. Xen vIOMMU Architecture


* The vIOMMU will be inside the Xen hypervisor for the following reasons:
1) Avoid round trips between Qemu and Xen hypervisor
2) Ease of integration with the rest of the hypervisor
3) HVMlite/PVH doesn't use Qemu
* A dummy xen-vIOMMU in Qemu acts as a wrapper of the new hypercalls to
create/destroy the vIOMMU in the hypervisor and to deal with virtual PCI
devices' l2 translation.

2.1 l2 translation overview
For Virtual PCI device, dummy xen-vIOMMU does translation in the
Qemu via new 

Re: [Xen-devel] Xen virtual IOMMU high level design doc V3

2016-11-20 Thread Lan, Tianyu



On 11/19/2016 3:43 AM, Julien Grall wrote:

Hi Lan,

On 17/11/2016 09:36, Lan Tianyu wrote:


1) Definition of "struct xen_dmop_viommu_op" as new hypercall parameter.

struct xen_dmop_viommu_op {
    u32 cmd;
    u32 domid;
    u32 viommu_id;
    union {
        struct {
            u32 capabilities;
        } query_capabilities;
        struct {
            /* IN parameters. */
            u32 capabilities;
            u64 base_address;
            struct {
                u32 size;
                XEN_GUEST_HANDLE_64(uint32) dev_list;
            } dev_scope;
            /* Out parameters. */
            u32 viommu_id;
        } create_iommu;
        struct {
            /* IN parameters. */
            u32 vsbdf;


I only gave a quick look through this design document. The new
hypercalls looks arch/device agnostic except this part.

Having a virtual IOMMU on Xen ARM is something we might consider in the
future.

In the case of ARM, a device can either be a PCI device or integrated
device. The latter does not have a sbdf. The IOMMU will usually be
configured with a stream ID (SID) that can be deduced from the sbdf and
hardcoded for integrated device.

So I would rather not tie the interface to PCI and use a more generic
name for this field. Maybe vdevid, which then can be architecture specific.


Hi Julien:
Thanks for your input. This interface is just for virtual PCI devices and
is called by Qemu. I am not familiar with ARM. Are there any non-PCI
emulated devices for ARM in Qemu which need to be covered by the vIOMMU?
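
Just to illustrate Julien's point, a hedged sketch of what an
architecture-neutral device scope entry could look like (field names are
invented, not a proposed interface):

struct xen_dmop_viommu_dev_scope {
    /*
     * Architecture specific virtual device id:
     *   x86: the SBDF of the virtual PCI device;
     *   ARM: a stream ID, deduced from the SBDF for PCI devices or
     *        hardcoded for integrated (non-PCI) devices.
     */
    uint32_t vdevid;
    uint32_t pad;
};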




Re: [Xen-devel] Xen virtual IOMMU high level design doc V3

2016-11-22 Thread Lan Tianyu
On 2016-11-21 21:41, Andrew Cooper wrote:
> On 17/11/16 15:36, Lan Tianyu wrote:
>> 3.2 l2 translation
>> 1) For virtual PCI device
>> Xen dummy xen-vIOMMU in Qemu translates IOVA to target GPA via new
>> hypercall when DMA operation happens.
>>
>> When guest triggers a invalidation operation, there maybe in-fly DMA
>> request for virtual device has been translated by vIOMMU and return back
>> Qemu. Before vIOMMU tells invalidation completed, it's necessary to make
>> sure in-fly DMA operation is completed.
>>
>> When IOMMU driver invalidates IOTLB, it also will wait until the
>> invalidation completion. We may use this to drain in-fly DMA operation
>> for virtual device.
>>
>> Guest triggers invalidation operation and trip into vIOMMU in
>> hypervisor to flush cache data. After this, it should go to Qemu to
>> drain in-fly DMA translation.
>>
>> To do that, dummy vIOMMU in Qemu registers the same MMIO region as
>> vIOMMU's and emulation part of invalidation operation in Xen hypervisor
>> returns X86EMUL_UNHANDLEABLE after flush cache. MMIO emulation part is
>> supposed to send event to Qemu and dummy vIOMMU get a chance to starts a
>> thread to drain in-fly DMA and return emulation done.
>>
>> Guest polls IVT(invalidate IOTLB) bit in the IOTLB invalidate register
>> until it's cleared after triggering invalidation. Dummy vIOMMU in Qemu
>> notifies hypervisor drain operation completed via hypercall, vIOMMU
>> clears IVT bit and guest finish invalidation operation.
> 
> Having the guest poll will be very inefficient.  If the invalidation
> does need to reach qemu, it will be a very long time until it
> completes.  Is there no interrupt based mechanism which can be used? 
> That way the guest can either handle it asynchronous itself, or block
> waiting on an interrupt, both of which are better than having it just
> spinning.
> 

Hi Andrew:
VT-d provides an interrupt event for queued invalidation completion, so the
guest can select poll or interrupt mode to wait for invalidation completion.
I found the Linux Intel IOMMU driver just uses poll mode, so I used that as
the example. Regardless of poll or interrupt mode, the guest will wait for
invalidation completion, and we just need to make sure the draining of
in-flight DMA is finished before clearing the invalidation completion bit.
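
For reference, a sketch of that poll-mode wait, loosely following the Linux
driver's IOTLB flush (bit definitions per the VT-d spec; the helper name
and the lack of a timeout are simplifications):

#define DMA_TLB_IVT           (1ULL << 63)
#define DMA_TLB_GLOBAL_FLUSH  (1ULL << 60)

/* iotlb_off is the offset of the IOTLB register pair, derived from the IRO
 * field of the Extended Capability register. */
static void flush_iotlb_global(void __iomem *iommu_base, unsigned long iotlb_off)
{
    u64 val = DMA_TLB_IVT | DMA_TLB_GLOBAL_FLUSH;

    writeq(val, iommu_base + iotlb_off + 8);

    /* Poll mode: the (v)IOMMU clears IVT only after the invalidation,
     * including the Qemu-side drain of in-flight DMA, has completed. */
    do {
        cpu_relax();
        val = readq(iommu_base + iotlb_off + 8);
    } while (val & DMA_TLB_IVT);
}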

-- 
Best regards
Tianyu Lan



Re: [Xen-devel] Xen virtual IOMMU high level design doc V3

2016-11-22 Thread Lan Tianyu
On 2016-11-21 15:05, Tian, Kevin wrote:
>> If someone add "intel_iommu=on" kernel parameter manually, IOMMU driver
>> > will panic guest because it can't enable DMA remapping function via gcmd
>> > register and "Translation Enable Status" bit in gsts register is never
>> > set by vIOMMU. This shows actual vIOMMU status that there is no l2
>> > translation support and warn user should not enable l2 translation.
> The rationale of section 3.5 is confusing. Do you mean sth. like below?
> 
> - We can first do IRQ remapping, because DMA remapping (l1/l2) and 
> IRQ remapping can be enabled separately according to VT-d spec. Enabling 
> of DMA remapping will be first emulated as a failure, which may lead
> to guest kernel panic if intel_iommu is turned on in the guest. But it's
> not a big problem because major distributions have DMA remapping
> disabled by default while IRQ remapping is enabled.
> 
> - For DMA remapping, likely you'll enable L2 translation first (there is
> no capability bit) with L1 translation disabled (there is a SVM capability 
> bit). 
> 
> If yes, maybe we can break this design into 3 parts too, so both
> design review and implementation side can move forward step by
> step?
> 

Yes, we may implement IRQ remapping first. I will break this design into 3
parts (interrupt remapping, l2 translation and l1 translation). IRQ
remapping will be the first one to be sent out for detailed discussion.

-- 
Best regards
Tianyu Lan



Re: [Xen-devel] Xen virtual IOMMU high level design doc V3

2016-11-23 Thread Lan Tianyu
On 2016-11-22 18:24, Jan Beulich wrote:
 On 17.11.16 at 16:36,  wrote:
>> 2) Build ACPI DMAR table in toolstack
>> Now tool stack can boot ACPI DMAR table according VM configure and pass
>> though it to hvmloader via xenstore ACPI PT channel. But the vIOMMU MMIO
>> region is managed by Qemu and it's need to be populated into DMAR
>> table. We may hardcore an address in both Qemu and toolstack and use the 
>> same address to create vIOMMU and build DMAR table.
> 
> Let's try to avoid any new hard coding of values. Both tool stack
> and qemu ought to be able to retrieve a suitable address range
> from the hypervisor. Or if the tool stack was to allocate it, it could
> tell qemu.
> 
> Jan
> 

Hi Jan:
The address range is allocated by Qemu or the toolstack and passed to the
hypervisor when creating the vIOMMU. The vIOMMU's address range should be
within the PCI address space, so we need to reserve a piece of the PCI
region for the vIOMMU in the toolstack. Then we populate the base address
in the vDMAR table and tell Qemu the region via a new xenstore interface,
if we want to create the vIOMMU in the Qemu dummy hypercall wrapper.

Another point: I am not sure whether we can create/destroy the vIOMMU
directly in the toolstack, because virtual device models are usually
handled by Qemu. If we can, we don't need a new xenstore interface. In that
case, the dummy vIOMMU in Qemu will just cover l2 translation for virtual
devices.

-- 
Best regards
Tianyu Lan



Re: [Xen-devel] Xen virtual IOMMU high level design doc

2016-11-23 Thread Lan Tianyu
On 2016-11-24 12:09, Edgar E. Iglesias wrote:
 Hi,
 > > >
 > > > I have a few questions.
 > > >
 > > > If I understand correctly, you'll be emulating an Intel IOMMU in Xen.
 > > > So guests will essentially create intel iommu style page-tables.
 > > >
 > > > If we were to use this on Xen/ARM, we would likely be modelling an 
 > > > ARM
 > > > SMMU as a vIOMMU. Since Xen on ARM does not use QEMU for emulation, 
 > > > the
 > > > hypervisor OPs for QEMUs xen dummy IOMMU queries would not really be 
 > > > used.
 > > > Do I understand this correctly?
>>> > > 
>>> > > I think they could be called from the toolstack. This is why I was
>>> > > saying in the other thread that the hypercalls should be general enough
>>> > > that QEMU is not the only caller.
>>> > > 
>>> > > For PVH and ARM guests, the toolstack should be able to setup the vIOMMU
>>> > > on behalf of the guest without QEMU intervention.
> OK, I see. Or, I think I understand, not sure :-)
> 
> In QEMU when someone changes mappings in an IOMMU there will be a notifier
> to tell caches upstream that mappings have changed. I think we will need to
> prepare for that. I.e when TCG CPUs sit behind an IOMMU.

On the Xen side, we may notify the pIOMMU driver about mapping changes by
calling the pIOMMU driver's API from the vIOMMU.

> 
> Another area that may need change is that on ARM we need the map-query to 
> return
> the memory attributes for the given mapping. Today QEMU or any emulator 
> doesn't use it much but in the future things may change.
> 
> For SVM, whe will also need to deal with page-table faults by the IOMMU.
> So I think there will need to be a channel from Xen to Guesrt to report these.

Yes, the vIOMMU should forward the page-fault event to the guest. On the
VT-d side, we will trigger the VT-d interrupt to notify the guest about the
event.

> 
> For example, what happens when a guest assigned DMA unit page-faults?
> Xen needs to know how to forward this fault back to guest for fixup and the
> guest needs to be able to fix it and tell the device that it's OK to contine.
> E.g PCI PRI or similar.
> 
> 


-- 
Best regards
Tianyu Lan



Re: [Xen-devel] Xen virtual IOMMU high level design doc

2016-11-24 Thread Lan, Tianyu



On 11/24/2016 9:37 PM, Edgar E. Iglesias wrote:

On Thu, Nov 24, 2016 at 02:49:41PM +0800, Lan Tianyu wrote:

On 2016-11-24 12:09, Edgar E. Iglesias wrote:

Hi,


I have a few questions.

If I understand correctly, you'll be emulating an Intel IOMMU in Xen.
So guests will essentially create intel iommu style page-tables.

If we were to use this on Xen/ARM, we would likely be modelling an ARM
SMMU as a vIOMMU. Since Xen on ARM does not use QEMU for emulation, the
hypervisor OPs for QEMUs xen dummy IOMMU queries would not really be used.
Do I understand this correctly?


I think they could be called from the toolstack. This is why I was
saying in the other thread that the hypercalls should be general enough
that QEMU is not the only caller.

For PVH and ARM guests, the toolstack should be able to setup the vIOMMU
on behalf of the guest without QEMU intervention.

OK, I see. Or, I think I understand, not sure :-)

In QEMU when someone changes mappings in an IOMMU there will be a notifier
to tell caches upstream that mappings have changed. I think we will need to
prepare for that. I.e when TCG CPUs sit behind an IOMMU.


For Xen side, we may notify pIOMMU driver about mapping change via
calling pIOMMU driver's API in vIOMMU.


I was refering to the other way around. When a guest modifies the mappings
for a vIOMMU, the driver domain with QEMU and vDevices needs to be notified.

I couldn't find any mention of this in the document...


The Qemu side won't have an IOTLB cache; all DMA translation info is in the
hypervisor. All vDevices' DMA requests are passed to the hypervisor, the
hypervisor returns the translated address, and then Qemu finishes the DMA
operation.


There is a race condition between the IOTLB invalidation operation and
vDevices' in-flight DMA. We proposed a solution in "3.2 l2 translation -
For virtual PCI device". We hope to take advantage of the current ioreq
mechanism to achieve something like a notifier.


Both the vIOMMU in the hypervisor and the dummy vIOMMU in Qemu register the
same MMIO region. When there is an invalidation MMIO access and the
hypervisor wants to notify Qemu, the vIOMMU's MMIO handler returns
X86EMUL_UNHANDLEABLE and the IO emulation handler is supposed to send an IO
request to Qemu. The dummy vIOMMU in Qemu receives the event and starts to
drain the in-flight DMA operations.
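
A rough sketch of the Xen-side forwarding path described above (the handler
shape is approximate and all helper names are invented for illustration;
only X86EMUL_OKAY/X86EMUL_UNHANDLEABLE are real Xen return codes):

/* vvtd MMIO write handler: for an invalidation that needs Qemu to drain
 * in-flight DMA, flush the hypervisor-side state first, then decline to
 * complete the emulation so the generic MMIO code forwards the access to
 * Qemu as an ioreq. */
static int vvtd_mmio_write(struct vcpu *v, unsigned long addr,
                           unsigned int len, unsigned long val)
{
    struct vvtd *vvtd = domain_vvtd(v->domain);          /* invented helper */

    if ( is_iotlb_invalidation(vvtd, addr, val) )        /* invented helper */
    {
        vvtd_flush_cache(vvtd, val);      /* hypervisor-side flush          */
        return X86EMUL_UNHANDLEABLE;      /* hand the access over to Qemu   */
    }

    vvtd_reg_write(vvtd, addr, len, val); /* normal register emulation      */
    return X86EMUL_OKAY;
}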










Another area that may need change is that on ARM we need the map-query to return
the memory attributes for the given mapping. Today QEMU or any emulator
doesn't use it much but in the future things may change.


What about the mem attributes?
It's very likely we'll add support for memory attributes for IOMMU's in QEMU
at some point.
Emulated IOMMU's will thus have the ability to modify attributes (i.e 
SourceID's,
cacheability, etc). Perhaps we could allocate or reserve an uint64_t
for attributes TBD later in the query struct.


Sounds like you hope to extend the capability variable in the query struct
to uint64_t to support more future features, right?


I have added "permission" variable in struct l2_translation to return 
vIOMMU's memory access permission for vDevice's DMA request. No sure it 
can meet your requirement.








For SVM, whe will also need to deal with page-table faults by the IOMMU.
So I think there will need to be a channel from Xen to Guesrt to report these.


Yes, vIOMMU should forward the page-fault event to guest. For VTD side,
we will trigger VTD's interrupt to notify guest about the event.


OK, Cool.

Perhaps you should document how this (and the map/unmap notifiers) will work?


This is VT-d specific handling of fault events, and the fault interrupt is
emulated just like in other virtual device models, so I didn't put it in
this design document.


For mapping changes, please see the first comment above.



Re: [Xen-devel] Discussion about virtual iommu support for Xen guest

2016-06-26 Thread Lan, Tianyu

On 6/8/2016 4:11 PM, Tian, Kevin wrote:

It makes sense... I thought you used this security issue against
placing vIOMMU in Qemu, which made me a bit confused earlier. :-)

We are still thinking feasibility of some staging plan, e.g. first
implementing some vIOMMU features w/o dependency on root-complex in
Xen (HVM only) and then later enabling full vIOMMU feature w/
root-complex in Xen (covering HVMLite). If we can reuse most code
between two stages while shorten time-to-market by half (e.g. from
2yr to 1yr), it's still worthy of pursuing. will report back soon
once the idea is consolidated...

Thanks Kevin



After discussion with Kevin, we have drafted a staging plan for
implementing the vIOMMU in Xen based on the Qemu host bridge. Both virtual
devices and passthrough devices use one vIOMMU in Xen. Your comments are
much appreciated.


1. Enable Q35 support in hvmloader.
In the real world, VT-d support starts from Q35, and an OS may assume that
VT-d only exists on Q35 or newer platforms.
Q35 support seems necessary for vIOMMU support.

Regardless of whether the Q35 host bridge is in Qemu or the Xen hypervisor,
hvmloader needs to be compatible with Q35 and build Q35 ACPI tables.

Qemu already has Q35 emulation, so the hvmloader work can start with Qemu.
When the host bridge in Xen is ready, these changes can also be reused.

2. Implement the vIOMMU in Xen based on the Qemu host bridge.
Add a new device type "Xen iommu" in Qemu as a wrapper of the vIOMMU
hypercalls to communicate with the Xen vIOMMU.

It's in charge of:
1) Querying vIOMMU capabilities (e.g. interrupt remapping, DMA translation,
SVM and so on)
2) Creating the vIOMMU with a predefined base address for the IOMMU unit
registers
3) Notifying hvmloader to populate related content in the ACPI DMAR table
(add vIOMMU info to struct hvm_info_table)
4) Dealing with DMA translation requests of virtual devices and returning
the translated address
5) Attaching/detaching hotplug devices to/from the vIOMMU


New hypercalls for the vIOMMU that are also necessary when the host bridge is in Xen:
1) Query vIOMMU capability
2) Create vIOMMU(IOMMU unit reg base as params)
3) Virtual device's DMA translation
4) Attach/detach hotplug device from VIOMMU


All IOMMU emulations will be done in Xen
1) DMA translation
2) Interrupt remapping
3) Shared Virtual Memory (SVM)



Re: [Xen-devel] Discussion about virtual iommu support for Xen guest

2016-07-05 Thread Lan, Tianyu

Hi Stefano, Andrew and Jan:
Could you give us more guidance here to move the virtual IOMMU development
forward? Thanks.


On 6/29/2016 11:04 AM, Tian, Kevin wrote:

From: Lan, Tianyu
Sent: Sunday, June 26, 2016 9:43 PM

On 6/8/2016 4:11 PM, Tian, Kevin wrote:

It makes sense... I thought you used this security issue against
placing vIOMMU in Qemu, which made me a bit confused earlier. :-)

We are still thinking feasibility of some staging plan, e.g. first
implementing some vIOMMU features w/o dependency on root-complex in
Xen (HVM only) and then later enabling full vIOMMU feature w/
root-complex in Xen (covering HVMLite). If we can reuse most code
between two stages while shorten time-to-market by half (e.g. from
2yr to 1yr), it's still worthy of pursuing. will report back soon
once the idea is consolidated...

Thanks Kevin



After discussion with Kevin, we draft a staging plan of implementing
vIOMMU in Xen based on Qemu host bridge. Both virtual devices and
passthough devices use one vIOMMU in Xen. Your comments are very
appreciated.


The rationale here is to separate BIOS structures from actual vIOMMU
emulation. vIOMMU will be always emulated in Xen hypervisor, regardless of
where Q35 emulation is done or whether it's HVM or HVMLite. The staging
plan is more for the BIOS structure reporting which is Q35 specific. For now
we first target Qemu Q35 emulation, with a set of vIOMMU ops introduced
as Tianyu listed below to help interact between Qemu and Xen. Later when
Xen Q35 emulation is ready, the reporting can be done in Xen.

The main limitation of this model is on DMA emulation of Qemu virtual
devices, which needs to query Xen vIOMMU for every virtual DMA. It is
possibly fine for virtual devices which are normally not for performance
critical usages. Also there may be some chance to cache some translations
within Qemu like thru ATS (may not worthy of it though...).



1. Enable Q35 support in the hvmloader.
In the real world, VTD support starts from Q35 and OS may have such
assumption that VTD only exists on the Q35 or newer platform.
Q35 support seems necessary for vIOMMU support.

In regardless of Q35 host bridge in the Qemu or Xen hypervisor,
hvmloader needs to be compatible with Q35 and build Q35 ACPI tables.

Qemu already has Q35 emulation and so the hvmloader job can start with
Qemu. When host bridge in Xen is ready, these changes also can be reused.

2. Implement vIOMMU in Xen based on Qemu host bridge.
Add a new device type "Xen iommu" in the Qemu as a wrapper of vIOMMU
hypercalls to communicate with Xen vIOMMU.

It's in charge of:
1) Query vIOMMU capability(E,G interrupt remapping, DMA translation, SVM
and so on)
2) Create vIOMMU with predefined base address of IOMMU unit regs
3) Notify hvmloader to populate related content in the ACPI DMAR
table.(Add vIOMMU info to struct hvm_info_table)
4) Deal with DMA translation request of virtual devices and return
back translated address.
5) Attach/detach hotplug device from vIOMMU


New hypercalls for vIOMMU that are also necessary when host bridge in Xen.
1) Query vIOMMU capability
2) Create vIOMMU(IOMMU unit reg base as params)
3) Virtual device's DMA translation
4) Attach/detach hotplug device from VIOMMU


We don't need 4). Hotplug device is automatically handled by the vIOMMU
with INCLUDE_ALL flag set (which should be the case if we only have one
vIOMMU in Xen). We don't need further notify this event to Xen vIOMMU.

And once we have Xen Q35 emulation in place, possibly only 3) is required
then.




All IOMMU emulations will be done in Xen
1) DMA translation
2) Interrupt remapping
3) Shared Virtual Memory (SVM)


Please let us know your thoughts. If no one has explicit objection based
on above rough idea, we'll go to write the high level design doc for more
detail discussion.

Thanks
Kevin





Re: [Xen-devel] Discussion about virtual iommu support for Xen guest

2016-07-05 Thread Lan, Tianyu



On 7/5/2016 9:57 PM, Jan Beulich wrote:

On 05.07.16 at 15:37,  wrote:

Hi Stefano, Andrew and Jan:
Could you give us more guides here to move forward virtual iommu
development? Thanks.


Due to ...


On 6/29/2016 11:04 AM, Tian, Kevin wrote:

Please let us know your thoughts. If no one has explicit objection based
on above rough idea, we'll go to write the high level design doc for more
detail discussion.


... this I actually expected we'd get to see something, rather than
our input being waited for.


OK, I get it. Since there was no response, we wanted to double-confirm that
we are on the right track.




Re: [Xen-devel] [RFC PATCH 0/4] Qemu: Add Xen vIOMMU support

2017-03-20 Thread Lan Tianyu
On 2017-03-20 19:38, Paolo Bonzini wrote:
> Fair enough, though I'd be worried about increasing the attack surface
> of the hypervisor.  For KVM, for example, IOMMU emulation requires using
> the "split irqchip" feature to move the PIC and IOAPIC out of the kernel
> and back to QEMU.

Yes, just as Roger mentioned, we also need to support a no-Qemu mode on
Xen, and this is the result of that tradeoff.

> 
> Also, I think this series is missing changes to support IOMMU
> translation in the vIOMMU device model.

Yes, this series just enables the vIOMMU's irq remapping function; we will
need to pass virtual devices' DMA requests to the Xen hypervisor for
translation once DMA translation is enabled.

-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [RFC PATCH 00/23] xen/vIOMMU: Add vIOMMU support with irq remapping function on Intel platform

2017-03-20 Thread Lan Tianyu
On 2017-03-20 22:23, Roger Pau Monné wrote:
> Thanks! So you add all this vIOMMU code, but the maximum number of allowed
> vCPUs for HVM guests is still limited to 128 (HVM_MAX_VCPUS is not touched). 
> Is
> there any missing pieces in order to bump this?

To increase the vcpu number, we need to change the APIC ID rule; currently
it's APICID = VCPUID * 2. Andrew's CPUID improvement will change it, so our
follow-on patches for increasing the vcpu number will be based on Andrew's
work.


> 
> Also, have you tested if this series works with PVH guests? Boris added PVH
> support to Linux not long ago, so you should be able to test it just by 
> picking
> the latest Linux kernel.

Our patchset just targets HVM guests and it will not work for PV guests.

-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [RFC PATCH 00/23] xen/vIOMMU: Add vIOMMU support with irq remapping function on Intel platform

2017-03-20 Thread Lan Tianyu
On 2017-03-21 10:28, Lan Tianyu wrote:
> On 2017-03-20 22:23, Roger Pau Monné wrote:
>> Thanks! So you add all this vIOMMU code, but the maximum number of allowed
>> vCPUs for HVM guests is still limited to 128 (HVM_MAX_VCPUS is not touched). 
>> Is
>> there any missing pieces in order to bump this?
> 
> To increase vcpu number, we need to change APIC ID rule and now it's
> APICID = VCPUID * 2. Andrew's CPUID improvement will change it and so
> our following patches of increasing vcpu number will base on Andrew's job.
> 
> 
>>
>> Also, have you tested if this series works with PVH guests? Boris added PVH
>> support to Linux not long ago, so you should be able to test it just by 
>> picking
>> the latest Linux kernel.
> 
> Our patchset just targets hvm guest and it will not work for PV guest. 

The new hypercalls introduced by this patchset can also be reused for PVH
to enable the vIOMMU. This patchset relies on the Qemu xen-vIOMMU device
model to create/destroy the vIOMMU. If we want to enable DMA translation
for HVM guests later, virtual devices' DMA requests will be passed from
Qemu to the Xen hypervisor, so the device model in Qemu is necessary.

-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [RFC PATCH 1/23] VIOMMU: Add vIOMMU helper functions to create, destroy and query capabilities

2017-03-22 Thread Lan Tianyu
Hi  Julien:
Thanks for review.

On 2017-03-22 03:56, Julien Grall wrote:
> ===
> 
> diff --git a/xen/include/public/viommu.h b/xen/include/public/viommu.h
> new file mode 100644
> index 000..ca2419b
> 
> --- /dev/null
> 
> +++ b/xen/include/public/viommu.h
> 
> @@ -0,0 +1,9 @@
> 
> +/*
> +·*·include/public/viommu.h
> +·*
> +·*·Copyright·(c)·2017·Intel·Corporation
> +·*·Author:·Lan·Tianyu·
> +·*
> +·*·This·program·is·free·software;·you·can·redistribute·it·and/or·modify·it
> +·*·under·the·terms·and·conditions·of·the·GNU·General·Public·License,
> +·*·version·2,·as·published·by·the·Free·Software·Foundation.
> 
>  obj-y += vmap.o
>  obj-y += vsprintf.o
>  obj-y += wait.o
> +obj-y += viommu.o
> I see very little point to enable viommu by default on all architecture. 
> This is x86 specific and I am yet sure how we would be able to use it on 
> ARM as the current series rely on QEMU. Also this is waste space in 
> struct domain.

The XEN_DMOP_create/destroy_viommu hypercalls we introduced are generic for
all platforms and can be used in the toolstack to create/destroy the vIOMMU
rather than just in Qemu. This takes the PVH case into account, which also
doesn't use Qemu.


> I would prefer if you introduce a Kconfig that would be select by x86 only.
> Regards,


> Public headers sould not be GPLv2 otherwise it will cause some trouble 
> for non-GPLv2 OS. See the license in xen/include/public/COPYING.

Yes, it should be MIT license.
> 
> Regards.
> 
> -- Julien Grall


-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [RFC PATCH 1/23] VIOMMU: Add vIOMMU helper functions to create, destroy and query capabilities

2017-03-22 Thread Lan, Tianyu

On 3/22/2017 4:36 PM, Tian, Kevin wrote:

From: Julien Grall [mailto:julien.gr...@arm.com]
Sent: Wednesday, March 22, 2017 3:57 AM



diff --git a/xen/common/Makefile b/xen/common/Makefile index
0fed30b..b58de63 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -60,6 +60,7 @@ obj-y += vm_event.o
 obj-y += vmap.o
 obj-y += vsprintf.o
 obj-y += wait.o
+obj-y += viommu.o


I see very little point to enable viommu by default on all architecture.
This is x86 specific and I am yet sure how we would be able to use it on ARM
as the current series rely on QEMU. Also this is waste space in struct domain.

I would prefer if you introduce a Kconfig that would be select by x86 only.

Regards,



Also viommu.c is too generic. Each vendor should has his own
implementation. better change to vvtd.c (and make more sense
move to hvm)


Hi Kevin:
vIOMMU is an abstraction layer, and we have added vvtd.c under the hvm
directory in a following patch. vvtd registers its callbacks with the
vIOMMU layer. This works just like the IOMMU core / VT-d driver split.




Thanks
Kevin





Re: [Xen-devel] [RFC PATCH 1/23] VIOMMU: Add vIOMMU helper functions to create, destroy and query capabilities

2017-03-22 Thread Lan, Tianyu

On 3/22/2017 7:40 PM, Julien Grall wrote:

Hello,

On 22/03/17 08:45, Lan Tianyu wrote:

Hi  Julien:
Thanks for review.

On 2017-03-22 03:56, Julien Grall wrote:

===

diff --git a/xen/include/public/viommu.h b/xen/include/public/viommu.h
new file mode 100644
index 000..ca2419b

--- /dev/null

+++ b/xen/include/public/viommu.h

@@ -0,0 +1,9 @@

+/*
+·*·include/public/viommu.h
+·*
+·*·Copyright·(c)·2017·Intel·Corporation
+·*·Author:·Lan·Tianyu·
+·*
+·*·This·program·is·free·software;·you·can·redistribute·it·and/or·modify·it

+·*·under·the·terms·and·conditions·of·the·GNU·General·Public·License,
+·*·version·2,·as·published·by·the·Free·Software·Foundation.

 obj-y += vmap.o
 obj-y += vsprintf.o
 obj-y += wait.o
+obj-y += viommu.o
I see very little point to enable viommu by default on all architecture.
This is x86 specific and I am yet sure how we would be able to use it on
ARM as the current series rely on QEMU. Also this is waste space in
struct domain.


XEN_DMOP_create/destroy_viommu hypercalls we introduced are generic for
all platforms and can use in toolstack to create/destroy vIOMMU rather
than just in Qemu. This takes PVH case into account which also don't use
Qemu.


I am afraid that none of the DMOP you suggested in this series will fit
for ARM.

For instance it is not possible to select via DMOP_CREATE the kind of
vIOMMU (e.g SMMUv2, SMMUv3, IPMMU-VMSA...).


Thanks for the information. I am not sure whether we can introduce
arch-specific hypercalls for different vIOMMU implementations, so I tried
to make the interface more general. To support more vIOMMU types or more
vIOMMU sub-features, we may extend the input parameter structure.




To be clear, I am not asking to get this code ready for ARM, but at
least we need to make sure the API could be easily extended. During the
discussion on the design documented it was suggested to add a
iommu_version field to make it "future proof".


Sure, that's a very good suggestion. Sorry, I missed that in this series;
I thought the "capability" field in struct xen_dm_op_create_viommu was
enough for other vendors to extend with more sub-features. Will change it.
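
To make that concrete, a hedged sketch of how the create structure could
carry an explicit type next to the capability flags (the viommu_type field
and its values are illustrative, not part of the posted series):

struct xen_dm_op_create_viommu {
    /* IN - vendor/type of vIOMMU to instantiate, e.g. (values TBD)
     *      VIOMMU_TYPE_INTEL_VTD / VIOMMU_TYPE_AMD_IVHD / VIOMMU_TYPE_ARM_SMMU */
    uint32_t viommu_type;
    uint32_t pad;
    /* IN - MMIO base address and length of the register block */
    uint64_t base_address;
    uint64_t length;
    /* IN - capability/sub-feature flags, interpreted per viommu_type */
    uint64_t capabilities;
    /* OUT - vIOMMU identity */
    uint32_t viommu_id;
};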




Also, I was not asking to move this code in arch/x86 but not compiling
the code on ARM by default as it is currently unusable.


Sure. Will change it.



Regards,





Re: [Xen-devel] [RFC PATCH 5/23] Tools/libxc: Add viommu operations in libxc

2017-04-14 Thread Lan, Tianyu

Hi Paul:
Sorry for later response.

On 3/31/2017 3:57 AM, Chao Gao wrote:

On Wed, Mar 29, 2017 at 09:08:06AM +, Paul Durrant wrote:

-Original Message-
From: Xen-devel [mailto:xen-devel-boun...@lists.xen.org] On Behalf Of
Chao Gao
Sent: 29 March 2017 01:40
To: Wei Liu 
Cc: Lan Tianyu ; Kevin Tian ;
Ian Jackson ; xen-devel@lists.xen.org
Subject: Re: [Xen-devel] [RFC PATCH 5/23] Tools/libxc: Add viommu
operations in libxc

Tianyu is on vacation this two weeks, so I will try to address
some comments on this series.

On Tue, Mar 28, 2017 at 05:24:03PM +0100, Wei Liu wrote:

On Fri, Mar 17, 2017 at 07:27:05PM +0800, Lan Tianyu wrote:

From: Chao Gao 

In previous patch, we introduce a common vIOMMU layer. In our design,
we create/destroy vIOMMU through DMOP interface instead of creating

it

according to a config flag of domain. It makes it is possible
to create vIOMMU in device model or in tool stack.



I've not been following this closely so apologies if this has already been 
asked...

Why would you need to create a vIOMMU instance in an external device model.
Since the toolstack should be in control of the device model configuration why 
would it not know in advance that one was required?


I assume your question is why we don't create a vIOMMU instance via hypercall 
in toolstack.
I think creating in toolstack is also ok and is easier to be reused by pvh.

If Tianyu has no concern about this, will move this part to toolstack.


We can move vIOMMU create/destroy into the tool stack, but we still need to
add a dummy vIOMMU device model in Qemu to pass virtual devices' DMA
requests into the Xen hypervisor. Qemu is required to use the DMOP
hypercall and the tool stack may use a domctl hypercall, so the vIOMMU
hypercalls will be divided into two parts.


Domctl:
create, destroy and query.
DMOP:
vDev's DMA related operations.

Is this OK?



Thanks,
Chao



 Paul


The following toolstack code is to add XEN_DMOP_viommu_XXX syscalls:


Hypercalls, not syscalls.


 - query capabilities of vIOMMU emulated by Xen
 - create vIOMMU in Xen hypervisor with base address, capability
 - destroy vIOMMU specified by viommu_id

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 
---
 tools/libs/devicemodel/core.c   | 69

+

 tools/libs/devicemodel/include/xendevicemodel.h | 35 +
 tools/libs/devicemodel/libxendevicemodel.map|  3 ++
 tools/libxc/include/xenctrl_compat.h|  5 ++
 tools/libxc/xc_devicemodel_compat.c | 18 +++
 5 files changed, 130 insertions(+)

diff --git a/tools/libs/devicemodel/core.c b/tools/libs/devicemodel/core.c
index a85cb49..aee1150 100644
--- a/tools/libs/devicemodel/core.c
+++ b/tools/libs/devicemodel/core.c


Bear in mind that this library is stable, so whatever ends up here can
change in the future.

This is not saying the following code is problematic. It is just a
general FYI.

Obviously the toolstack side is going to follow the hypervisor
interface, so I will do a detailed review later.


Sure. If the hypervisor interface settles down, we can inform you.




+int xendevicemodel_viommu_destroy(
+xendevicemodel_handle *dmod, domid_t dom, uint32_t viommu_id);
 #endif /* __XEN_TOOLS__ */

 #endif /* XENDEVICEMODEL_H */
diff --git a/tools/libs/devicemodel/libxendevicemodel.map

b/tools/libs/devicemodel/libxendevicemodel.map

index 45c773e..c2e0968 100644
--- a/tools/libs/devicemodel/libxendevicemodel.map
+++ b/tools/libs/devicemodel/libxendevicemodel.map
@@ -17,6 +17,9 @@ VERS_1.0 {
xendevicemodel_modified_memory;
xendevicemodel_set_mem_type;
xendevicemodel_inject_event;
+   xendevicemodel_viommu_query_cap;
+   xendevicemodel_viommu_create;
+   xendevicemodel_viommu_destroy;
xendevicemodel_restrict;
xendevicemodel_close;


I suppose this series is going to miss 4.9.

Please add these functions to VERS_1.1.


Yes. We will fix this.




local: *; /* Do not expose anything by default */
diff --git a/tools/libxc/include/xenctrl_compat.h

b/tools/libxc/include/xenctrl_compat.h

index 040e7b2..315c45d 100644
--- a/tools/libxc/include/xenctrl_compat.h
+++ b/tools/libxc/include/xenctrl_compat.h
@@ -164,6 +164,11 @@ int xc_hvm_set_mem_type(
 int xc_hvm_inject_trap(
 xc_interface *xch, domid_t domid, int vcpu, uint8_t vector,
 uint8_t type, uint32_t error_code, uint8_t insn_len, uint64_t cr2);
+int xc_viommu_query_cap(xc_interface *xch, domid_t dom, uint64_t

*cap);

+int xc_viommu_create(
+xc_interface *xch, domid_t dom, uint64_t base_addr, uint64_t cap,
+uint32_t *viommu_id);
+int xc_viommu_destroy(xc_interface *xch, domid_t dom, uint32_t

viommu_id);


 #endif /* XC_WANT_COMPAT_DEVICEMODEL_API */

diff --git a/tools/libxc/xc_devicemodel_compat.c

b/tools/libxc/xc_devicemodel_compat.c

index e4edeea..62f703a 100644
--- a/tools/libxc/xc_devicemodel_

Re: [Xen-devel] [RFC PATCH 5/23] Tools/libxc: Add viommu operations in libxc

2017-04-17 Thread Lan Tianyu
On 2017-04-17 19:08, Wei Liu wrote:
> On Fri, Apr 14, 2017 at 11:38:15PM +0800, Lan, Tianyu wrote:
>> Hi Paul:
>>  Sorry for later response.
>>
>> On 3/31/2017 3:57 AM, Chao Gao wrote:
>>> On Wed, Mar 29, 2017 at 09:08:06AM +, Paul Durrant wrote:
>>>>> -Original Message-
>>>>> From: Xen-devel [mailto:xen-devel-boun...@lists.xen.org] On Behalf Of
>>>>> Chao Gao
>>>>> Sent: 29 March 2017 01:40
>>>>> To: Wei Liu 
>>>>> Cc: Lan Tianyu ; Kevin Tian ;
>>>>> Ian Jackson ; xen-devel@lists.xen.org
>>>>> Subject: Re: [Xen-devel] [RFC PATCH 5/23] Tools/libxc: Add viommu
>>>>> operations in libxc
>>>>>
>>>>> Tianyu is on vacation this two weeks, so I will try to address
>>>>> some comments on this series.
>>>>>
>>>>> On Tue, Mar 28, 2017 at 05:24:03PM +0100, Wei Liu wrote:
>>>>>> On Fri, Mar 17, 2017 at 07:27:05PM +0800, Lan Tianyu wrote:
>>>>>>> From: Chao Gao 
>>>>>>>
>>>>>>> In previous patch, we introduce a common vIOMMU layer. In our design,
>>>>>>> we create/destroy vIOMMU through DMOP interface instead of creating
>>>>> it
>>>>>>> according to a config flag of domain. It makes it is possible
>>>>>>> to create vIOMMU in device model or in tool stack.
>>>>>>>
>>>>
>>>> I've not been following this closely so apologies if this has already been 
>>>> asked...
>>>>
>>>> Why would you need to create a vIOMMU instance in an external device model.
>>>> Since the toolstack should be in control of the device model configuration 
>>>> why would it not know in advance that one was required?
>>>
>>> I assume your question is why we don't create a vIOMMU instance via 
>>> hypercall in toolstack.
>>> I think creating in toolstack is also ok and is easier to be reused by pvh.
>>>
>>> If Tianyu has no concern about this, will move this part to toolstack.
>>
>> We can move create/destroy vIOMMU in the tool stack but we still need to add
>> such dummy vIOMMU device model in Qemu to pass virtual device's DMA request
>> into Xen hypervisor. Qemu is required to use DMOP hypercall and tool stack
>> may use domctl hyercall. vIOMMU hypercalls will be divided into two part.
>>
>> Domctl:
>>  create, destroy and query.
>> DMOP:
>>  vDev's DMA related operations.
>>
>> Is this OK?
>>
> 
> Why are they divided into two libraries? Can't they be in DMOP at the
> same time?

Yes, we can use DMOP for all vIOMMU hypercalls if it's necessary to keep a
unified vIOMMU hypercall type. In theory, DMOP is dedicated to Qemu, but we
can also use it in the tool stack. If we move the create, destroy and query
operations to the tool stack, it isn't necessary to use DMOP for them,
since only the tool stack will call them. This is why I said we could use
domctl for these operations. Neither way affects the functional
implementation. Which one is better from your view? :)


> 
> Just asking questions, not suggesting it should be done one way or the
> other.  Sorry if there are some obvious reasons that I missed.
> 
> Wei.
> 


-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [RFC PATCH 2/23] DMOP: Introduce new DMOP commands for vIOMMU support

2017-04-18 Thread Lan Tianyu
Hi Konrad:
Thanks for your review.

On 2017-04-17 22:36, Konrad Rzeszutek Wilk wrote:
> On Fri, Mar 17, 2017 at 07:27:02PM +0800, Lan Tianyu wrote:
>> This patch is to introduce create, destroy and query capabilities
>> command for vIOMMU. vIOMMU layer will deal with requests and call
>> arch vIOMMU ops.
>>
>> Signed-off-by: Lan Tianyu 
>> ---
>>  xen/arch/x86/hvm/dm.c  | 29 +
>>  xen/include/public/hvm/dm_op.h | 39 +++
>>  2 files changed, 68 insertions(+)
>>
>> diff --git a/xen/arch/x86/hvm/dm.c b/xen/arch/x86/hvm/dm.c
>> index 2122c45..2b28f70 100644
>> --- a/xen/arch/x86/hvm/dm.c
>> +++ b/xen/arch/x86/hvm/dm.c
>> @@ -491,6 +491,35 @@ static int dm_op(domid_t domid,
>>  break;
>>  }
>>  
>> +case XEN_DMOP_create_viommu:
>> +{
>> +struct xen_dm_op_create_viommu *data =
>> +&op.u.create_viommu;
>> +
>> +rc = viommu_create(d, data->base_address, data->length, 
>> data->capabilities);
>> +if (rc >= 0) {
> 
> The style guide is is to have a space here and { on a newline.

Yes, will fix.

> 
>> +data->viommu_id = rc;
>> +rc = 0;
>> +}
>> +break;
>> +}
> 
> Newline here..
> 
> 
>> +case XEN_DMOP_destroy_viommu:
>> +{
>> +const struct xen_dm_op_destroy_viommu *data =
>> +&op.u.destroy_viommu;
>> +
>> +rc = viommu_destroy(d, data->viommu_id);
>> +break;
>> +}
> 
> Ahem?
>> +case XEN_DMOP_query_viommu_caps:
>> +{
>> +struct xen_dm_op_query_viommu_caps *data =
>> +&op.u.query_viommu_caps;
>> +
>> +data->caps = viommu_query_caps(d);
>> +rc = 0;
>> +break;
>> +}
> 
> And here.
>>  default:
>>  rc = -EOPNOTSUPP;
>>  break;
>> diff --git a/xen/include/public/hvm/dm_op.h b/xen/include/public/hvm/dm_op.h
>> index f54cece..b8c7359 100644
>> --- a/xen/include/public/hvm/dm_op.h
>> +++ b/xen/include/public/hvm/dm_op.h
>> @@ -318,6 +318,42 @@ struct xen_dm_op_inject_msi {
>>  uint64_aligned_t addr;
>>  };
>>  
>> +/*
>> + * XEN_DMOP_create_viommu: Create vIOMMU device.
>> + */
>> +#define XEN_DMOP_create_viommu 15
>> +
>> +struct xen_dm_op_create_viommu {
>> +/* IN - MMIO base address of vIOMMU */
> 
> Any limit? Can it be zero?

In the current patchset, the base address is allocated by the toolstack and
passed to Qemu to create the vIOMMU in the hypervisor. The toolstack should
make sure the range doesn't conflict with other resources.

> 
>> +uint64_t base_address;
>> +/* IN - Length of MMIO region */
> 
> Any restrictions? Can it be say 2 bytes? Or is this in page-size granularity?

From the VT-d spec, the register region size must be an integer multiple of
4KB, and I think the vIOMMU device model (e.g. vvtd) in the hypervisor
should check the length. Different vendors may have different restrictions.
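
For illustration, a minimal sketch of such a check in a vvtd create
callback (the callback shape follows the viommu_ops introduced in this
series; the exact checks are only an example):

static int vvtd_create(struct domain *d, struct viommu *viommu)
{
    /* VT-d register region: non-zero, page-aligned, multiple of 4KB. */
    if ( !viommu->length ||
         (viommu->length & (PAGE_SIZE - 1)) ||
         (viommu->base_address & (PAGE_SIZE - 1)) )
        return -EINVAL;

    /* ... allocate register state and register the MMIO handler ... */
    return 0;
}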

> 
>> +uint64_t length;
>> +/* IN - Capabilities with which we want to create */
>> +uint64_t capabilities;
> 
> That sounds like some form of flags?

Yes, this patchset just introduces the interrupt remapping flag, and other
vendors can also use this field to add new features.

> 
>> +/* OUT - vIOMMU identity */
>> +uint32_t viommu_id;
>> +};
>> +
>> +/*
>> + * XEN_DMOP_destroy_viommu: Destroy vIOMMU device.
>> + */
>> +#define XEN_DMOP_destroy_viommu 16
>> +
>> +struct xen_dm_op_destroy_viommu {
>> +/* OUT - vIOMMU identity */
> 
> Out? Not in?

Sorry, it should be an IN parameter.

> 
>> +uint32_t viommu_id;
>> +};
>> +
>> +/*
>> + * XEN_DMOP_q_viommu: Query vIOMMU capabilities.
>> + */
>> +#define XEN_DMOP_query_viommu_caps 17
>> +
>> +struct xen_dm_op_query_viommu_caps {
>> +/* OUT - vIOMMU Capabilities*/
> 
> Don't you need to also mention which vIOMMU? As you
> could have potentially many of them?

If we want to support different vendors' vIOMMUs, that's necessary, and we
need to introduce a new field "vIOMMU type" (e.g. Intel, AMD or ARM IOMMU).


> 
>> +uint64_t caps;
>> +};
>> +
>>  struct xen_dm_op {
>>  uint32_t op;
>>  uint32_t pad;
>> @@ -336,6 +372,9 @@ struct xen_dm_op {
>>  struct xen_dm_op_set_mem_type set_mem_type;
>>  struct xen_dm_op_inject_event inject_event;
>>  struct xen_dm_op_inject_msi inject_msi;
>> +struct xen_dm_op_create_viommu create_viommu;
>> +struct xen_dm_op_destroy_viommu destroy_viommu;
>> +struct xen_dm_op_query_viommu_caps query_viommu_caps;
>>  } u;
>>  };
>>  
>> -- 
>> 1.8.3.1
>>
>>
>> ___
>> Xen-devel mailing list
>> Xen-devel@lists.xen.org
>> https://lists.xen.org/xen-devel


-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [RFC PATCH 3/23] VIOMMU: Add irq request callback to deal with irq remapping

2017-04-18 Thread Lan Tianyu
On 2017-04-17 22:39, Konrad Rzeszutek Wilk wrote:
> On Fri, Mar 17, 2017 at 07:27:03PM +0800, Lan Tianyu wrote:
>> This patch is to add irq request callback for platform implementation
>> to deal with irq remapping request.
>>
>> Signed-off-by: Lan Tianyu 
>> ---
>>  xen/common/viommu.c  | 11 +++
>>  xen/include/asm-arm/viommu.h |  4 
>>  xen/include/asm-x86/viommu.h | 15 +++
>>  xen/include/xen/viommu.h |  8 
>>  4 files changed, 38 insertions(+)
>>
>> diff --git a/xen/common/viommu.c b/xen/common/viommu.c
>> index 4c1c788..62c66db 100644
>> --- a/xen/common/viommu.c
>> +++ b/xen/common/viommu.c
>> @@ -87,6 +87,17 @@ u64 viommu_query_caps(struct domain *d)
>>  return info->ops->query_caps(d);
>>  }
>>  
>> +int viommu_handle_irq_request(struct domain *d,
>> +struct irq_remapping_request *request)
>> +{
>> +struct viommu_info *info = &d->viommu;
>> +
>> +if ( !info || !info->ops || !info->ops->handle_irq_request)
> 
> You are missing an space at the end.

Yes, will fix.

>> +return -EINVAL;
>> +
>> +return info->ops->handle_irq_request(d, request);
>> +}
>> +
>>  /*
>>   * Local variables:
>>   * mode: C
>> diff --git a/xen/include/asm-arm/viommu.h b/xen/include/asm-arm/viommu.h
>> index ef6a60b..6a81ecb 100644
>> --- a/xen/include/asm-arm/viommu.h
>> +++ b/xen/include/asm-arm/viommu.h
>> @@ -22,6 +22,10 @@
>>  
>>  #include 
>>  
>> +struct irq_remapping_request
>> +{
>> +};
>> +
>>  static inline const struct viommu_ops *viommu_get_ops(void)
>>  {
>>  return NULL;
>> diff --git a/xen/include/asm-x86/viommu.h b/xen/include/asm-x86/viommu.h
>> index efb435f..b6e01a5 100644
>> --- a/xen/include/asm-x86/viommu.h
>> +++ b/xen/include/asm-x86/viommu.h
>> @@ -23,6 +23,21 @@
>>  #include 
>>  #include 
>>  
>> +struct irq_remapping_request
>> +{
>> +u8 type;
>> +u16 source_id;
>> +union {
>> +/* MSI */
>> +struct {
>> +u64 addr;
>> +u32 data;
>> +} msi;
>> +/* Redirection Entry in IOAPIC */
>> +u64 rte;
>> +} msg;
>> +};
> 
> Will this work right? As in with the default padding and such?

Sorry, could you elaborate on this?

>> +
>>  static inline const struct viommu_ops *viommu_get_ops(void)
>>  {
>>  return NULL;
>> diff --git a/xen/include/xen/viommu.h b/xen/include/xen/viommu.h
>> index a0abbdf..246b29d 100644
>> --- a/xen/include/xen/viommu.h
>> +++ b/xen/include/xen/viommu.h
>> @@ -24,6 +24,10 @@
>>  
>>  #define NR_VIOMMU_PER_DOMAIN 1
>>  
>> +/* IRQ request type */
>> +#define VIOMMU_REQUEST_IRQ_MSI  0
>> +#define VIOMMU_REQUEST_IRQ_APIC 1
> 
> What is this used for?

This designates the interrupt type of the irq remapping request contained
in struct irq_remapping_request. The vIOMMU device model uses it to parse
the request data.

>> +
>>  struct viommu {
>>  u64 base_address;
>>  u64 length;
>> @@ -36,6 +40,8 @@ struct viommu_ops {
>>  u64 (*query_caps)(struct domain *d);
>>  int (*create)(struct domain *d, struct viommu *viommu);
>>  int (*destroy)(struct viommu *viommu);
>> +int (*handle_irq_request)(struct domain *d,
>> +  struct irq_remapping_request *request);
>>  };
>>  
>>  struct viommu_info {
>> @@ -48,6 +54,8 @@ int viommu_init_domain(struct domain *d);
>>  int viommu_create(struct domain *d, u64 base_address, u64 length, u64 caps);
>>  int viommu_destroy(struct domain *d, u32 viommu_id);
>>  u64 viommu_query_caps(struct domain *d);
>> +int viommu_handle_irq_request(struct domain *d,
>> +  struct irq_remapping_request *request);
>>  
>>  #endif /* __XEN_VIOMMU_H__ */
>>  
>> -- 
>> 1.8.3.1
>>
>>
>> ___
>> Xen-devel mailing list
>> Xen-devel@lists.xen.org
>> https://lists.xen.org/xen-devel


-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [RFC PATCH 00/23] xen/vIOMMU: Add vIOMMU support with irq remapping function on Intel platform

2017-04-18 Thread Lan Tianyu
On 2017-04-17 22:41, Konrad Rzeszutek Wilk wrote:
> On Mon, Mar 20, 2017 at 02:23:02PM +, Roger Pau Monné wrote:
>> On Fri, Mar 17, 2017 at 07:27:00PM +0800, Lan Tianyu wrote:
>>> This patchset is to introduce vIOMMU framework and add virtual VTD's
>>> interrupt remapping support according "Xen virtual IOMMU high level
>>> design doc V3"
>>> (https://lists.xenproject.org/archives/html/xen-devel/2016-11/msg01391.html).
> 
> It would be awesome if that was as a patch in docs/misc/

Will do that.

> 
> Thanks.
> 
>>>
>>> - vIOMMU framework
>>> New framework provides viommu_ops and help functions to abstract
>>> vIOMMU operations(E,G create, destroy, handle irq remapping request
>>> and so on). Vendors(Intel, ARM, AMD and son) can implement their
>>> vIOMMU callbacks.
>>>
>>> - Xen vIOMMU device model in Qemu 
>>> It's in charge of create/destroy vIOMMU in hypervisor via new vIOMMU
>>> DMOP hypercalls. It will be required to pass virtual devices DMA
>>> request to hypervisor when enable IOVA(DMA request without PASID)
>>> function.
>>>
>>> - Virtual VTD
>>> In this patchset, we enable irq remapping function and covers both
>>> MSI and IOAPIC interrupts. Don't support post interrupt mode emulation
>>> and post interrupt mode enabled on host with virtual VTD. Will add
>>> later.   
>>>
>>> Chao Gao (19):
>>>   Tools/libxc: Add viommu operations in libxc
>>>   Tools/libacpi: Add DMA remapping reporting (DMAR) ACPI table
>>> structures
>>>   Tools/libacpi: Add new fields in acpi_config to build DMAR table
>>>   Tools/libacpi: Add a user configurable parameter to control vIOMMU
>>> attributes
>>>   Tools/libxl: Inform device model to create a guest with a vIOMMU
>>> device
>>>   x86/hvm: Introduce a emulated VTD for HVM
>>>   X86/vvtd: Add MMIO handler for VVTD
>>>   X86/vvtd: Set Interrupt Remapping Table Pointer through GCMD
>>>   X86/vvtd: Process interrupt remapping request
>>>   X86/vvtd: decode interrupt attribute from IRTE
>>>   X86/vioapic: Hook interrupt delivery of vIOAPIC
>>>   X86/vvtd: Enable Queued Invalidation through GCMD
>>>   X86/vvtd: Enable Interrupt Remapping through GCMD
>>>   x86/vpt: Get interrupt vector through a vioapic interface
>>>   passthrough: move some fields of hvm_gmsi_info to a sub-structure
>>>   Tools/libxc: Add a new interface to bind msi-ir with pirq
>>>   X86/vmsi: Hook guest MSI injection
>>>   X86/vvtd: Handle interrupt translation faults
>>>   X86/vvtd: Add queued invalidation (QI) support
>>>
>>> Lan Tianyu (4):
>>>   VIOMMU: Add vIOMMU helper functions to create, destroy and query
>>> capabilities
>>>   DMOP: Introduce new DMOP commands for vIOMMU support
>>>   VIOMMU: Add irq request callback to deal with irq remapping
>>>   VIOMMU: Add get irq info callback to convert irq remapping request
>>>
>>>  tools/libacpi/acpi2_0.h |   45 +
>>>  tools/libacpi/build.c   |   58 ++
>>>  tools/libacpi/libacpi.h |   12 +
>>>  tools/libs/devicemodel/core.c   |   69 ++
>>>  tools/libs/devicemodel/include/xendevicemodel.h |   35 +
>>>  tools/libs/devicemodel/libxendevicemodel.map|3 +
>>>  tools/libxc/include/xenctrl.h   |   17 +
>>>  tools/libxc/include/xenctrl_compat.h|5 +
>>>  tools/libxc/xc_devicemodel_compat.c |   18 +
>>>  tools/libxc/xc_domain.c |   55 +
>>>  tools/libxl/libxl_create.c  |   12 +-
>>>  tools/libxl/libxl_dm.c  |9 +
>>>  tools/libxl/libxl_dom.c |   85 ++
>>>  tools/libxl/libxl_types.idl |8 +
>>>  tools/xl/xl_parse.c |   54 +
>>>  xen/arch/x86/Makefile   |1 +
>>>  xen/arch/x86/hvm/Makefile   |1 +
>>>  xen/arch/x86/hvm/dm.c   |   29 +
>>>  xen/arch/x86/hvm/irq.c  |   10 +
>>&

Re: [Xen-devel] [RFC PATCH 15/23] X86/vioapic: Hook interrupt delivery of vIOAPIC

2017-04-18 Thread Lan Tianyu
On 2017-04-17 22:43, Konrad Rzeszutek Wilk wrote:
> On Fri, Mar 17, 2017 at 07:27:15PM +0800, Lan Tianyu wrote:
>> From: Chao Gao 
>>
>> When irq remapping enabled, IOAPIC Redirection Entry maybe is in remapping
>> format. If that, generate a irq_remapping_request and send it to domain.
>>
>> Signed-off-by: Chao Gao 
>> Signed-off-by: Lan Tianyu 
>> ---
>>  xen/arch/x86/Makefile  |  1 +
>>  xen/arch/x86/hvm/vioapic.c | 10 ++
>>  xen/arch/x86/viommu.c  | 30 ++
>>  xen/include/asm-x86/viommu.h   |  3 +++
>>  xen/include/public/arch-x86/hvm/save.h |  1 +
>>  5 files changed, 45 insertions(+)
>>  create mode 100644 xen/arch/x86/viommu.c
>>
>> diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
>> index f75eca0..d49f8c8 100644
>> --- a/xen/arch/x86/Makefile
>> +++ b/xen/arch/x86/Makefile
>> @@ -66,6 +66,7 @@ obj-y += usercopy.o
>>  obj-y += x86_emulate.o
>>  obj-$(CONFIG_TBOOT) += tboot.o
>>  obj-y += hpet.o
>> +obj-y += viommu.o
>>  obj-y += vm_event.o
>>  obj-y += xstate.o
>>  
>> diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
>> index fdbb21f..6a00644 100644
>> --- a/xen/arch/x86/hvm/vioapic.c
>> +++ b/xen/arch/x86/hvm/vioapic.c
>> @@ -30,6 +30,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -285,9 +286,18 @@ static void vioapic_deliver(struct hvm_hw_vioapic 
>> *vioapic, int irq)
>>  struct domain *d = vioapic_domain(vioapic);
>>  struct vlapic *target;
>>  struct vcpu *v;
>> +struct irq_remapping_request request;
>>  
>>  ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock));
>>  
>> +if ( vioapic->redirtbl[irq].ir.format )
>> +{
>> +irq_request_ioapic_fill(&request, vioapic->id,
>> +vioapic->redirtbl[irq].bits);
>> +viommu_handle_irq_request(d, &request);
>> +return;
>> +}
>> +
>>  HVM_DBG_LOG(DBG_LEVEL_IOAPIC,
>>  "dest=%x dest_mode=%x delivery_mode=%x "
>>  "vector=%x trig_mode=%x",
>> diff --git a/xen/arch/x86/viommu.c b/xen/arch/x86/viommu.c
>> new file mode 100644
>> index 000..ef78d3b
>> --- /dev/null
>> +++ b/xen/arch/x86/viommu.c
>> @@ -0,0 +1,30 @@
>> +/*
>> + * viommu.c
>> + *
>> + * virtualize IOMMU.
>> + *
>> + * Copyright (C) 2017 Chao Gao, Intel Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms and conditions of the GNU General Public
>> + * License, version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public
>> + * License along with this program; If not, see 
>> <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include 
>> +
>> +void irq_request_ioapic_fill(struct irq_remapping_request *req,
>> + uint32_t ioapic_id, uint64_t rte)
>> +{
>> +ASSERT(req);
>> +req->type = VIOMMU_REQUEST_IRQ_APIC;
>> +req->source_id = ioapic_id;
>> +req->msg.rte = rte;
> 
> Considering we get 'req' from the stack and it may have garbage, would
> it be good to fill out the rest of the entries with sensible values? Or
> is there no need for that?

Both AMD and Intel will use this function to pass interrupt remapping
requests. I am afraid different vendors may have different IOAPIC remapping
formats. How about parsing and checking the remapping request data in the
vendor vIOMMU device module (e.g. vvtd)? :)
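
For illustration, a sketch of that vendor-side validation, with vvtd
checking the request type and decoding the remappable-format RTE itself
(field layout per the VT-d spec; the helper names are made up):

static int vvtd_handle_irq_request(struct domain *d,
                                   struct irq_remapping_request *req)
{
    switch ( req->type )
    {
    case VIOMMU_REQUEST_IRQ_APIC:
        /* Remappable-format IOAPIC RTE: bit 48 is the format bit, the
         * interrupt index sits in bits 63:49 plus bit 11.  Validate the
         * index against the IRT size before looking up the IRTE. */
        return vvtd_remap_ioapic_rte(d, req->source_id, req->msg.rte);

    case VIOMMU_REQUEST_IRQ_MSI:
        return vvtd_remap_msi(d, req->source_id,
                              req->msg.msi.addr, req->msg.msi.data);

    default:
        return -EINVAL;
    }
}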

>> +}
> 
> This being a new file, you should probably include the nice
> editor configuration block.

OK. Will add it.

> 
>> diff --git a/xen/include/asm-x86/viommu.h b/xen/include/asm-x86/viommu.h
>> index 0b25f34..fcf3c24 100644
>> --- a/xen/include/asm-x86/viommu.h
>> +++ b/xen/include/asm-x86/viommu.h
>> @@ -49,6 +49,9 @@ struct irq_remapping_request
>>  } msg;
>>  };
>>  
>> +void irq_request_ioapic_fill(struct irq_remapping_request *req,
>

Re: [Xen-devel] [RFC PATCH 5/23] Tools/libxc: Add viommu operations in libxc

2017-04-18 Thread Lan Tianyu
On 2017年04月18日 17:08, Paul Durrant wrote:
>> -Original Message-
>> From: Lan, Tianyu [mailto:tianyu@intel.com]
>> Sent: 14 April 2017 16:38
>> To: Paul Durrant ; Wei Liu ;
>> Kevin Tian ; Ian Jackson ;
>> xen-devel@lists.xen.org
>> Subject: Re: [Xen-devel] [RFC PATCH 5/23] Tools/libxc: Add viommu
>> operations in libxc
>>
>> Hi Paul:
>>  Sorry for later response.
>>
>> On 3/31/2017 3:57 AM, Chao Gao wrote:
>>> On Wed, Mar 29, 2017 at 09:08:06AM +, Paul Durrant wrote:
>>>>> -Original Message-
>>>>> From: Xen-devel [mailto:xen-devel-boun...@lists.xen.org] On Behalf
>> Of
>>>>> Chao Gao
>>>>> Sent: 29 March 2017 01:40
>>>>> To: Wei Liu 
>>>>> Cc: Lan Tianyu ; Kevin Tian
>> ;
>>>>> Ian Jackson ; xen-devel@lists.xen.org
>>>>> Subject: Re: [Xen-devel] [RFC PATCH 5/23] Tools/libxc: Add viommu
>>>>> operations in libxc
>>>>>
>>>>> Tianyu is on vacation this two weeks, so I will try to address
>>>>> some comments on this series.
>>>>>
>>>>> On Tue, Mar 28, 2017 at 05:24:03PM +0100, Wei Liu wrote:
>>>>>> On Fri, Mar 17, 2017 at 07:27:05PM +0800, Lan Tianyu wrote:
>>>>>>> From: Chao Gao 
>>>>>>>
>>>>>>> In previous patch, we introduce a common vIOMMU layer. In our
>> design,
>>>>>>> we create/destroy vIOMMU through DMOP interface instead of
>> creating
>>>>> it
>>>>>>> according to a config flag of domain. It makes it is possible
>>>>>>> to create vIOMMU in device model or in tool stack.
>>>>>>>
>>>>
>>>> I've not been following this closely so apologies if this has already been
>> asked...
>>>>
>>>> Why would you need to create a vIOMMU instance in an external device
>> model.
>>>> Since the toolstack should be in control of the device model configuration
>> why would it not know in advance that one was required?
>>>
>>> I assume your question is why we don't create a vIOMMU instance via
>> hypercall in toolstack.
>>> I think creating in toolstack is also ok and is easier to be reused by pvh.
>>>
>>> If Tianyu has no concern about this, will move this part to toolstack.
>>
>> We can move create/destroy vIOMMU in the tool stack but we still need to
>> add such dummy vIOMMU device model in Qemu to pass virtual device's
>> DMA
>> request into Xen hypervisor.
> 
> Not quite sure I understand this. The QEMu device model does not 'pass DMA 
> requests' as such, it maps guest RAM and reads or writes to emulate DMA, 
> right? So, what's needed is a mechanism to map guest RAM by 'bus address'... 
> i.e. an address that will need to be translated through the vIOMMU mappings. 
> This is just an evolution of the current 'priv mapping' operations that allow 
> guest RAM to be mapped by guest physical address. So you don't need a vIOMMU 
> 'device model' as such, do you?


The guest may also enable the DMA protection mechanism in the Linux kernel,
which limits the address space of emulated devices and depends on the
vIOMMU's DMA translation function. The vIOMMU's MMIO emulation lives in the
Xen hypervisor, and the guest's shadow IO page table will exist only in the
hypervisor. To translate an emulated device's DMA request, it is therefore
necessary to pass the DMA request to the hypervisor.

So far we don't support DMA translation, so no DMA request is passed yet.

Mapping/unmapping guest memory is already supported in Qemu, just like an
emulated device model accessing guest memory. Qemu also provides a vIOMMU
hook to receive a DMA request and return the target guest address; the
vIOMMU framework then reads/writes the target address. What we need to do is
translate the DMA request to the target address according to the shadow IO
page table in the hypervisor.



> 
>> Qemu is required to use DMOP hypercall and
>> tool stack may use domctl hyercall. vIOMMU hypercalls will be divided
>> into two part.
>>
>> Domctl:
>>  create, destroy and query.
>> DMOP:
>>  vDev's DMA related operations.
> 
> Yes, the mapping/unmapping operations should be DMOPs and IMO should be 
> designed such that they can be unified with replacements for current 'priv 
> map' ops such that QEMU can use the same function call, but with different 
> address space identifiers (i.e. bus address, guest physical address, etc.). 
> BTW, I say 'etc.' becau

Re: [Xen-devel] [RFC PATCH 5/23] Tools/libxc: Add viommu operations in libxc

2017-04-19 Thread Lan Tianyu
On 2017年04月18日 22:15, Paul Durrant wrote:
>> -Original Message-
> [snip]
>>> > >
>>> > > Not quite sure I understand this. The QEMu device model does not 'pass
>> > DMA requests' as such, it maps guest RAM and reads or writes to emulate
>> > DMA, right? So, what's needed is a mechanism to map guest RAM by 'bus
>> > address'... i.e. an address that will need to be translated through the
>> > vIOMMU mappings. This is just an evolution of the current 'priv mapping'
>> > operations that allow guest RAM to be mapped by guest physical address. So
>> > you don't need a vIOMMU 'device model' as such, do you?
>> > 
>> > 
>> > Guest also may enable DMA protection mechanism in linux kernel which
>> > limits address space of emulated device and this depends on the vIOMMU's
>> > DMA translation function. In vIOMMU's MMIO emulation part is in the Xen
>> > hypersior and the guest shadow IO page table will be only in the
>> > hypervisor. To translate emulated device's DMA request. It's necessary
>> > to pass the DMA request to hypervisor.
>> > 
> What do you mean by DMA request though? Are you intending to make some form 
> of hypercall to read or write guest memory? If so then why not introduce a 
> call to map the guest memory (via bus address) and read or write directly.

Such "DMA request" in Qemu vIOMMU framework just contains IOVA(IO
virtual address) and write/read flag. vIOMMU device model just
translates IOVA to GPA and then return back to vIOMMU core which will be
in charge of memory access. So hyercall we want to introduce is to
translate IOVA to GPA.

The data to write and target address to store read data aren't passed to
vIOMMU device model and we can't perform read/write directly there.
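
To make the shape of that hypercall concrete, a minimal sketch (every name
below is hypothetical; the real interface would be a DMOP whose exact layout
is still to be decided):

    #include <inttypes.h>
    #include <stdio.h>

    /* Hypothetical translation request: IOVA and access flag in, GPA out. */
    struct viommu_xlat_sketch {
        uint64_t iova;   /* IN:  IO virtual address from the DMA request */
        uint8_t  write;  /* IN:  1 = write access, 0 = read access */
        uint64_t gpa;    /* OUT: translated guest physical address */
    };

    /*
     * Stand-in for the hypercall: the real implementation would walk the
     * guest's shadow IO page table inside Xen; here we fake an identity
     * mapping just to show the calling convention.
     */
    static int viommu_xlat_sketch_call(struct viommu_xlat_sketch *req)
    {
        req->gpa = req->iova;
        return 0;
    }

    int main(void)
    {
        struct viommu_xlat_sketch req = { .iova = 0x1000, .write = 0 };

        if (viommu_xlat_sketch_call(&req) == 0)
            printf("IOVA 0x%" PRIx64 " -> GPA 0x%" PRIx64 "\n",
                   req.iova, req.gpa);
        return 0;
    }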

>> > So far we don't support DMA translation and so doesn't pass DMA request.
>> > 
> Indeed. We map guest memory using guest physical address because, without an 
> emulated IOMMU, guest physical address === bus address. This is why I suggest 
> a new mapping operation rather than 'passing a DMA request' to the hypervisor.
>
>> > Map/umap guest memory already support in Qemu and just like emulated
>> > device model access guest memory. Qemu also provides vIOMMU hook to
>> > receive DMA request and return target guest address. vIOMMU framework
>> > will read/write target address.
> That's the part I don't get... why have the vIOMMU code do the reads and 
> writes? Why not have it provide a mapping function and then have the device 
> model in QEMU read and write directly as it does now?
> 

Actually, Qemu has a common interface for reading/writing guest memory. The
code checks whether a vIOMMU translation callback is registered before
performing the read/write. If so, it calls the callback, the vIOMMU device
model translates the IOVA to a GPA, and then the read/write operation is
performed.
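
Roughly, that dispatch looks like the sketch below (this is not Qemu's
actual API; all types and names are invented just to show the "translate
first if a callback exists" pattern):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical address space: a translation callback may be attached. */
    typedef int (*xlat_fn)(uint64_t iova, uint64_t *gpa);

    struct dma_as_sketch {
        xlat_fn translate;   /* NULL when no vIOMMU is present */
    };

    /* Common read path: translate if needed, then access guest memory by GPA. */
    static int dma_read_sketch(struct dma_as_sketch *as, uint64_t addr,
                               void *buf, size_t len)
    {
        uint64_t gpa = addr;

        if (as->translate && as->translate(addr, &gpa))
            return -1;                    /* translation fault */

        /* A real device model would read guest memory at 'gpa' here. */
        memset(buf, 0, len);
        printf("read %zu bytes at GPA 0x%llx\n", len, (unsigned long long)gpa);
        return 0;
    }

    static int identity_xlat(uint64_t iova, uint64_t *gpa)
    {
        *gpa = iova;                      /* placeholder vIOMMU translation */
        return 0;
    }

    int main(void)
    {
        struct dma_as_sketch as = { .translate = identity_xlat };
        uint8_t buf[8];

        return dma_read_sketch(&as, 0x2000, buf, sizeof(buf));
    }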


>> > What we need to do is to translate DMA
>> > request to target address according shadow IO page table in the hypervisor.
>> > 
> Yes, so the mapping has to be done by the hypervisor (as is the case for priv 
> mapping or grant mapping) but the memory accesses themselves can be done 
> directly by the device model in QEMU.

Yes.

> 
>> > 
>> > 
>>> > >
 > >> Qemu is required to use DMOP hypercall and
 > >> tool stack may use domctl hyercall. vIOMMU hypercalls will be divided
 > >> into two part.
 > >>
 > >> Domctl:
 > >>   create, destroy and query.
 > >> DMOP:
 > >>   vDev's DMA related operations.
>>> > >
>>> > > Yes, the mapping/unmapping operations should be DMOPs and IMO
>> > should be designed such that they can be unified with replacements for
>> > current 'priv map' ops such that QEMU can use the same function call, but
>> > with different address space identifiers (i.e. bus address, guest physical
>> > address, etc.). BTW, I say 'etc.' because we should also consider mapping 
>> > the
>> > ioreq pages from Xen using the same call - with a dedicated address space
>> > identifier - as well.
>>> > >
>> > 
>> > So you agree to divide vIOMMU's hypercalls into two parts(DMOP and
>> > Domctl), right?
>> > 
> Yes, I agree with the logic of the split.
> 
>   Cheers,
> 
>Paul
> 


-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [RFC PATCH 5/23] Tools/libxc: Add viommu operations in libxc

2017-05-11 Thread Lan Tianyu
On 2017年05月11日 20:35, Wei Liu wrote:
> On Mon, Apr 17, 2017 at 08:01:56PM +0800, Lan Tianyu wrote:
>> On 2017年04月17日 19:08, Wei Liu wrote:
>>> On Fri, Apr 14, 2017 at 11:38:15PM +0800, Lan, Tianyu wrote:
>>>> Hi Paul:
>>>>Sorry for later response.
>>>>
>>>> On 3/31/2017 3:57 AM, Chao Gao wrote:
>>>>> On Wed, Mar 29, 2017 at 09:08:06AM +, Paul Durrant wrote:
>>>>>>> -Original Message-
>>>>>>> From: Xen-devel [mailto:xen-devel-boun...@lists.xen.org] On Behalf Of
>>>>>>> Chao Gao
>>>>>>> Sent: 29 March 2017 01:40
>>>>>>> To: Wei Liu 
>>>>>>> Cc: Lan Tianyu ; Kevin Tian 
>>>>>>> ;
>>>>>>> Ian Jackson ; xen-devel@lists.xen.org
>>>>>>> Subject: Re: [Xen-devel] [RFC PATCH 5/23] Tools/libxc: Add viommu
>>>>>>> operations in libxc
>>>>>>>
>>>>>>> Tianyu is on vacation this two weeks, so I will try to address
>>>>>>> some comments on this series.
>>>>>>>
>>>>>>> On Tue, Mar 28, 2017 at 05:24:03PM +0100, Wei Liu wrote:
>>>>>>>> On Fri, Mar 17, 2017 at 07:27:05PM +0800, Lan Tianyu wrote:
>>>>>>>>> From: Chao Gao 
>>>>>>>>>
>>>>>>>>> In previous patch, we introduce a common vIOMMU layer. In our design,
>>>>>>>>> we create/destroy vIOMMU through DMOP interface instead of creating
>>>>>>> it
>>>>>>>>> according to a config flag of domain. It makes it is possible
>>>>>>>>> to create vIOMMU in device model or in tool stack.
>>>>>>>>>
>>>>>>
>>>>>> I've not been following this closely so apologies if this has already 
>>>>>> been asked...
>>>>>>
>>>>>> Why would you need to create a vIOMMU instance in an external device 
>>>>>> model.
>>>>>> Since the toolstack should be in control of the device model 
>>>>>> configuration why would it not know in advance that one was required?
>>>>>
>>>>> I assume your question is why we don't create a vIOMMU instance via 
>>>>> hypercall in toolstack.
>>>>> I think creating in toolstack is also ok and is easier to be reused by 
>>>>> pvh.
>>>>>
>>>>> If Tianyu has no concern about this, will move this part to toolstack.
>>>>
>>>> We can move create/destroy vIOMMU in the tool stack but we still need to 
>>>> add
>>>> such dummy vIOMMU device model in Qemu to pass virtual device's DMA request
>>>> into Xen hypervisor. Qemu is required to use DMOP hypercall and tool stack
>>>> may use domctl hyercall. vIOMMU hypercalls will be divided into two part.
>>>>
>>>> Domctl:
>>>>create, destroy and query.
>>>> DMOP:
>>>>vDev's DMA related operations.
>>>>
>>>> Is this OK?
>>>>
>>>
>>> Why are they divided into two libraries? Can't they be in DMOP at the
>>> same time?
>>
>> Yes, we can use DMOP for all vIOMMU hyercalls if it's necessary to keep
>> unified vIOMMU hyercall type. In theory, DMOP dedicates to be used by
>> Qemu but we also can use it in tool stack. If we move create, destroy
>> and query operation to tool stack, it isn't necessary to use DMOP for
>> them since only tool stack will call them. This is why I said we could
>> use domctl for these operations. Both two ways will not affect function
>> implementation. Which one it's better from your view? :)
>>
> 
> 
> After reading the subthread I think I agree with Paul. I.e. please
> separate them.
> 

Sure. Will update.

-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [RFC PATCH V3 1/3] Xen: Increase hap/shadow page pool size to support more vcpus support

2017-09-18 Thread Lan Tianyu
Hi Wei:

On 2017年09月18日 21:06, Wei Liu wrote:
> On Wed, Sep 13, 2017 at 12:52:47AM -0400, Lan Tianyu wrote:
>> This patch is to increase page pool size when max vcpu number is larger
>> than 128.
>>
>> Signed-off-by: Lan Tianyu 
>> ---
>>  xen/arch/arm/domain.c|  5 +
>>  xen/arch/x86/domain.c| 25 +
>>  xen/common/domctl.c  |  3 +++
>>  xen/include/xen/domain.h |  2 ++
>>  4 files changed, 35 insertions(+)
>>
>> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
>> index 6512f01..94cf70b 100644
>> --- a/xen/arch/arm/domain.c
>> +++ b/xen/arch/arm/domain.c
>> @@ -824,6 +824,11 @@ int arch_vcpu_reset(struct vcpu *v)
>>  return 0;
>>  }
>>  
>> +int arch_domain_set_max_vcpus(struct domain *d)
>> +{
>> +return 0;
>> +}
>> +
>>  static int relinquish_memory(struct domain *d, struct page_list_head *list)
>>  {
>>  struct page_info *page, *tmp;
>> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
>> index dbddc53..0e230f9 100644
>> --- a/xen/arch/x86/domain.c
>> +++ b/xen/arch/x86/domain.c
>> @@ -1161,6 +1161,31 @@ int arch_vcpu_reset(struct vcpu *v)
>>  return 0;
>>  }
>>  
>> +int arch_domain_set_max_vcpus(struct domain *d)
> 
> The name doesn't match what the function does.
> 

I originally hoped to introduce a hook for each arch when setting max
vcpus. Each arch function can then do its own customized work, hence the
name "arch_domain_set_max_vcpus".

How about "arch_domain_setup_vcpus_resource"?


>> +{
>> +int ret;
>> +
>> +/* Increase page pool in order to support more vcpus. */
>> +if ( d->max_vcpus > 128 )
>> +{
>> +unsigned long nr_pages;
>> +
>> +if (hap_enabled(d))
> 
> Coding style.

Will update. Thanks.

> 
>> +nr_pages = 1024;
>> +else
>> +nr_pages = 4096;
>> +
>> +ret = paging_set_allocation(d, nr_pages, NULL);
> 
> Does this work on PV guests?


Sorry. This code should not run for a PV guest. I will add a domain type
check here.

> 
>> +if ( ret != 0 )
>> +{
>> +paging_set_allocation(d, 0, NULL);
>> +return ret;
>> +}
>> +}
>> +
>> +return 0;
>> +}
>> +
>>  long
>>  arch_do_vcpu_op(
>>  int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
>> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
>> index 42658e5..64357a3 100644
>> --- a/xen/common/domctl.c
>> +++ b/xen/common/domctl.c
>> @@ -631,6 +631,9 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) 
>> u_domctl)
>>  d->max_vcpus = max;
>>  }
>>  
>> +if ( arch_domain_set_max_vcpus(d) < 0)
> 
> != 0 please.
> 

Sure.

>> +goto maxvcpu_out;
>> +
>>  for ( i = 0; i < max; i++ )
>>  {
>>  if ( d->vcpu[i] != NULL )
>> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
>> index 347f264..e1ece3a 100644
>> --- a/xen/include/xen/domain.h
>> +++ b/xen/include/xen/domain.h
>> @@ -81,6 +81,8 @@ void arch_dump_domain_info(struct domain *d);
>>  
>>  int arch_vcpu_reset(struct vcpu *);
>>  
>> +int arch_domain_set_max_vcpus(struct domain *d);
>> +
>>  extern spinlock_t vcpu_alloc_lock;
>>  bool_t domctl_lock_acquire(void);
>>  void domctl_lock_release(void);
>> -- 
>> 1.8.3.1
>>


-- 
Best regards
Tianyu Lan



Re: [Xen-devel] [RFC PATCH V3 1/3] Xen: Increase hap/shadow page pool size to support more vcpus support

2017-09-21 Thread Lan Tianyu
On 2017年09月20日 23:13, Wei Liu wrote:
> On Tue, Sep 19, 2017 at 11:06:26AM +0800, Lan Tianyu wrote:
>> Hi Wei:
>>
>> On 2017年09月18日 21:06, Wei Liu wrote:
>>> On Wed, Sep 13, 2017 at 12:52:47AM -0400, Lan Tianyu wrote:
>>>> This patch is to increase page pool size when max vcpu number is larger
>>>> than 128.
>>>>
>>>> Signed-off-by: Lan Tianyu 
>>>> ---
>>>>  xen/arch/arm/domain.c|  5 +
>>>>  xen/arch/x86/domain.c| 25 +
>>>>  xen/common/domctl.c  |  3 +++
>>>>  xen/include/xen/domain.h |  2 ++
>>>>  4 files changed, 35 insertions(+)
>>>>
>>>> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
>>>> index 6512f01..94cf70b 100644
>>>> --- a/xen/arch/arm/domain.c
>>>> +++ b/xen/arch/arm/domain.c
>>>> @@ -824,6 +824,11 @@ int arch_vcpu_reset(struct vcpu *v)
>>>>  return 0;
>>>>  }
>>>>  
>>>> +int arch_domain_set_max_vcpus(struct domain *d)
>>>> +{
>>>> +return 0;
>>>> +}
>>>> +
>>>>  static int relinquish_memory(struct domain *d, struct page_list_head 
>>>> *list)
>>>>  {
>>>>  struct page_info *page, *tmp;
>>>> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
>>>> index dbddc53..0e230f9 100644
>>>> --- a/xen/arch/x86/domain.c
>>>> +++ b/xen/arch/x86/domain.c
>>>> @@ -1161,6 +1161,31 @@ int arch_vcpu_reset(struct vcpu *v)
>>>>  return 0;
>>>>  }
>>>>  
>>>> +int arch_domain_set_max_vcpus(struct domain *d)
>>>
>>> The name doesn't match what the function does.
>>>
>>
>> I originally hoped to introduce a hook for each arch when set max vcpus.
>> Each arch function can do customized thing and so named
>> "arch_domain_set_max_vcpus".
>>
>> How about "arch_domain_setup_vcpus_resource"?
> 
> Before you go away and do a lot of work, please let us think about if
> this is the right approach first.

Sure. The idea of increasing the page pool when setting max vcpus is from
Jan. Jan, could you help check whether the current patch takes the right
approach? Thanks.

> 
> We are close to freeze, with the amount of patches we receive everyday
> RFC patch like this one is low on my (can't speak for others) priority
> list. I am not sure when I will be able to get back to this, but do ping
> us if you want to know where things stand.
> 


-- 
Best regards
Tianyu Lan



[Xen-devel] [PATCH V3 1/29] Xen/doc: Add Xen virtual IOMMU doc

2017-09-22 Thread Lan Tianyu
This patch adds the Xen virtual IOMMU doc to introduce the motivation,
framework, vIOMMU hypercall and xl configuration.

Signed-off-by: Lan Tianyu 
---
 docs/misc/viommu.txt | 136 +++
 1 file changed, 136 insertions(+)
 create mode 100644 docs/misc/viommu.txt

diff --git a/docs/misc/viommu.txt b/docs/misc/viommu.txt
new file mode 100644
index 000..348e8c4
--- /dev/null
+++ b/docs/misc/viommu.txt
@@ -0,0 +1,136 @@
+Xen virtual IOMMU
+
+Motivation
+==
+Enable more than 128 vcpu support
+
+The current requirements of HPC cloud services call for VMs with a high
+number of CPUs in order to achieve high performance in parallel
+computing.
+
+To support >128 vcpus, x2APIC mode in the guest is necessary because the
+legacy APIC (xAPIC) only supports 8-bit APIC IDs. The APIC ID used by Xen
+is CPU ID * 2 (i.e. CPU 127 has APIC ID 254, the last one available in
+xAPIC mode), so at most 128 vcpus can be supported. x2APIC mode supports
+32-bit APIC IDs, but it requires the interrupt remapping functionality of
+a vIOMMU if the guest wishes to route interrupts to all available vCPUs.
+
+The reason for this is that the existing PCI MSI and IOAPIC are unchanged
+when x2APIC is introduced: PCI MSI/IOAPIC interrupt messages can only
+carry an 8-bit APIC ID, which cannot address cpus with an APIC ID above
+254. Interrupt remapping supports 32-bit APIC IDs and is therefore
+necessary to support >128 vcpus.
+
+
+vIOMMU Architecture
+===
+The vIOMMU device model is inside the Xen hypervisor for the following reasons:
+1) Avoid round trips between Qemu and Xen hypervisor
+2) Ease of integration with the rest of hypervisor
+3) HVMlite/PVH doesn't use Qemu
+
+* Interrupt remapping overview.
+Interrupts from virtual devices and physical devices are delivered
+to the vLAPIC from the vIOAPIC and vMSI. The vIOMMU needs to remap
+interrupts during this procedure.
+
++---+
+|Qemu   |VM |
+|   | ++|
+|   | |  Device driver ||
+|   | ++---+|
+|   |  ^|
+|   ++  | ++---+|
+|   | Virtual device |  | |  IRQ subsystem ||
+|   +---++  | ++---+|
+|   |   |  ^|
+|   |   |  ||
++---+---+
+|hypervisor |  | VIRQ   |
+|   |+-++   |
+|   ||  vLAPIC  |   |
+|   |VIRQ+-++   |
+|   |  ^|
+|   |  ||
+|   |+-++   |
+|   ||  vIOMMU  |   |
+|   |+-++   |
+|   |  ^|
+|   |  ||
+|   |+-++   |
+|   ||   vIOAPIC/vMSI   |   |
+|   |++++   |
+|   | ^^|
+|   +-+||
+|  ||
++---+
+HW |IRQ
++---+
+|   PCI Device  |
++---+
+
+
+vIOMMU hypercall
+
+Introduce a new domctl hypercall "xen_domctl_viommu_op" to create/destroy
+vIOMMUs.
+
+* vIOMMU hypercall parameter structure
+
+/* vIOMMU type - specify vendor vIOMMU device model */
+#define VIOMMU_TYPE_INTEL_VTD 0
+
+/* vIOMMU capabilities */
+#define VIOMMU_CAP_IRQ_REMAPPING  (1u << 0)
+
+struct xen_domctl_viommu_op {
+uint32_t cmd;
+#define XEN_DOMCTL_create_viommu  0
+#define XEN_DOMCTL_destroy_viommu 1
+union {
+struct {
+/* IN - vIOMMU type  */
+uint64_t viommu_type;
+/* IN - MMIO base address of vIOMMU. */
+uint64_t base_address;
+/* IN - Capabilities with which we want to create */
+uint64_t capabilities;
+/* OUT - vIOMMU identity */
+uint32_t viommu_id;
+} create_viommu;
+
+struct {
+/* IN - vIOMMU identity */
+uint32_t viommu_id;
+} destroy_viommu;
+} u;
+};
+
+- XEN_DOMCTL_create_viommu
+Create vIOMMU device with vIOMMU_type, capabilities and MMIO base
+address. Hypervisor allocates viommu_id for new vIOMMU instance 
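
For reference, a minimal sketch of how the toolstack could drive this
domctl through the xc_viommu_create() wrapper added later in this series
(the base address and capability values are the series' defaults; error
handling omitted):

    /* Sketch only: requires the xc_viommu_create() wrapper added by this series. */
    #include <xenctrl.h>

    #define VIOMMU_TYPE_INTEL_VTD     0
    #define VIOMMU_CAP_IRQ_REMAPPING  (1u << 0)
    #define VIOMMU_VTD_BASE_ADDR      0xfed90000ULL

    static int create_guest_viommu(xc_interface *xch, uint32_t domid)
    {
        uint32_t viommu_id;

        /* Returns 0 on success and fills in the new vIOMMU's identity. */
        return xc_viommu_create(xch, domid, VIOMMU_TYPE_INTEL_VTD,
                                VIOMMU_VTD_BASE_ADDR, VIOMMU_CAP_IRQ_REMAPPING,
                                &viommu_id);
    }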

[Xen-devel] [PATCH V3 3/29] DOMCTL: Introduce new DOMCTL commands for vIOMMU support

2017-09-22 Thread Lan Tianyu
This patch introduces the create, destroy and query-capabilities
commands for the vIOMMU. The vIOMMU layer will deal with the requests and
call the arch vIOMMU ops.

Signed-off-by: Lan Tianyu 
---
 xen/common/domctl.c |  6 ++
 xen/common/viommu.c | 30 ++
 xen/include/public/domctl.h | 42 ++
 xen/include/xen/viommu.h|  2 ++
 4 files changed, 80 insertions(+)

diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 42658e5..7e28237 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -1149,6 +1149,12 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) 
u_domctl)
 copyback = 1;
 break;
 
+#ifdef CONFIG_VIOMMU
+case XEN_DOMCTL_viommu_op:
+ret = viommu_domctl(d, &op->u.viommu_op, &copyback);
+break;
+#endif
+
 default:
 ret = arch_do_domctl(op, d, u_domctl);
 break;
diff --git a/xen/common/viommu.c b/xen/common/viommu.c
index 64d91e6..55feb5d 100644
--- a/xen/common/viommu.c
+++ b/xen/common/viommu.c
@@ -133,6 +133,36 @@ static int viommu_create(struct domain *d, uint64_t type,
 return 0;
 }
 
+int viommu_domctl(struct domain *d, struct xen_domctl_viommu_op *op,
+  bool *need_copy)
+{
+int rc = -EINVAL;
+
+if ( !viommu_enabled() )
+return -ENODEV;
+
+switch ( op->cmd )
+{
+case XEN_DOMCTL_create_viommu:
+rc = viommu_create(d, op->u.create.viommu_type,
+   op->u.create.base_address,
+   op->u.create.capabilities,
+   &op->u.create.viommu_id);
+if ( !rc )
+*need_copy = true;
+break;
+
+case XEN_DOMCTL_destroy_viommu:
+rc = viommu_destroy_domain(d);
+break;
+
+default:
+return -ENOSYS;
+}
+
+return rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 50ff58f..68854b6 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1163,6 +1163,46 @@ struct xen_domctl_psr_cat_op {
 typedef struct xen_domctl_psr_cat_op xen_domctl_psr_cat_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cat_op_t);
 
+/*  vIOMMU helper
+ *
+ *  vIOMMU interface can be used to create/destroy vIOMMU and
+ *  query vIOMMU capabilities.
+ */
+
+/* vIOMMU type - specify vendor vIOMMU device model */
+#define VIOMMU_TYPE_INTEL_VTD   0
+
+/* vIOMMU capabilities */
+#define VIOMMU_CAP_IRQ_REMAPPING  (1u << 0)
+
+struct xen_domctl_viommu_op {
+uint32_t cmd;
+#define XEN_DOMCTL_create_viommu  0
+#define XEN_DOMCTL_destroy_viommu 1
+union {
+struct {
+/* IN - vIOMMU type */
+uint64_t viommu_type;
+/* 
+ * IN - MMIO base address of vIOMMU. vIOMMU device models
+ * are in charge of checking base_address.
+ */
+uint64_t base_address;
+/* IN - Capabilities with which we want to create */
+uint64_t capabilities;
+/* OUT - vIOMMU identity */
+uint32_t viommu_id;
+} create;
+
+struct {
+/* IN - vIOMMU identity */
+uint32_t viommu_id;
+} destroy;
+} u;
+};
+typedef struct xen_domctl_viommu_op xen_domctl_viommu_op;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_viommu_op);
+
 struct xen_domctl {
 uint32_t cmd;
 #define XEN_DOMCTL_createdomain   1
@@ -1240,6 +1280,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_monitor_op77
 #define XEN_DOMCTL_psr_cat_op78
 #define XEN_DOMCTL_soft_reset79
+#define XEN_DOMCTL_viommu_op 80
 #define XEN_DOMCTL_gdbsx_guestmemio1000
 #define XEN_DOMCTL_gdbsx_pausevcpu 1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu   1002
@@ -1302,6 +1343,7 @@ struct xen_domctl {
 struct xen_domctl_psr_cmt_oppsr_cmt_op;
 struct xen_domctl_monitor_opmonitor_op;
 struct xen_domctl_psr_cat_oppsr_cat_op;
+struct xen_domctl_viommu_op viommu_op;
 uint8_t pad[128];
 } u;
 };
diff --git a/xen/include/xen/viommu.h b/xen/include/xen/viommu.h
index 636a2a3..baa8ab7 100644
--- a/xen/include/xen/viommu.h
+++ b/xen/include/xen/viommu.h
@@ -43,6 +43,8 @@ static inline bool viommu_enabled(void)
 
 int viommu_register_type(uint64_t type, struct viommu_ops *ops);
 int viommu_destroy_domain(struct domain *d);
+int viommu_domctl(struct domain *d, struct xen_domctl_viommu_op *op,
+  bool_t *need_copy);
 #else
 static inline int viommu_register_type(uint64_t type, struct viommu_ops *ops)
 {
-- 
1.8.3.1




[Xen-devel] [PATCH V3 2/29] VIOMMU: Add vIOMMU helper functions to create, destroy vIOMMU instance

2017-09-22 Thread Lan Tianyu
This patch introduces an abstraction layer for arch vIOMMU implementations
to deal with requests from dom0. Arch vIOMMU code needs to provide callbacks
for the create and destroy operations.

Signed-off-by: Lan Tianyu 
---
 docs/misc/xen-command-line.markdown |   7 ++
 xen/arch/x86/Kconfig|   1 +
 xen/common/Kconfig  |   3 +
 xen/common/Makefile |   1 +
 xen/common/domain.c |   4 +
 xen/common/viommu.c | 144 
 xen/include/xen/sched.h |   8 ++
 xen/include/xen/viommu.h|  63 
 8 files changed, 231 insertions(+)
 create mode 100644 xen/common/viommu.c
 create mode 100644 xen/include/xen/viommu.h

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 9797c8d..dfd1db5 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1825,3 +1825,10 @@ mode.
 > Default: `true`
 
 Permit use of the `xsave/xrstor` instructions.
+
+### viommu
+> `= <boolean>`
+
+> Default: `false`
+
+Permit use of the viommu interface to create and destroy the viommu device model.
diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
index 30c2769..1f1de96 100644
--- a/xen/arch/x86/Kconfig
+++ b/xen/arch/x86/Kconfig
@@ -23,6 +23,7 @@ config X86
select HAS_PDX
select NUMA
select VGA
+   select VIOMMU
 
 config ARCH_DEFCONFIG
string
diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index dc8e876..2ad2c8d 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -49,6 +49,9 @@ config HAS_CHECKPOLICY
string
option env="XEN_HAS_CHECKPOLICY"
 
+config VIOMMU
+   bool
+
 config KEXEC
bool "kexec support"
default y
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 39e2614..da32f71 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -56,6 +56,7 @@ obj-y += time.o
 obj-y += timer.o
 obj-y += trace.o
 obj-y += version.o
+obj-$(CONFIG_VIOMMU) += viommu.o
 obj-y += virtual_region.o
 obj-y += vm_event.o
 obj-y += vmap.o
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 5aebcf2..cdb1c9d 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -814,6 +814,10 @@ static void complete_domain_destroy(struct rcu_head *head)
 
 sched_destroy_domain(d);
 
+#ifdef CONFIG_VIOMMU
+viommu_destroy_domain(d);
+#endif
+
 /* Free page used by xen oprofile buffer. */
 #ifdef CONFIG_XENOPROF
 free_xenoprof_pages(d);
diff --git a/xen/common/viommu.c b/xen/common/viommu.c
new file mode 100644
index 000..64d91e6
--- /dev/null
+++ b/xen/common/viommu.c
@@ -0,0 +1,144 @@
+/*
+ * common/viommu.c
+ *
+ * Copyright (c) 2017 Intel Corporation
+ * Author: Lan Tianyu 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+bool __read_mostly opt_viommu;
+boolean_param("viommu", opt_viommu);
+
+static DEFINE_SPINLOCK(type_list_lock);
+static LIST_HEAD(type_list);
+
+struct viommu_type {
+uint64_t type;
+struct viommu_ops *ops;
+struct list_head node;
+};
+
+int viommu_destroy_domain(struct domain *d)
+{
+int ret;
+
+if ( !d->viommu )
+return -EINVAL;
+
+ret = d->viommu->ops->destroy(d->viommu);
+if ( ret < 0 )
+return ret;
+
+xfree(d->viommu);
+d->viommu = NULL;
+return 0;
+}
+
+static struct viommu_type *viommu_get_type(uint64_t type)
+{
+struct viommu_type *viommu_type = NULL;
+
+spin_lock(&type_list_lock);
+list_for_each_entry( viommu_type, &type_list, node )
+{
+if ( viommu_type->type == type )
+{
+spin_unlock(&type_list_lock);
+return viommu_type;
+}
+}
+spin_unlock(&type_list_lock);
+
+return NULL;
+}
+
+int viommu_register_type(uint64_t type, struct viommu_ops *ops)
+{
+struct viommu_type *viommu_type = NULL;
+
+if ( !viommu_enabled() )
+return -ENODEV;
+
+if ( viommu_get_type(type) )
+return -EEXIST;
+
+viommu_type = xzalloc(struct viommu_type);
+if ( !viommu_type )
+return -ENOMEM;
+
+viommu_type->type = type;
+viommu_type->ops = ops;
+
+spin_lock(&type_list_lock);
+list_add_tail(&viommu_type->node, &type_list);
+
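
For reference, a condensed sketch of how a vendor model plugs into this
layer, based on the vvtd patch later in this series (callback bodies are
stubbed out and the callback signatures are abbreviated for the sketch):

    /* Sketch: how an arch/vendor vIOMMU registers with the common viommu layer. */
    static int vvtd_create(struct domain *d, struct viommu *viommu)
    {
        /* Allocate per-domain vvtd state, register MMIO handlers, ... */
        return 0;
    }

    static int vvtd_destroy(struct viommu *viommu)
    {
        /* Free per-domain vvtd state. */
        return 0;
    }

    static struct viommu_ops vvtd_hvm_vmx_ops = {
        .create  = vvtd_create,
        .destroy = vvtd_destroy,
    };

    static int vvtd_register(void)
    {
        return viommu_register_type(VIOMMU_TYPE_INTEL_VTD, &vvtd_hvm_vmx_ops);
    }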

[Xen-devel] [PATCH V3 7/29] tools/libxl: build DMAR table for a guest with one virtual VTD

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

New logic is added to build the ACPI DMAR table in the tool stack for a
guest with one virtual VTD and to pass it through to the guest via the
existing mechanism. If there are already ACPI tables that need to be passed
through, we join the tables.

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 

---
v3:
 - build dmar and initialize related acpi_modules struct in
 libxl_x86_acpi.c, keeping in accordance with pvh.

---
 tools/libxl/libxl_x86.c  |  3 +-
 tools/libxl/libxl_x86_acpi.c | 98 ++--
 2 files changed, 96 insertions(+), 5 deletions(-)

diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
index 455f6f0..23c9a55 100644
--- a/tools/libxl/libxl_x86.c
+++ b/tools/libxl/libxl_x86.c
@@ -381,8 +381,7 @@ int libxl__arch_domain_finalise_hw_description(libxl__gc 
*gc,
 {
 int rc = 0;
 
-if ((info->type == LIBXL_DOMAIN_TYPE_HVM) &&
-(info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_NONE)) {
+if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
 rc = libxl__dom_load_acpi(gc, info, dom);
 if (rc != 0)
 LOGE(ERROR, "libxl_dom_load_acpi failed");
diff --git a/tools/libxl/libxl_x86_acpi.c b/tools/libxl/libxl_x86_acpi.c
index 1761756..adf02f4 100644
--- a/tools/libxl/libxl_x86_acpi.c
+++ b/tools/libxl/libxl_x86_acpi.c
@@ -16,6 +16,7 @@
 #include "libxl_arch.h"
 #include 
 #include 
+#include "libacpi/acpi2_0.h"
 #include "libacpi/libacpi.h"
 
 #include 
@@ -161,9 +162,9 @@ out:
 return rc;
 }
 
-int libxl__dom_load_acpi(libxl__gc *gc,
- const libxl_domain_build_info *b_info,
- struct xc_dom_image *dom)
+static int libxl__dom_load_acpi_pvh(libxl__gc *gc,
+const libxl_domain_build_info *b_info,
+struct xc_dom_image *dom)
 {
 struct acpi_config config = {0};
 struct libxl_acpi_ctxt libxl_ctxt;
@@ -236,6 +237,97 @@ out:
 return rc;
 }
 
+static void *acpi_memalign(struct acpi_ctxt *ctxt, uint32_t size,
+   uint32_t align)
+{
+int ret;
+void *ptr;
+
+ret = posix_memalign(&ptr, align, size);
+if (ret != 0 || !ptr)
+return NULL;
+
+return ptr;
+}
+
+/*
+ * For hvm, we don't need to build acpi in libxl. Instead, it's built in
+ * hvmloader. But if an hvm guest has virtual VTD(s), we build the DMAR
+ * table for it and join this table with the existing content in
+ * acpi_modules in order to employ the HVM firmware pass-through mechanism
+ * to pass through the DMAR table.
+ */
+static int libxl__dom_load_acpi_hvm(libxl__gc *gc,
+const libxl_domain_build_info *b_info,
+struct xc_dom_image *dom)
+{
+struct acpi_config config = { 0 };
+struct acpi_ctxt ctxt;
+void *table;
+uint32_t len;
+
+if ((b_info->type != LIBXL_DOMAIN_TYPE_HVM) ||
+(b_info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_NONE) ||
+(b_info->num_viommus != 1) ||
+(b_info->viommu[0].type != LIBXL_VIOMMU_TYPE_INTEL_VTD))
+return 0;
+
+ctxt.mem_ops.alloc = acpi_memalign;
+ctxt.mem_ops.v2p = virt_to_phys;
+ctxt.mem_ops.free = acpi_mem_free;
+
+if (libxl_defbool_val(b_info->viommu[0].intremap))
+config.iommu_intremap_supported = true;
+/* x2apic is always enabled since in no case we must disable it */
+config.iommu_x2apic_supported = true;
+config.iommu_base_addr = b_info->viommu[0].base_addr;
+
+/* IOAPIC id and PSEUDO BDF */
+config.ioapic_id = 1;
+config.ioapic_bus = 0xff;
+config.ioapic_devfn = 0x0;
+
+config.host_addr_width = 39;
+
+table = construct_dmar(&ctxt, &config);
+if ( !table )
+return ERROR_NOMEM;
+len = ((struct acpi_header *)table)->length;
+
+if (len) {
+libxl__ptr_add(gc, table);
+if (!dom->acpi_modules[0].data) {
+dom->acpi_modules[0].data = table;
+dom->acpi_modules[0].length = len;
+} else {
+/* joint tables */
+void *newdata;
+
+newdata = libxl__malloc(gc, len + dom->acpi_modules[0].length);
+memcpy(newdata, dom->acpi_modules[0].data,
+   dom->acpi_modules[0].length);
+memcpy(newdata + dom->acpi_modules[0].length, table, len);
+
+free(dom->acpi_modules[0].data);
+dom->acpi_modules[0].data = newdata;
+dom->acpi_modules[0].length += len;
+}
+}
+return 0;
+}
+
+int libxl__dom_load_acpi(libxl__gc *gc,
+ const libxl_domain_build_info *b_info,
+ struct xc_dom_image *dom)
+{
+
+if (b_info->type != LIBXL_DOMAIN_TYPE_HVM)
+return 0;
+
+if (b_info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_NONE)
+return li

[Xen-devel] [PATCH V3 6/29] tools/libxl: Add a user configurable parameter to control vIOMMU attributes

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

A field, viommu_info, is added to struct libxl_domain_build_info. Several
attributes can be specified in the guest config file for the virtual IOMMU.
These attributes are used for DMAR construction and vIOMMU creation.

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 

---
v3:
 - allow an array of viommu other than only one viommu to present to guest.
 During domain building, an error would be raised for
 multiple viommus case since we haven't implemented this yet.
 - provide a libxl__viommu_set_default() for viommu

---
 docs/man/xl.cfg.pod.5.in| 27 +++
 tools/libxl/libxl_create.c  | 52 +
 tools/libxl/libxl_types.idl | 12 +++
 tools/xl/xl_parse.c | 52 -
 4 files changed, 142 insertions(+), 1 deletion(-)

diff --git a/docs/man/xl.cfg.pod.5.in b/docs/man/xl.cfg.pod.5.in
index 79cb2ea..9cd7dd7 100644
--- a/docs/man/xl.cfg.pod.5.in
+++ b/docs/man/xl.cfg.pod.5.in
@@ -1547,6 +1547,33 @@ 
L<http://www.microsoft.com/en-us/download/details.aspx?id=30707>
 
 =back 
 
+=item B
+
+Specifies the vIOMMUs which are to be provided to the guest.
+
+B has the form C where:
+
+=over 4
+
+=item B
+
+Possible Bs are:
+
+=over 4
+
+=item B
+
+Currently there is only one valid type:
+
+(x86 only) "intel_vtd" means providing an emulated Intel VT-d to the guest.
+
+=item B
+
+Specifies whether the vIOMMU should support interrupt remapping.
+Defaults to 'true'.
+
+=back
+
 =head3 Guest Virtual Time Controls
 
 =over 4
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 9123585..decd7a8 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -27,6 +27,8 @@
 
 #include 
 
+#define VIOMMU_VTD_BASE_ADDR    0xfed90000ULL
+
 int libxl__domain_create_info_setdefault(libxl__gc *gc,
  libxl_domain_create_info *c_info)
 {
@@ -59,6 +61,47 @@ void libxl__rdm_setdefault(libxl__gc *gc, 
libxl_domain_build_info *b_info)
 LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT;
 }
 
+static int libxl__viommu_set_default(libxl__gc *gc,
+ libxl_domain_build_info *b_info)
+{
+int i;
+
+if (!b_info->num_viommus)
+return 0;
+
+for (i = 0; i < b_info->num_viommus; i++) {
+libxl_viommu_info *viommu = &b_info->viommu[i];
+
+if (libxl_defbool_is_default(viommu->intremap))
+libxl_defbool_set(&viommu->intremap, true);
+
+if (!libxl_defbool_val(viommu->intremap)) {
+LOGE(ERROR, "Cannot create one virtual VTD without intremap");
+return ERROR_INVAL;
+}
+
+if (viommu->type == LIBXL_VIOMMU_TYPE_INTEL_VTD) {
+/*
+ * If there are multiple vIOMMUs, we need to arrange all vIOMMUs to
+ * avoid overlap. Put a check here in case we get here in the
+ * multiple-vIOMMUs case.
+ */
+if (b_info->num_viommus > 1) {
+LOGE(ERROR, "Multiple vIOMMUs support is under 
implementation");
+return ERROR_INVAL;
+}
+
+/* Set default values to unexposed fields */
+viommu->base_addr = VIOMMU_VTD_BASE_ADDR;
+
+/* Set desired capbilities */
+viommu->cap = VIOMMU_CAP_IRQ_REMAPPING;
+}
+}
+
+return 0;
+}
+
 int libxl__domain_build_info_setdefault(libxl__gc *gc,
 libxl_domain_build_info *b_info)
 {
@@ -214,6 +257,9 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc,
 
 libxl__arch_domain_build_info_acpi_setdefault(b_info);
 
+if (libxl__viommu_set_default(gc, b_info))
+return ERROR_FAIL;
+
 switch (b_info->type) {
 case LIBXL_DOMAIN_TYPE_HVM:
 if (b_info->shadow_memkb == LIBXL_MEMKB_DEFAULT)
@@ -890,6 +936,12 @@ static void initiate_domain_create(libxl__egc *egc,
 goto error_out;
 }
 
+if (d_config->b_info.num_viommus > 1) {
+ret = ERROR_INVAL;
+LOGD(ERROR, domid, "Cannot support multiple vIOMMUs");
+goto error_out;
+}
+
 ret = libxl__domain_create_info_setdefault(gc, &d_config->c_info);
 if (ret) {
 LOGD(ERROR, domid, "Unable to set domain create info defaults");
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 173d70a..286c960 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -450,6 +450,17 @@ libxl_altp2m_mode = Enumeration("altp2m_mode", [
 (3, "limited"),
 ], init_val = "LIBXL_ALTP2M_MODE_DISABLED")
 
+libxl_viommu_type = Enumeration("viommu_type", [
+(1, "intel_vtd"),
+])
+
+libxl_viommu_info = Struct("viommu_info", [
+("type"

[Xen-devel] [PATCH V3 00/29]

2017-09-22 Thread Lan Tianyu
Change since v2:
   1) Remove vIOMMU hypercall of query capabilities and introduce when 
necessary.
   2) Remove length field of vIOMMU create parameter of vIOMMU hypercall
   3) Introduce irq remapping mode callback to vIOMMU framework and vIOMMU 
device models
can check irq remapping mode by vendor specific ways.
   4) Update vIOMMU docs.
   5) Other changes please see patches' change logs.

Change since v1:
   1) Fix coding style issues
   2) Add definitions for vIOMMU type and capabilities
   3) Change vIOMMU kconfig and select vIOMMU default on x86
   4) Put vIOMMU creation in libxl__arch_domain_create()
   5) Make vIOMMU structure of tool stack more general for both PV and HVM.

Change since RFC v2:
   1) Move vvtd.c to drivers/passthrough/vtd directroy. 
   2) Make vIOMMU always built in on x86
   3) Add new boot cmd "viommu" to enable viommu function
   4) Fix some code stype issues.

Change since RFC v1:
   1) Add Xen virtual IOMMU doc docs/misc/viommu.txt
   2) Move vIOMMU hypercall of create/destroy vIOMMU and query  
capabilities from dmop to domctl suggested by Paul Durrant. Because
these hypercalls can be done in tool stack and more VM mode(E,G PVH
or other modes don't use Qemu) can be benefit.
   3) Add check of input MMIO address and length.
   4) Add iommu_type in vIOMMU hypercall parameter to specify
vendor vIOMMU device model(E,G Intel VTD, AMD or ARM IOMMU. So far
only support Intel VTD).
   5) Add save and restore support for vvtd


This patchset introduces the vIOMMU framework and adds virtual VTD's
interrupt remapping support according to the "Xen virtual IOMMU high level
design doc V3" (https://lists.xenproject.org/archives/html/xen-devel/
2016-11/msg01391.html).

- vIOMMU framework
The new framework provides viommu_ops and helper functions to abstract
vIOMMU operations (e.g. create, destroy, handle irq remapping requests
and so on). Vendors (Intel, ARM, AMD and so on) can implement their
vIOMMU callbacks.

- Virtual VTD
We enable the irq remapping function, covering both MSI and IOAPIC
interrupts. Posted interrupt mode emulation is not supported, nor is
running with posted interrupt mode enabled on the host together with a
virtual VTD; these will be added later.

Repo:
https://github.com/lantianyu/Xen/tree/xen_viommu_v3


Chao Gao (23):
  tools/libacpi: Add DMA remapping reporting (DMAR) ACPI table
structures
  tools/libacpi: Add new fields in acpi_config for DMAR table
  tools/libxl: Add a user configurable parameter to control vIOMMU
attributes
  tools/libxl: build DMAR table for a guest with one virtual VTD
  tools/libxl: create vIOMMU during domain construction
  tools/libxc: Add viommu operations in libxc
  vtd: add and align register definitions
  x86/hvm: Introduce a emulated VTD for HVM
  x86/vvtd: Add MMIO handler for VVTD
  x86/vvtd: Set Interrupt Remapping Table Pointer through GCMD
  x86/vvtd: Enable Interrupt Remapping through GCMD
  x86/vvtd: Process interrupt remapping request
  x86/vvtd: decode interrupt attribute from IRTE
  x86/vvtd: add a helper function to decide the interrupt format
  x86/vioapic: Hook interrupt delivery of vIOAPIC
  x86/vioapic: extend vioapic_get_vector() to support remapping format
RTE
  passthrough: move some fields of hvm_gmsi_info to a sub-structure
  tools/libxc: Add a new interface to bind remapping format msi with
pirq
  x86/vmsi: Hook delivering remapping format msi to guest
  x86/vvtd: Handle interrupt translation faults
  x86/vvtd: Enable Queued Invalidation through GCMD
  x86/vvtd: Add queued invalidation (QI) support
  x86/vvtd: save and restore emulated VT-d

Lan Tianyu (6):
  Xen/doc: Add Xen virtual IOMMU doc
  VIOMMU: Add vIOMMU helper functions to create, destroy vIOMMU instance
  DOMCTL: Introduce new DOMCTL commands for vIOMMU support
  VIOMMU: Add irq request callback to deal with irq remapping
  VIOMMU: Add get irq info callback to convert irq remapping request
  VIOMMU: Introduce callback of checking irq remapping mode

 docs/man/xl.cfg.pod.5.in   |   27 +
 docs/misc/viommu.txt   |  136 
 docs/misc/xen-command-line.markdown|7 +
 tools/libacpi/acpi2_0.h|   61 ++
 tools/libacpi/build.c  |   53 ++
 tools/libacpi/libacpi.h|   12 +
 tools/libxc/Makefile   |1 +
 tools/libxc/include/xenctrl.h  |   21 +
 tools/libxc/xc_domain.c|   53 ++
 tools/libxc/xc_viommu.c|   64 ++
 tools/libxl/libxl_create.c |   52 ++
 tools/libxl/libxl_types.idl|   12 +
 tools/libxl/libxl_x86.c|   20 +-
 tools/libxl/libxl_x86_acpi.c   |   98 ++-
 tools/xl/xl_parse.c|   52 +-
 xen/arch/x86/Kconfig   |1 +
 xen/arch/x86/hvm/irq.c |7 +
 xen/arch/x86/hvm/vioapic.c |   26 +-
 xen/arch/x86/hvm/vmsi.c|   18 +-
 xen/common/K

[Xen-devel] [PATCH V3 4/29] tools/libacpi: Add DMA remapping reporting (DMAR) ACPI table structures

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

Add the DMAR table structures according to Chapter 8 "BIOS Considerations"
of the VT-d spec Rev. 2.4.

VTd 
spec:http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 
---
 tools/libacpi/acpi2_0.h | 61 +
 1 file changed, 61 insertions(+)

diff --git a/tools/libacpi/acpi2_0.h b/tools/libacpi/acpi2_0.h
index 2619ba3..758a823 100644
--- a/tools/libacpi/acpi2_0.h
+++ b/tools/libacpi/acpi2_0.h
@@ -422,6 +422,65 @@ struct acpi_20_slit {
 };
 
 /*
+ * DMA Remapping Table header definition (DMAR)
+ */
+
+/*
+ * DMAR Flags.
+ */
+#define ACPI_DMAR_INTR_REMAP(1 << 0)
+#define ACPI_DMAR_X2APIC_OPT_OUT(1 << 1)
+
+struct acpi_dmar {
+struct acpi_header header;
+uint8_t host_address_width;
+uint8_t flags;
+uint8_t reserved[10];
+};
+
+/*
+ * Device Scope Types
+ */
+#define ACPI_DMAR_DEVICE_SCOPE_PCI_ENDPOINT 0x01
+#define ACPI_DMAR_DEVICE_SCOPE_PCI_SUB_HIERARCHY0x02
+#define ACPI_DMAR_DEVICE_SCOPE_IOAPIC   0x03
+#define ACPI_DMAR_DEVICE_SCOPE_HPET 0x04
+#define ACPI_DMAR_DEVICE_SCOPE_ACPI_NAMESPACE_DEVICE0x05
+
+struct dmar_device_scope {
+uint8_t type;
+uint8_t length;
+uint8_t reserved[2];
+uint8_t enumeration_id;
+uint8_t bus;
+uint16_t path[0];
+};
+
+/*
+ * DMA Remapping Hardware Unit Types
+ */
+#define ACPI_DMAR_TYPE_HARDWARE_UNIT0x00
+#define ACPI_DMAR_TYPE_RESERVED_MEMORY  0x01
+#define ACPI_DMAR_TYPE_ATSR 0x02
+#define ACPI_DMAR_TYPE_HARDWARE_AFFINITY0x03
+#define ACPI_DMAR_TYPE_ANDD 0x04
+
+/*
+ * DMA Remapping Hardware Unit Flags. All other bits are reserved and must be 
0.
+ */
+#define ACPI_DMAR_INCLUDE_PCI_ALL   (1 << 0)
+
+struct acpi_dmar_hardware_unit {
+uint16_t type;
+uint16_t length;
+uint8_t flags;
+uint8_t reserved;
+uint16_t pci_segment;
+uint64_t base_address;
+struct dmar_device_scope scope[0];
+};
+
+/*
  * Table Signatures.
  */
 #define ACPI_2_0_RSDP_SIGNATURE ASCII64('R','S','D',' ','P','T','R',' ')
@@ -435,6 +494,7 @@ struct acpi_20_slit {
 #define ACPI_2_0_WAET_SIGNATURE ASCII32('W','A','E','T')
 #define ACPI_2_0_SRAT_SIGNATURE ASCII32('S','R','A','T')
 #define ACPI_2_0_SLIT_SIGNATURE ASCII32('S','L','I','T')
+#define ACPI_2_0_DMAR_SIGNATURE ASCII32('D','M','A','R')
 
 /*
  * Table revision numbers.
@@ -449,6 +509,7 @@ struct acpi_20_slit {
 #define ACPI_1_0_FADT_REVISION 0x01
 #define ACPI_2_0_SRAT_REVISION 0x01
 #define ACPI_2_0_SLIT_REVISION 0x01
+#define ACPI_2_0_DMAR_REVISION 0x01
 
 #pragma pack ()
 
-- 
1.8.3.1




[Xen-devel] [PATCH V3 11/29] x86/hvm: Introduce a emulated VTD for HVM

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

This patch adds create/destroy functions for the emulated VTD
and adapts it to the common VIOMMU abstraction.

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 
---
 xen/drivers/passthrough/vtd/Makefile |   7 +-
 xen/drivers/passthrough/vtd/iommu.h  |  23 +-
 xen/drivers/passthrough/vtd/vvtd.c   | 147 +++
 3 files changed, 170 insertions(+), 7 deletions(-)
 create mode 100644 xen/drivers/passthrough/vtd/vvtd.c

diff --git a/xen/drivers/passthrough/vtd/Makefile 
b/xen/drivers/passthrough/vtd/Makefile
index f302653..163c7fe 100644
--- a/xen/drivers/passthrough/vtd/Makefile
+++ b/xen/drivers/passthrough/vtd/Makefile
@@ -1,8 +1,9 @@
 subdir-$(CONFIG_X86) += x86
 
-obj-y += iommu.o
 obj-y += dmar.o
-obj-y += utils.o
-obj-y += qinval.o
 obj-y += intremap.o
+obj-y += iommu.o
+obj-y += qinval.o
 obj-y += quirks.o
+obj-y += utils.o
+obj-$(CONFIG_VIOMMU) += vvtd.o
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index d7e433e..ef038c9 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -66,6 +66,12 @@
 #define VER_MAJOR(v)(((v) & 0xf0) >> 4)
 #define VER_MINOR(v)((v) & 0x0f)
 
+/* Supported Adjusted Guest Address Widths */
+#define DMA_CAP_SAGAW_SHIFT 8
+ /* 39-bit AGAW, 3-level page-table */
+#define DMA_CAP_SAGAW_39bit (0x2ULL << DMA_CAP_SAGAW_SHIFT)
+#define DMA_CAP_ND_64K  6ULL
+
 /*
  * Decoding Capability Register
  */
@@ -74,6 +80,7 @@
 #define cap_write_drain(c) (((c) >> 54) & 1)
 #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
 #define cap_num_fault_regs(c)  ((((c) >> 40) & 0xff) + 1)
+#define cap_set_num_fault_regs(c)  ((((c) - 1) & 0xff) << 40)
 #define cap_pgsel_inv(c)   (((c) >> 39) & 1)
 
 #define cap_super_page_val(c)  (((c) >> 34) & 0xf)
@@ -85,11 +92,13 @@
 #define cap_sps_1tb(c) ((c >> 37) & 1)
 
 #define cap_fault_reg_offset(c)    ((((c) >> 24) & 0x3ff) * 16)
+#define cap_set_fault_reg_offset(c) ((((c) / 16) & 0x3ff) << 24 )
 
 #define cap_isoch(c)(((c) >> 23) & 1)
 #define cap_qos(c)(((c) >> 22) & 1)
 #define cap_mgaw(c)    ((((c) >> 16) & 0x3f) + 1)
-#define cap_sagaw(c)    (((c) >> 8) & 0x1f)
+#define cap_set_mgaw(c) ((((c) - 1) & 0x3f) << 16)
+#define cap_sagaw(c)    (((c) >> DMA_CAP_SAGAW_SHIFT) & 0x1f)
 #define cap_caching_mode(c)(((c) >> 7) & 1)
 #define cap_phmr(c)(((c) >> 6) & 1)
 #define cap_plmr(c)(((c) >> 5) & 1)
@@ -104,10 +113,16 @@
 #define ecap_niotlb_iunits(e)   ((((e) >> 24) & 0xff) + 1)
 #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
 #define ecap_coherent(e) ((e >> 0) & 0x1)
-#define ecap_queued_inval(e) ((e >> 1) & 0x1)
+#define DMA_ECAP_QI_SHIFT1
+#define DMA_ECAP_QI  (1ULL << DMA_ECAP_QI_SHIFT)
+#define ecap_queued_inval(e) ((e >> DMA_ECAP_QI_SHIFT) & 0x1)
 #define ecap_dev_iotlb(e)((e >> 2) & 0x1)
-#define ecap_intr_remap(e)   ((e >> 3) & 0x1)
-#define ecap_eim(e)  ((e >> 4) & 0x1)
+#define DMA_ECAP_IR_SHIFT3
+#define DMA_ECAP_IR  (1ULL << DMA_ECAP_IR_SHIFT)
+#define ecap_intr_remap(e)   ((e >> DMA_ECAP_IR_SHIFT) & 0x1)
+#define DMA_ECAP_EIM_SHIFT   4
+#define DMA_ECAP_EIM (1ULL << DMA_ECAP_EIM_SHIFT)
+#define ecap_eim(e)  ((e >> DMA_ECAP_EIM_SHIFT) & 0x1)
 #define ecap_cache_hints(e)  ((e >> 5) & 0x1)
 #define ecap_pass_thru(e)((e >> 6) & 0x1)
 #define ecap_snp_ctl(e)  ((e >> 7) & 0x1)
diff --git a/xen/drivers/passthrough/vtd/vvtd.c 
b/xen/drivers/passthrough/vtd/vvtd.c
new file mode 100644
index 000..c851ec7
--- /dev/null
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -0,0 +1,147 @@
+/*
+ * vvtd.c
+ *
+ * virtualize VTD for HVM.
+ *
+ * Copyright (C) 2017 Chao Gao, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "iommu.h"
+
+/* Supported ca

[Xen-devel] [PATCH V3 8/29] tools/libxl: create vIOMMU during domain construction

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

If guest is configured to have a vIOMMU, create it during domain construction.

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 

---
v3:
 - Remove the process of querying capabilities.
---
 tools/libxl/libxl_x86.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
index 23c9a55..25cae5f 100644
--- a/tools/libxl/libxl_x86.c
+++ b/tools/libxl/libxl_x86.c
@@ -341,8 +341,25 @@ int libxl__arch_domain_create(libxl__gc *gc, 
libxl_domain_config *d_config,
 if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_HVM) {
 unsigned long shadow = DIV_ROUNDUP(d_config->b_info.shadow_memkb,
1024);
+int i;
+
 xc_shadow_control(ctx->xch, domid, XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION,
   NULL, 0, &shadow, 0, NULL);
+
+for (i = 0; i < d_config->b_info.num_viommus; i++) {
+uint32_t id;
+libxl_viommu_info *viommu = d_config->b_info.viommu + i;
+
+if (viommu->type == LIBXL_VIOMMU_TYPE_INTEL_VTD) {
+ret = xc_viommu_create(ctx->xch, domid, VIOMMU_TYPE_INTEL_VTD,
+   viommu->base_addr, viommu->cap, &id);
+if (ret) {
+LOGED(ERROR, domid, "create vIOMMU fail");
+ret = ERROR_FAIL;
+goto out;
+}
+}
+}
 }
 
 if (d_config->c_info.type == LIBXL_DOMAIN_TYPE_PV &&
-- 
1.8.3.1




[Xen-devel] [PATCH V3 17/29] x86/vvtd: add a helper function to decide the interrupt format

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

Different platforms may use different methods to distinguish
remapping-format interrupts from normal-format interrupts.

Intel uses one bit in the IOAPIC RTE or MSI address register to
indicate that the interrupt is in remapping format. vvtd will handle
all the interrupts when .check_irq_remapping() returns true.

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 
---
 xen/drivers/passthrough/vtd/vvtd.c | 25 -
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/vtd/vvtd.c 
b/xen/drivers/passthrough/vtd/vvtd.c
index 5e22ace..bd1cadd 100644
--- a/xen/drivers/passthrough/vtd/vvtd.c
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -536,6 +536,28 @@ static int vvtd_get_irq_info(struct domain *d,
 return 0;
 }
 
+/* Probe whether the interrupt request is in remapping format */
+static bool vvtd_is_remapping(struct domain *d,
+  struct arch_irq_remapping_request *irq)
+{
+if ( irq->type == VIOMMU_REQUEST_IRQ_APIC )
+{
+struct IO_APIC_route_remap_entry rte = { .val = irq->msg.rte };
+
+return rte.format;
+}
+else if ( irq->type == VIOMMU_REQUEST_IRQ_MSI )
+{
+struct msi_msg_remap_entry msi_msg =
+{ .address_lo = { .val = irq->msg.msi.addr } };
+
+return msi_msg.address_lo.format;
+}
+ASSERT_UNREACHABLE();
+
+return 0;
+}
+
 static void vvtd_reset(struct vvtd *vvtd, uint64_t capability)
 {
 uint64_t cap = cap_set_num_fault_regs(1ULL) |
@@ -607,7 +629,8 @@ struct viommu_ops vvtd_hvm_vmx_ops = {
 .create = vvtd_create,
 .destroy = vvtd_destroy,
 .handle_irq_request = vvtd_handle_irq_request,
-.get_irq_info = vvtd_get_irq_info
+.get_irq_info = vvtd_get_irq_info,
+.check_irq_remapping = vvtd_is_remapping
 };
 
 static int vvtd_register(void)
-- 
1.8.3.1




[Xen-devel] [PATCH V3 13/29] x86/vvtd: Set Interrupt Remapping Table Pointer through GCMD

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

Software sets this field to set/update the interrupt remapping table pointer
used by hardware. The interrupt remapping table pointer is specified through
the Interrupt Remapping Table Address (IRTA_REG) register.

This patch emulates this operation and adds some new fields in VVTD to track
information about the interrupt remapping table (e.g. the table's gfn and
max supported entries).

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 

---
v3:
 - ignore unaligned r/w of vt-d hardware registers and return X86EMUL_OK
---
 xen/drivers/passthrough/vtd/iommu.h | 12 ++-
 xen/drivers/passthrough/vtd/vvtd.c  | 69 +
 2 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index ef038c9..a0d5ec8 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -153,6 +153,8 @@
 #define DMA_GCMD_IRE(((u64)1) << 25)
 #define DMA_GCMD_SIRTP  (((u64)1) << 24)
 #define DMA_GCMD_CFI(((u64)1) << 23)
+/* mask of one-shot bits */
+#define DMA_GCMD_ONE_SHOT_MASK 0x96ff 
 
 /* GSTS_REG */
 #define DMA_GSTS_TES(((u64)1) << 31)
@@ -162,9 +164,17 @@
 #define DMA_GSTS_WBFS   (((u64)1) << 27)
 #define DMA_GSTS_QIES   (((u64)1) <<26)
 #define DMA_GSTS_IRES   (((u64)1) <<25)
-#define DMA_GSTS_SIRTPS (((u64)1) << 24)
+#define DMA_GSTS_SIRTPS_SHIFT   24
+#define DMA_GSTS_SIRTPS (((u64)1) << DMA_GSTS_SIRTPS_SHIFT)
 #define DMA_GSTS_CFIS   (((u64)1) <<23)
 
+/* IRTA_REG */
+/* The base of 4KB aligned interrupt remapping table */
+#define DMA_IRTA_ADDR(val)  ((val) & ~0xfffULL)
+/* The size of remapping table is 2^(x+1), where x is the size field in IRTA */
+#define DMA_IRTA_S(val) (val & 0xf)
+#define DMA_IRTA_SIZE(val)  (1UL << (DMA_IRTA_S(val) + 1))
+
 /* PMEN_REG */
 #define DMA_PMEN_EPM(((u32)1) << 31)
 #define DMA_PMEN_PRS(((u32)1) << 0)
diff --git a/xen/drivers/passthrough/vtd/vvtd.c 
b/xen/drivers/passthrough/vtd/vvtd.c
index a3002c3..6736956 100644
--- a/xen/drivers/passthrough/vtd/vvtd.c
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -32,6 +32,13 @@
 /* Supported capabilities by vvtd */
 unsigned int vvtd_caps = VIOMMU_CAP_IRQ_REMAPPING;
 
+struct hvm_hw_vvtd_status {
+uint32_t eim_enabled : 1;
+uint32_t irt_max_entry;
+/* Interrupt remapping table base gfn */
+uint64_t irt;
+};
+
 union hvm_hw_vvtd_regs {
 uint32_t data32[256];
 uint64_t data64[128];
@@ -43,6 +50,8 @@ struct vvtd {
 uint64_t length;
 /* Point back to the owner domain */
 struct domain *domain;
+
+struct hvm_hw_vvtd_status status;
 union hvm_hw_vvtd_regs *regs;
 struct page_info *regs_page;
 };
@@ -70,6 +79,11 @@ struct vvtd *domain_vvtd(struct domain *d)
 return (d->viommu) ? d->viommu->priv : NULL;
 }
 
+static inline void vvtd_set_bit(struct vvtd *vvtd, uint32_t reg, int nr)
+{
+__set_bit(nr, &vvtd->regs->data32[reg/sizeof(uint32_t)]);
+}
+
 static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg, uint32_t value)
 {
 vtd->regs->data32[reg/sizeof(uint32_t)] = value;
@@ -91,6 +105,44 @@ static inline uint64_t vvtd_get_reg_quad(struct vvtd *vtd, 
uint32_t reg)
 return vtd->regs->data64[reg/sizeof(uint64_t)];
 }
 
+static void vvtd_handle_gcmd_sirtp(struct vvtd *vvtd, uint32_t val)
+{
+uint64_t irta = vvtd_get_reg_quad(vvtd, DMAR_IRTA_REG);
+
+if ( !(val & DMA_GCMD_SIRTP) )
+return;
+
+vvtd->status.irt = DMA_IRTA_ADDR(irta) >> PAGE_SHIFT;
+vvtd->status.irt_max_entry = DMA_IRTA_SIZE(irta);
+vvtd->status.eim_enabled = !!(irta & IRTA_EIME);
+vvtd_info("Update IR info (addr=%lx eim=%d size=%d).",
+  vvtd->status.irt, vvtd->status.eim_enabled,
+  vvtd->status.irt_max_entry);
+vvtd_set_bit(vvtd, DMAR_GSTS_REG, DMA_GSTS_SIRTPS_SHIFT);
+}
+
+static int vvtd_write_gcmd(struct vvtd *vvtd, uint32_t val)
+{
+uint32_t orig = vvtd_get_reg(vvtd, DMAR_GSTS_REG);
+uint32_t changed;
+
+orig = orig & DMA_GCMD_ONE_SHOT_MASK;   /* reset the one-shot bits */
+changed = orig ^ val;
+
+if ( !changed )
+return X86EMUL_OKAY;
+
+if ( changed & (changed - 1) )
+vvtd_info("Guest attempts to write %x to GCMD (current GSTS is %x)," 
+  "it would lead to update multiple fields",
+  val, orig);
+
+if ( changed & DMA_GCMD_SIRTP )
+vvtd_handle_gcmd_sirtp(vvtd, val);
+
+return X86EMUL_OKAY;
+}
+
 static int vvtd_in_range(struct vcpu *v, unsigned long addr)
 {
 struct vvtd *vvtd = domain_vvtd(v->domain);
@@ -135,12 +187,17 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
 {
 switch ( offset )
 {
+case DMAR_GCMD_REG:
+return vvtd_write_gcmd(vvtd, val);

[Xen-devel] [PATCH V3 14/29] x86/vvtd: Enable Interrupt Remapping through GCMD

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

Software writes this field to enable/disable interrupt remapping. This patch
emulates the IRES field of GCMD.
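
For reference, a minimal guest-side sketch (not part of the patch) of toggling
this bit; the MMIO mapping and the simplified handling of one-shot status bits
are assumptions.

    /* Illustrative only: enable/disable IR through GCMD.IRE and wait for
     * the new state to be reflected in GSTS.IRES. */
    #include <stdint.h>

    #define DMAR_GCMD_REG   0x18
    #define DMAR_GSTS_REG   0x1c
    #define DMA_GCMD_IRE    (1u << 25)
    #define DMA_GSTS_IRES   (1u << 25)
    #define DMA_GSTS_SIRTPS (1u << 24)

    static void guest_set_intremap(volatile uint8_t *vtd_base, int enable)
    {
        volatile uint32_t *gcmd = (volatile uint32_t *)(vtd_base + DMAR_GCMD_REG);
        volatile uint32_t *gsts = (volatile uint32_t *)(vtd_base + DMAR_GSTS_REG);
        /* Start from GSTS so the other enable bits are preserved; one-shot
         * status bits (e.g. SIRTPS) must not be written back (simplified). */
        uint32_t val = *gsts & ~DMA_GSTS_SIRTPS;

        *gcmd = enable ? (val | DMA_GCMD_IRE) : (val & ~DMA_GCMD_IRE);

        while ( !!(*gsts & DMA_GSTS_IRES) != !!enable )
            ;
    }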

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 
---
 xen/drivers/passthrough/vtd/iommu.h |  3 ++-
 xen/drivers/passthrough/vtd/vvtd.c  | 30 +-
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index a0d5ec8..703726f 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -163,7 +163,8 @@
 #define DMA_GSTS_AFLS   (((u64)1) << 28)
 #define DMA_GSTS_WBFS   (((u64)1) << 27)
 #define DMA_GSTS_QIES   (((u64)1) <<26)
-#define DMA_GSTS_IRES   (((u64)1) <<25)
+#define DMA_GSTS_IRES_SHIFT 25
+#define DMA_GSTS_IRES   (((u64)1) << DMA_GSTS_IRES_SHIFT)
 #define DMA_GSTS_SIRTPS_SHIFT   24
 #define DMA_GSTS_SIRTPS (((u64)1) << DMA_GSTS_SIRTPS_SHIFT)
 #define DMA_GSTS_CFIS   (((u64)1) <<23)
diff --git a/xen/drivers/passthrough/vtd/vvtd.c 
b/xen/drivers/passthrough/vtd/vvtd.c
index 6736956..a0f63e9 100644
--- a/xen/drivers/passthrough/vtd/vvtd.c
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -33,7 +33,8 @@
 unsigned int vvtd_caps = VIOMMU_CAP_IRQ_REMAPPING;
 
 struct hvm_hw_vvtd_status {
-uint32_t eim_enabled : 1;
+uint32_t eim_enabled : 1,
+ intremap_enabled : 1;
 uint32_t irt_max_entry;
 /* Interrupt remapping table base gfn */
 uint64_t irt;
@@ -84,6 +85,11 @@ static inline void vvtd_set_bit(struct vvtd *vvtd, uint32_t 
reg, int nr)
 __set_bit(nr, &vvtd->regs->data32[reg/sizeof(uint32_t)]);
 }
 
+static inline void vvtd_clear_bit(struct vvtd *vvtd, uint32_t reg, int nr)
+{
+__clear_bit(nr, &vvtd->regs->data32[reg/sizeof(uint32_t)]);
+}
+
 static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg, uint32_t value)
 {
 vtd->regs->data32[reg/sizeof(uint32_t)] = value;
@@ -105,6 +111,23 @@ static inline uint64_t vvtd_get_reg_quad(struct vvtd *vtd, 
uint32_t reg)
 return vtd->regs->data64[reg/sizeof(uint64_t)];
 }
 
+static void vvtd_handle_gcmd_ire(struct vvtd *vvtd, uint32_t val)
+{
+vvtd_info("%sable Interrupt Remapping",
+  (val & DMA_GCMD_IRE) ? "En" : "Dis");
+
+if ( val & DMA_GCMD_IRE )
+{
+vvtd->status.intremap_enabled = true;
+vvtd_set_bit(vvtd, DMAR_GSTS_REG, DMA_GSTS_IRES_SHIFT);
+}
+else
+{
+vvtd->status.intremap_enabled = false;
+vvtd_clear_bit(vvtd, DMAR_GSTS_REG, DMA_GSTS_IRES_SHIFT);
+}
+}
+
 static void vvtd_handle_gcmd_sirtp(struct vvtd *vvtd, uint32_t val)
 {
 uint64_t irta = vvtd_get_reg_quad(vvtd, DMAR_IRTA_REG);
@@ -112,6 +135,9 @@ static void vvtd_handle_gcmd_sirtp(struct vvtd *vvtd, 
uint32_t val)
 if ( !(val & DMA_GCMD_SIRTP) )
 return;
 
+if ( vvtd->status.intremap_enabled )
+vvtd_info("Update Interrupt Remapping Table when active\n");
+
 vvtd->status.irt = DMA_IRTA_ADDR(irta) >> PAGE_SHIFT;
 vvtd->status.irt_max_entry = DMA_IRTA_SIZE(irta);
 vvtd->status.eim_enabled = !!(irta & IRTA_EIME);
@@ -139,6 +165,8 @@ static int vvtd_write_gcmd(struct vvtd *vvtd, uint32_t val)
 
 if ( changed & DMA_GCMD_SIRTP )
 vvtd_handle_gcmd_sirtp(vvtd, val);
+if ( changed & DMA_GCMD_IRE )
+vvtd_handle_gcmd_ire(vvtd, val);
 
 return X86EMUL_OKAY;
 }
-- 
1.8.3.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH V3 18/29] VIOMMU: Add irq request callback to deal with irq remapping

2017-09-22 Thread Lan Tianyu
This patch adds an irq request callback so that platform implementations
can handle irq remapping requests.
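
As an example of the intended call flow, here is a sketch (not part of the
patch) of how interrupt-delivery code would hand a guest MSI to the vIOMMU
using the helpers introduced below; the caller context (d, bdf, addr, data)
is assumed.

    /* Illustrative only: build a remapping request and dispatch it. */
    #include <xen/sched.h>
    #include <xen/viommu.h>
    #include <asm/viommu.h>

    static int forward_guest_msi(struct domain *d, uint16_t bdf,
                                 uint64_t addr, uint32_t data)
    {
        struct arch_irq_remapping_request req;

        irq_request_msi_fill(&req, bdf, addr, data);

        /* Returns -EINVAL if the domain has no vIOMMU or the vIOMMU type
         * does not implement handle_irq_request. */
        return viommu_handle_irq_request(d, &req);
    }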

Signed-off-by: Lan Tianyu 
---
 xen/common/viommu.c  | 15 +
 xen/include/asm-x86/viommu.h | 72 
 xen/include/xen/viommu.h | 11 +++
 3 files changed, 98 insertions(+)
 create mode 100644 xen/include/asm-x86/viommu.h

diff --git a/xen/common/viommu.c b/xen/common/viommu.c
index 55feb5d..b517158 100644
--- a/xen/common/viommu.c
+++ b/xen/common/viommu.c
@@ -163,6 +163,21 @@ int viommu_domctl(struct domain *d, struct 
xen_domctl_viommu_op *op,
 return rc;
 }
 
+int viommu_handle_irq_request(struct domain *d,
+  struct arch_irq_remapping_request *request)
+{
+struct viommu *viommu = d->viommu;
+
+if ( !viommu )
+return -EINVAL;
+
+ASSERT(viommu->ops);
+if ( !viommu->ops->handle_irq_request )
+return -EINVAL;
+
+return viommu->ops->handle_irq_request(d, request);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/asm-x86/viommu.h b/xen/include/asm-x86/viommu.h
new file mode 100644
index 000..366fbb6
--- /dev/null
+++ b/xen/include/asm-x86/viommu.h
@@ -0,0 +1,72 @@
+/*
+ * include/asm-x86/viommu.h
+ *
+ * Copyright (c) 2017 Intel Corporation.
+ * Author: Lan Tianyu 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#ifndef __ARCH_X86_VIOMMU_H__
+#define __ARCH_X86_VIOMMU_H__
+
+/* IRQ request type */
+#define VIOMMU_REQUEST_IRQ_MSI  0
+#define VIOMMU_REQUEST_IRQ_APIC 1
+
+struct arch_irq_remapping_request
+{
+union {
+/* MSI */
+struct {
+uint64_t addr;
+uint32_t data;
+} msi;
+/* Redirection Entry in IOAPIC */
+uint64_t rte;
+} msg;
+uint16_t source_id;
+uint8_t type;
+};
+
+static inline void irq_request_ioapic_fill(struct arch_irq_remapping_request 
*req,
+   uint32_t ioapic_id, uint64_t rte)
+{
+ASSERT(req);
+req->type = VIOMMU_REQUEST_IRQ_APIC;
+req->source_id = ioapic_id;
+req->msg.rte = rte;
+}
+
+static inline void irq_request_msi_fill(struct arch_irq_remapping_request *req,
+uint32_t source_id, uint64_t addr,
+uint32_t data)
+{
+ASSERT(req);
+req->type = VIOMMU_REQUEST_IRQ_MSI;
+req->source_id = source_id;
+req->msg.msi.addr = addr;
+req->msg.msi.data = data;
+}
+
+#endif /* __ARCH_X86_VIOMMU_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/viommu.h b/xen/include/xen/viommu.h
index baa8ab7..230f6b1 100644
--- a/xen/include/xen/viommu.h
+++ b/xen/include/xen/viommu.h
@@ -21,10 +21,13 @@
 #define __XEN_VIOMMU_H__
 
 struct viommu;
+struct arch_irq_remapping_request;
 
 struct viommu_ops {
 int (*create)(struct domain *d, struct viommu *viommu);
 int (*destroy)(struct viommu *viommu);
+int (*handle_irq_request)(struct domain *d,
+  struct arch_irq_remapping_request *request);
 };
 
 struct viommu {
@@ -45,11 +48,19 @@ int viommu_register_type(uint64_t type, struct viommu_ops 
*ops);
 int viommu_destroy_domain(struct domain *d);
 int viommu_domctl(struct domain *d, struct xen_domctl_viommu_op *op,
   bool_t *need_copy);
+int viommu_handle_irq_request(struct domain *d,
+  struct arch_irq_remapping_request *request);
 #else
 static inline int viommu_register_type(uint64_t type, struct viommu_ops *ops)
 {
 return -EINVAL;
 }
+static inline int
+viommu_handle_irq_request(struct domain *d,
+  struct arch_irq_remapping_request *request)
+{
+return -EINVAL;
+}
 #endif
 
 #endif /* __XEN_VIOMMU_H__ */
-- 
1.8.3.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH V3 9/29] tools/libxc: Add viommu operations in libxc

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

This patch adds the XEN_DOMCTL_viommu_op hypercall. The hypercall
comprises two sub-commands (a usage sketch follows below):
- create(): create a vIOMMU in Xen, given the viommu type, register-set
location and capabilities
- destroy(): destroy the vIOMMU specified by viommu_id
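
A toolstack-side usage sketch, not part of the patch; the type, register-set
base address and capability values are whatever the caller obtains elsewhere,
so they are left as parameters here.

    /* Illustrative only: create a vIOMMU for a domain, later tear it down. */
    #include <xenctrl.h>

    static int example_viommu(xc_interface *xch, domid_t domid,
                              uint64_t viommu_type, uint64_t base_addr,
                              uint64_t caps)
    {
        uint32_t viommu_id;
        int rc = xc_viommu_create(xch, domid, viommu_type, base_addr,
                                  caps, &viommu_id);

        if ( rc )
            return rc;

        /* ... domain runs with the vIOMMU ... */

        return xc_viommu_destroy(xch, domid, viommu_id);
    }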

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 
---
v3:
 - Remove API for querying viommu capabilities
 - Remove pointless cast
 - Polish commit message
 - Coding style
---
 tools/libxc/Makefile  |  1 +
 tools/libxc/include/xenctrl.h |  4 +++
 tools/libxc/xc_viommu.c   | 64 +++
 3 files changed, 69 insertions(+)
 create mode 100644 tools/libxc/xc_viommu.c

diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index 9a019e8..7d8c4b4 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -51,6 +51,7 @@ CTRL_SRCS-$(CONFIG_MiniOS) += xc_minios.c
 CTRL_SRCS-y   += xc_evtchn_compat.c
 CTRL_SRCS-y   += xc_gnttab_compat.c
 CTRL_SRCS-y   += xc_devicemodel_compat.c
+CTRL_SRCS-y   += xc_viommu.c
 
 GUEST_SRCS-y :=
 GUEST_SRCS-y += xg_private.c xc_suspend.c
diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 43151cb..bedca1f 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2501,6 +2501,10 @@ enum xc_static_cpu_featuremask {
 const uint32_t *xc_get_static_cpu_featuremask(enum xc_static_cpu_featuremask);
 const uint32_t *xc_get_feature_deep_deps(uint32_t feature);
 
+int xc_viommu_create(xc_interface *xch, domid_t dom, uint64_t type,
+ uint64_t base_addr, uint64_t cap, uint32_t *viommu_id);
+int xc_viommu_destroy(xc_interface *xch, domid_t dom, uint32_t viommu_id);
+
 #endif
 
 int xc_livepatch_upload(xc_interface *xch,
diff --git a/tools/libxc/xc_viommu.c b/tools/libxc/xc_viommu.c
new file mode 100644
index 000..17507c5
--- /dev/null
+++ b/tools/libxc/xc_viommu.c
@@ -0,0 +1,64 @@
+/*
+ * xc_viommu.c
+ *
+ * viommu related API functions.
+ *
+ * Copyright (C) 2017 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License, version 2.1, as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "xc_private.h"
+
+int xc_viommu_create(xc_interface *xch, domid_t dom, uint64_t type,
+ uint64_t base_addr, uint64_t cap, uint32_t *viommu_id)
+{
+int rc;
+
+DECLARE_DOMCTL;
+
+domctl.cmd = XEN_DOMCTL_viommu_op;
+domctl.domain = dom;
+domctl.u.viommu_op.cmd = XEN_DOMCTL_create_viommu;
+domctl.u.viommu_op.u.create.viommu_type = type;
+domctl.u.viommu_op.u.create.base_address = base_addr;
+domctl.u.viommu_op.u.create.capabilities = cap;
+
+rc = do_domctl(xch, &domctl);
+if ( !rc )
+*viommu_id = domctl.u.viommu_op.u.create.viommu_id;
+
+return rc;
+}
+
+int xc_viommu_destroy(xc_interface *xch, domid_t dom, uint32_t viommu_id)
+{
+DECLARE_DOMCTL;
+
+domctl.cmd = XEN_DOMCTL_viommu_op;
+domctl.domain = dom;
+domctl.u.viommu_op.cmd = XEN_DOMCTL_destroy_viommu;
+domctl.u.viommu_op.u.destroy.viommu_id = viommu_id;
+
+return do_domctl(xch, &domctl);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
1.8.3.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH V3 23/29] passthrough: move some fields of hvm_gmsi_info to a sub-structure

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

No functional change. This is a preparation for introducing new fields in
hvm_gmsi_info to manage a remapping-format MSI bound to a physical MSI.
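
The asm-x86/hvm/irq.h hunk is cut short in this archive copy; judging from
the field accesses in the io.c hunk below, the reworked structure is
presumably along these lines (a reconstruction, not the authoritative
declaration):

    /* Assumed shape of hvm_gmsi_info after this patch: gvec/gflags move
     * into a 'legacy' sub-structure, the remaining fields stay as before. */
    struct hvm_gmsi_info {
        struct {
            uint32_t gvec;
            uint32_t gflags;
        } legacy;
        int dest_vcpu_id;   /* -1 means multi-destination */
        bool posted;        /* deliver directly to guest via VT-d PI? */
    };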

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 
---
 xen/arch/x86/hvm/vmsi.c   |  4 ++--
 xen/drivers/passthrough/io.c  | 34 ++
 xen/include/asm-x86/hvm/irq.h |  8 ++--
 3 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index 9b35e9b..7f21853 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -101,8 +101,8 @@ int vmsi_deliver(
 
 void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
 {
-uint32_t flags = pirq_dpci->gmsi.gflags;
-int vector = pirq_dpci->gmsi.gvec;
+uint32_t flags = pirq_dpci->gmsi.legacy.gflags;
+int vector = pirq_dpci->gmsi.legacy.gvec;
 uint8_t dest = (uint8_t)flags;
 bool dest_mode = flags & XEN_DOMCTL_VMSI_X86_DM_MASK;
 uint8_t delivery_mode = MASK_EXTR(flags, XEN_DOMCTL_VMSI_X86_DELIV_MASK);
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index ec9f41a..fb44223 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -350,8 +350,8 @@ int pt_irq_create_bind(
 {
 pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI |
HVM_IRQ_DPCI_GUEST_MSI;
-pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
-pirq_dpci->gmsi.gflags = gflags;
+pirq_dpci->gmsi.legacy.gvec = pt_irq_bind->u.msi.gvec;
+pirq_dpci->gmsi.legacy.gflags = gflags;
 /*
  * 'pt_irq_create_bind' can be called after 'pt_irq_destroy_bind'.
  * The 'pirq_cleanup_check' which would free the structure is only
@@ -383,8 +383,8 @@ int pt_irq_create_bind(
 }
 if ( unlikely(rc) )
 {
-pirq_dpci->gmsi.gflags = 0;
-pirq_dpci->gmsi.gvec = 0;
+pirq_dpci->gmsi.legacy.gflags = 0;
+pirq_dpci->gmsi.legacy.gvec = 0;
 pirq_dpci->dom = NULL;
 pirq_dpci->flags = 0;
 pirq_cleanup_check(info, d);
@@ -403,21 +403,22 @@ int pt_irq_create_bind(
 }
 
 /* If pirq is already mapped as vmsi, update guest data/addr. */
-if ( pirq_dpci->gmsi.gvec != pt_irq_bind->u.msi.gvec ||
- pirq_dpci->gmsi.gflags != gflags )
+if ( pirq_dpci->gmsi.legacy.gvec != pt_irq_bind->u.msi.gvec ||
+ pirq_dpci->gmsi.legacy.gflags != gflags )
 {
 /* Directly clear pending EOIs before enabling new MSI info. */
 pirq_guest_eoi(info);
 
-pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
-pirq_dpci->gmsi.gflags = gflags;
+}
+pirq_dpci->gmsi.legacy.gvec = pt_irq_bind->u.msi.gvec;
+pirq_dpci->gmsi.legacy.gflags = gflags;
 }
 }
 /* Calculate dest_vcpu_id for MSI-type pirq migration. */
-dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
+dest = MASK_EXTR(pirq_dpci->gmsi.legacy.gflags,
  XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
-dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;
-delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
+dest_mode = pirq_dpci->gmsi.legacy.gflags & 
XEN_DOMCTL_VMSI_X86_DM_MASK;
+delivery_mode = MASK_EXTR(pirq_dpci->gmsi.legacy.gflags,
   XEN_DOMCTL_VMSI_X86_DELIV_MASK);
 
 dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
@@ -430,7 +431,7 @@ int pt_irq_create_bind(
 {
 if ( delivery_mode == dest_LowestPrio )
 vcpu = vector_hashing_dest(d, dest, dest_mode,
-   pirq_dpci->gmsi.gvec);
+   pirq_dpci->gmsi.legacy.gvec);
 if ( vcpu )
 pirq_dpci->gmsi.posted = true;
 }
@@ -440,7 +441,7 @@ int pt_irq_create_bind(
 /* Use interrupt posting if it is supported. */
 if ( iommu_intpost )
 pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
-   info, pirq_dpci->gmsi.gvec);
+   info, pirq_dpci->gmsi.legacy.gvec);
 
 if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
 {
@@ -835,11 +836,12 @@ static int _hvm_dpci_msi_eoi(struct domain *d,
 int vector = (long)arg;
 
 if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
- (pirq_dpci->gmsi.gvec == vector) )
+ (pirq_dpci->gmsi.legacy.gvec == vector) )
 {
-  

[Xen-devel] [PATCH V3 5/29] tools/libacpi: Add new fields in acpi_config for DMAR table

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

The BIOS reports the remapping hardware units in a platform to system software
through the DMA Remapping Reporting (DMAR) ACPI table.
New fields are introduced for the DMAR table. These fields are set by the
toolstack when parsing the guest's config file. construct_dmar() is added to
build the DMAR table according to the new fields.
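
A sketch (not part of the patch) of how a caller could fill the new
acpi_config fields and invoke construct_dmar(); all concrete values below are
placeholder assumptions.

    /* Illustrative only: populate the new fields and build the table. */
    static struct acpi_dmar *example_build_dmar(struct acpi_ctxt *ctxt,
                                                struct acpi_config *config)
    {
        config->iommu_intremap_supported = true;
        config->iommu_x2apic_supported   = true;
        config->host_addr_width = 39;            /* assumed */
        config->ioapic_id       = 1;             /* assumed vIOAPIC id */
        config->ioapic_bus      = 0xff;          /* assumed vIOAPIC bus */
        config->ioapic_devfn    = 0x00;          /* assumed vIOAPIC devfn */
        config->iommu_base_addr = 0xfed90000UL;  /* assumed register base */

        return construct_dmar(ctxt, config);
    }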

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 
---
v3:
 - Remove chip-set specific IOAPIC BDF. Instead, let IOAPIC-related
 info be passed by struct acpi_config.

---
 tools/libacpi/build.c   | 53 +
 tools/libacpi/libacpi.h | 12 +++
 2 files changed, 65 insertions(+)

diff --git a/tools/libacpi/build.c b/tools/libacpi/build.c
index f9881c9..5ee8fcd 100644
--- a/tools/libacpi/build.c
+++ b/tools/libacpi/build.c
@@ -303,6 +303,59 @@ static struct acpi_20_slit *construct_slit(struct 
acpi_ctxt *ctxt,
 return slit;
 }
 
+/*
+ * Only one DMA remapping hardware unit is exposed and all devices
+ * are under the remapping hardware unit. I/O APIC should be explicitly
+ * enumerated.
+ */
+struct acpi_dmar *construct_dmar(struct acpi_ctxt *ctxt,
+ const struct acpi_config *config)
+{
+struct acpi_dmar *dmar;
+struct acpi_dmar_hardware_unit *drhd;
+struct dmar_device_scope *scope;
+unsigned int size;
+unsigned int ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
+
+size = sizeof(*dmar) + sizeof(*drhd) + ioapic_scope_size;
+
+dmar = ctxt->mem_ops.alloc(ctxt, size, 16);
+if ( !dmar )
+return NULL;
+
+memset(dmar, 0, size);
+dmar->header.signature = ACPI_2_0_DMAR_SIGNATURE;
+dmar->header.revision = ACPI_2_0_DMAR_REVISION;
+dmar->header.length = size;
+fixed_strcpy(dmar->header.oem_id, ACPI_OEM_ID);
+fixed_strcpy(dmar->header.oem_table_id, ACPI_OEM_TABLE_ID);
+dmar->header.oem_revision = ACPI_OEM_REVISION;
+dmar->header.creator_id   = ACPI_CREATOR_ID;
+dmar->header.creator_revision = ACPI_CREATOR_REVISION;
+dmar->host_address_width = config->host_addr_width - 1;
+if ( config->iommu_intremap_supported )
+dmar->flags |= ACPI_DMAR_INTR_REMAP;
+if ( !config->iommu_x2apic_supported )
+dmar->flags |= ACPI_DMAR_X2APIC_OPT_OUT;
+
+drhd = (struct acpi_dmar_hardware_unit *)((void*)dmar + sizeof(*dmar));
+drhd->type = ACPI_DMAR_TYPE_HARDWARE_UNIT;
+drhd->length = sizeof(*drhd) + ioapic_scope_size;
+drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
+drhd->pci_segment = 0;
+drhd->base_address = config->iommu_base_addr;
+
+scope = &drhd->scope[0];
+scope->type = ACPI_DMAR_DEVICE_SCOPE_IOAPIC;
+scope->length = ioapic_scope_size;
+scope->enumeration_id = config->ioapic_id;
+scope->bus = config->ioapic_bus;
+scope->path[0] = config->ioapic_devfn;
+
+set_checksum(dmar, offsetof(struct acpi_header, checksum), size);
+return dmar;
+}
+
 static int construct_passthrough_tables(struct acpi_ctxt *ctxt,
 unsigned long *table_ptrs,
 int nr_tables,
diff --git a/tools/libacpi/libacpi.h b/tools/libacpi/libacpi.h
index a2efd23..fdd6a78 100644
--- a/tools/libacpi/libacpi.h
+++ b/tools/libacpi/libacpi.h
@@ -20,6 +20,8 @@
 #ifndef __LIBACPI_H__
 #define __LIBACPI_H__
 
+#include 
+
 #define ACPI_HAS_COM1  (1<<0)
 #define ACPI_HAS_COM2  (1<<1)
 #define ACPI_HAS_LPT1  (1<<2)
@@ -96,8 +98,18 @@ struct acpi_config {
 uint32_t ioapic_base_address;
 uint16_t pci_isa_irq_mask;
 uint8_t ioapic_id;
+
+/* Emulated IOMMU features, location and IOAPIC under the scope of IOMMU */
+bool iommu_intremap_supported;
+bool iommu_x2apic_supported;
+uint8_t host_addr_width;
+uint8_t ioapic_bus;
+uint16_t ioapic_devfn;
+uint64_t iommu_base_addr;
 };
 
+struct acpi_dmar *construct_dmar(struct acpi_ctxt *ctxt,
+ const struct acpi_config *config);
 int acpi_build_tables(struct acpi_ctxt *ctxt, struct acpi_config *config);
 
 #endif /* __LIBACPI_H__ */
-- 
1.8.3.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH V3 12/29] x86/vvtd: Add MMIO handler for VVTD

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

This patch adds a VVTD MMIO handler to deal with MMIO accesses to the
vIOMMU register page.

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 
---
 xen/drivers/passthrough/vtd/vvtd.c | 91 ++
 1 file changed, 91 insertions(+)

diff --git a/xen/drivers/passthrough/vtd/vvtd.c 
b/xen/drivers/passthrough/vtd/vvtd.c
index c851ec7..a3002c3 100644
--- a/xen/drivers/passthrough/vtd/vvtd.c
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -47,6 +47,29 @@ struct vvtd {
 struct page_info *regs_page;
 };
 
+/* Setting viommu_verbose enables debugging messages of vIOMMU */
+bool __read_mostly viommu_verbose;
+boolean_runtime_param("viommu_verbose", viommu_verbose);
+
+#ifndef NDEBUG
+#define vvtd_info(fmt...) do {\
+if ( viommu_verbose ) \
+gprintk(XENLOG_G_INFO, ## fmt);   \
+} while(0)
+#define vvtd_debug(fmt...) do {   \
+if ( viommu_verbose && printk_ratelimit() )   \
+printk(XENLOG_G_DEBUG fmt);   \
+} while(0)
+#else
+#define vvtd_info(fmt...) do {} while(0)
+#define vvtd_debug(fmt...) do {} while(0)
+#endif
+
+struct vvtd *domain_vvtd(struct domain *d)
+{
+return (d->viommu) ? d->viommu->priv : NULL;
+}
+
 static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg, uint32_t value)
 {
 vtd->regs->data32[reg/sizeof(uint32_t)] = value;
@@ -68,6 +91,73 @@ static inline uint64_t vvtd_get_reg_quad(struct vvtd *vtd, 
uint32_t reg)
 return vtd->regs->data64[reg/sizeof(uint64_t)];
 }
 
+static int vvtd_in_range(struct vcpu *v, unsigned long addr)
+{
+struct vvtd *vvtd = domain_vvtd(v->domain);
+
+if ( vvtd )
+return (addr >= vvtd->base_addr) &&
+   (addr < vvtd->base_addr + PAGE_SIZE);
+return 0;
+}
+
+static int vvtd_read(struct vcpu *v, unsigned long addr,
+ unsigned int len, unsigned long *pval)
+{
+struct vvtd *vvtd = domain_vvtd(v->domain);
+unsigned int offset = addr - vvtd->base_addr;
+
+vvtd_info("Read offset %x len %d\n", offset, len);
+
+if ( (len != 4 && len != 8) || (offset & (len - 1)) )
+return X86EMUL_OKAY;
+
+if ( len == 4 )
+*pval = vvtd_get_reg(vvtd, offset);
+else
+*pval = vvtd_get_reg_quad(vvtd, offset);
+
+return X86EMUL_OKAY;
+}
+
+static int vvtd_write(struct vcpu *v, unsigned long addr,
+  unsigned int len, unsigned long val)
+{
+struct vvtd *vvtd = domain_vvtd(v->domain);
+unsigned int offset = addr - vvtd->base_addr;
+
+vvtd_info("Write offset %x len %d val %lx\n", offset, len, val);
+
+if ( (len != 4 && len != 8) || (offset & (len - 1)) )
+return X86EMUL_OKAY;
+
+if ( len == 4 )
+{
+switch ( offset )
+{
+case DMAR_IEDATA_REG:
+case DMAR_IEADDR_REG:
+case DMAR_IEUADDR_REG:
+case DMAR_FEDATA_REG:
+case DMAR_FEADDR_REG:
+case DMAR_FEUADDR_REG:
+vvtd_set_reg(vvtd, offset, val);
+break;
+
+default:
+break;
+}
+}
+
+return X86EMUL_OKAY;
+}
+
+static const struct hvm_mmio_ops vvtd_mmio_ops = {
+.check = vvtd_in_range,
+.read = vvtd_read,
+.write = vvtd_write
+};
+
 static void vvtd_reset(struct vvtd *vvtd, uint64_t capability)
 {
 uint64_t cap = cap_set_num_fault_regs(1ULL) |
@@ -109,6 +199,7 @@ static int vvtd_create(struct domain *d, struct viommu 
*viommu)
 vvtd_reset(vvtd, viommu->caps);
 vvtd->base_addr = viommu->base_address;
 vvtd->domain = d;
+register_mmio_handler(d, &vvtd_mmio_ops);
 
 viommu->priv = vvtd;
 
-- 
1.8.3.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH V3 10/29] vtd: add and align register definitions

2017-09-22 Thread Lan Tianyu
From: Chao Gao 

No functional changes.

Signed-off-by: Chao Gao 
Signed-off-by: Lan Tianyu 

---
 xen/drivers/passthrough/vtd/iommu.h | 54 +
 1 file changed, 31 insertions(+), 23 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index 72c1a2e..d7e433e 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -23,31 +23,39 @@
 #include 
 
 /*
- * Intel IOMMU register specification per version 1.0 public spec.
+ * Intel IOMMU register specification per version 2.4 public spec.
  */
 
-#defineDMAR_VER_REG0x0/* Arch version supported by this IOMMU */
-#defineDMAR_CAP_REG0x8/* Hardware supported capabilities */
-#defineDMAR_ECAP_REG0x10/* Extended capabilities supported */
-#defineDMAR_GCMD_REG0x18/* Global command register */
-#defineDMAR_GSTS_REG0x1c/* Global status register */
-#defineDMAR_RTADDR_REG0x20/* Root entry table */
-#defineDMAR_CCMD_REG0x28/* Context command reg */
-#defineDMAR_FSTS_REG0x34/* Fault Status register */
-#defineDMAR_FECTL_REG0x38/* Fault control register */
-#defineDMAR_FEDATA_REG0x3c/* Fault event interrupt data register */
-#defineDMAR_FEADDR_REG0x40/* Fault event interrupt addr register */
-#defineDMAR_FEUADDR_REG 0x44/* Upper address register */
-#defineDMAR_AFLOG_REG0x58/* Advanced Fault control */
-#defineDMAR_PMEN_REG0x64/* Enable Protected Memory Region */
-#defineDMAR_PLMBASE_REG 0x68/* PMRR Low addr */
-#defineDMAR_PLMLIMIT_REG 0x6c/* PMRR low limit */
-#defineDMAR_PHMBASE_REG 0x70/* pmrr high base addr */
-#defineDMAR_PHMLIMIT_REG 0x78/* pmrr high limit */
-#defineDMAR_IQH_REG0x80/* invalidation queue head */
-#defineDMAR_IQT_REG0x88/* invalidation queue tail */
-#defineDMAR_IQA_REG0x90/* invalidation queue addr */
-#defineDMAR_IRTA_REG   0xB8/* intr remap */
+#define DMAR_VER_REG0x0  /* Arch version supported by this IOMMU */
+#define DMAR_CAP_REG0x8  /* Hardware supported capabilities */
+#define DMAR_ECAP_REG   0x10 /* Extended capabilities supported */
+#define DMAR_GCMD_REG   0x18 /* Global command register */
+#define DMAR_GSTS_REG   0x1c /* Global status register */
+#define DMAR_RTADDR_REG 0x20 /* Root entry table */
+#define DMAR_CCMD_REG   0x28 /* Context command reg */
+#define DMAR_FSTS_REG   0x34 /* Fault Status register */
+#define DMAR_FECTL_REG  0x38 /* Fault control register */
+#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */
+#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */
+#define DMAR_FEUADDR_REG0x44 /* Upper address register */
+#define DMAR_AFLOG_REG  0x58 /* Advanced Fault control */
+#define DMAR_PMEN_REG   0x64 /* Enable Protected Memory Region */
+#define DMAR_PLMBASE_REG0x68 /* PMRR Low addr */
+#define DMAR_PLMLIMIT_REG   0x6c /* PMRR low limit */
+#define DMAR_PHMBASE_REG0x70 /* pmrr high base addr */
+#define DMAR_PHMLIMIT_REG   0x78 /* pmrr high limit */
+#define DMAR_IQH_REG0x80 /* invalidation queue head */
+#define DMAR_IQT_REG0x88 /* invalidation queue tail */
+#define DMAR_IQT_REG_HI 0x8c
+#define DMAR_IQA_REG0x90 /* invalidation queue addr */
+#define DMAR_IQA_REG_HI 0x94
+#define DMAR_ICS_REG0x9c /* Invalidation complete status */
+#define DMAR_IECTL_REG  0xa0 /* Invalidation event control */
+#define DMAR_IEDATA_REG 0xa4 /* Invalidation event data */
+#define DMAR_IEADDR_REG 0xa8 /* Invalidation event address */
+#define DMAR_IEUADDR_REG0xac /* Invalidation event address */
+#define DMAR_IRTA_REG   0xb8 /* Interrupt remapping table addr */
+#define DMAR_IRTA_REG_HI0xbc
 
 #define OFFSET_STRIDE(9)
 #define dmar_readl(dmar, reg) readl((dmar) + (reg))
-- 
1.8.3.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

