Re: [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available

2010-07-21 Thread Daniel P. Berrange
On Tue, Jul 20, 2010 at 04:13:06PM -0700, Chris Wright wrote:
> * Alex Williamson (alex.william...@redhat.com) wrote:
> > When supported by the host kernel, we can use read/write on the
> > PCI sysfs resource file for I/O port regions.  This allows us to
> > avoid raw in/out commands and works with deprivileged guests via
> > libvirt.  For uid 0 callers, we use in/out directly to avoid any
> > compatibility issues.
> 
> won't uid 0 test will fail if libvirt launches qemu with user set to
> root (capabilities still get dropped)?

Yes, if the kernel is doing a CAP_SYS_ADMIN check (or similar), then
testing uid==0 is definitely wrong. You'd need to test have(CAP_SYS_ADMIN)
instead. 

REgards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available

2010-07-20 Thread Chris Wright
* Alex Williamson (alex.william...@redhat.com) wrote:
> When supported by the host kernel, we can use read/write on the
> PCI sysfs resource file for I/O port regions.  This allows us to
> avoid raw in/out commands and works with deprivileged guests via
> libvirt.  For uid 0 callers, we use in/out directly to avoid any
> compatibility issues.

won't uid 0 test will fail if libvirt launches qemu with user set to
root (capabilities still get dropped)?

thanks,
-chris
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] device-assignment: Use PCI I/O port sysfs resource file when available

2010-07-20 Thread Alex Williamson
When supported by the host kernel, we can use read/write on the
PCI sysfs resource file for I/O port regions.  This allows us to
avoid raw in/out commands and works with deprivileged guests via
libvirt.  For uid 0 callers, we use in/out directly to avoid any
compatibility issues.

Signed-off-by: Alex Williamson 
---

 Required kernel patch pending here:
 http://www.spinics.net/lists/linux-pci/msg09389.html

 hw/device-assignment.c |  131 
 hw/device-assignment.h |1 
 2 files changed, 99 insertions(+), 33 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 2bba22f..37c1278 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -67,6 +67,28 @@ static uint32_t guest_to_host_ioport(AssignedDevRegion 
*region, uint32_t addr)
 return region->u.r_baseport + (addr - region->e_physbase);
 }
 
+static int assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
+  uint32_t addr, int len, uint32_t *val,
+  int write)
+{
+if (dev_region->region->resource_fd == -1)
+return -1;
+
+if (write) {
+if (pwrite(dev_region->region->resource_fd, val, len,
+  (addr - dev_region->e_physbase)) != len) {
+return -1;
+}
+} else {
+if (pread(dev_region->region->resource_fd, val, len,
+  (addr - dev_region->e_physbase)) != len) {
+return -1;
+}
+}
+
+return 0;
+}
+
 static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
uint32_t value)
 {
@@ -77,7 +99,9 @@ static void assigned_dev_ioport_writeb(void *opaque, uint32_t 
addr,
  r_pio, (int)r_access->e_physbase,
  (unsigned long)r_access->u.r_baseport, value);
 
-outb(value, r_pio);
+if (assigned_dev_ioport_rw(r_access, addr, 1, &value, 1) != 0) {
+outb(value, r_pio);
+}
 }
 
 static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
@@ -90,7 +114,9 @@ static void assigned_dev_ioport_writew(void *opaque, 
uint32_t addr,
   r_pio, (int)r_access->e_physbase,
  (unsigned long)r_access->u.r_baseport, value);
 
-outw(value, r_pio);
+if (assigned_dev_ioport_rw(r_access, addr, 2, &value, 1) != 0) {
+outw(value, r_pio);
+}
 }
 
 static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
@@ -103,7 +129,9 @@ static void assigned_dev_ioport_writel(void *opaque, 
uint32_t addr,
  r_pio, (int)r_access->e_physbase,
   (unsigned long)r_access->u.r_baseport, value);
 
-outl(value, r_pio);
+if (assigned_dev_ioport_rw(r_access, addr, 4, &value, 1) != 0) {
+outl(value, r_pio);
+}
 }
 
 static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
@@ -112,7 +140,9 @@ static uint32_t assigned_dev_ioport_readb(void *opaque, 
uint32_t addr)
 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
 uint32_t value;
 
-value = inb(r_pio);
+if (assigned_dev_ioport_rw(r_access, addr, 1, &value, 0) != 0) {
+value = inb(r_pio);
+}
 
 DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n",
   r_pio, (int)r_access->e_physbase,
@@ -127,7 +157,9 @@ static uint32_t assigned_dev_ioport_readw(void *opaque, 
uint32_t addr)
 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
 uint32_t value;
 
-value = inw(r_pio);
+if (assigned_dev_ioport_rw(r_access, addr, 2, &value, 0) != 0) {
+value = inw(r_pio);
+}
 
 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
   r_pio, (int)r_access->e_physbase,
@@ -142,7 +174,9 @@ static uint32_t assigned_dev_ioport_readl(void *opaque, 
uint32_t addr)
 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
 uint32_t value;
 
-value = inl(r_pio);
+if (assigned_dev_ioport_rw(r_access, addr, 4, &value, 0) != 0) {
+value = inl(r_pio);
+}
 
 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
   r_pio, (int)r_access->e_physbase,
@@ -305,7 +339,7 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int 
region_num,
 DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS 
" region_num=%d \n",
   addr, region->u.r_baseport, type, size, region_num);
 
-if (first_map) {
+if (first_map && region->region->resource_fd < 0) {
struct ioperm_data *data;
 
data = qemu_mallocz(sizeof(struct ioperm_data));
@@ -586,19 +620,46 @@ static int assigned_dev_register_regions(PCIRegion 
*io_regions,
  slow_map ? assigned_dev_iomem_map_slow
   : assigned_dev_iomem_map);
 continue;
+} else {
+/* handle port io regions */
+uint32_t val;
+int ret;
+
+/* Test kernel support for ioport resource read/write.  Old
+ * kerne