Re: [RFC][PATCH 28/45] qemu-kvm: msix: Drop tracking of used vectors

2011-10-21 Thread Jan Kiszka
On 2011-10-21 00:02, Michael S. Tsirkin wrote:
 Yes. But this still makes an API for acquiring per-vector resources a 
 requirement.

 Yes, but a different one than current use/unuse.
 
 What's wrong with use/unuse as an API? It's already in place
 and virtio calls it.

Not for that purpose. It remains a useless API in the absence of KVM's
requirements.

 
 And it will be an
 optional one, only for those devices that need to establish irq/eventfd
 channels.

 Jan
 
 Not sure this should be up to the device.

The device provides the fd. At least it acquires and associates it.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH 28/45] qemu-kvm: msix: Drop tracking of used vectors

2011-10-21 Thread Michael S. Tsirkin
On Fri, Oct 21, 2011 at 09:09:10AM +0200, Jan Kiszka wrote:
 On 2011-10-21 00:02, Michael S. Tsirkin wrote:
  Yes. But this still makes an API for acquiring per-vector resources a 
  requirement.
 
  Yes, but a different one than current use/unuse.
  
  What's wrong with use/unuse as an API? It's already in place
  and virtio calls it.
 
 Not for that purpose.
 It remains a useless API in the absence of KVM's
 requirements.
 

Sorry, I don't understand. This can acquire whatever resources
necessary. It does not seem to make sense to rip it out
only to add a different one back in.

  
  And it will be an
  optional one, only for those devices that need to establish irq/eventfd
  channels.
 
  Jan
  
  Not sure this should be up to the device.
 
 The device provides the fd. At least it acquires and associates it.
 
 Jan

It would surely be beneficial to be able to have a uniform
API so that devices don't need to be recoded to be moved
in this way.

 -- 
 Siemens AG, Corporate Technology, CT T DE IT 1
 Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC][PATCH] KVM: Introduce direct MSI message injection for in-kernel irqchips

2011-10-21 Thread Jan Kiszka
Currently, MSI messages can only be injected to in-kernel irqchips by
defining a corresponding IRQ route for each message. This is not only
unhandy if the MSI messages are generated on the fly by user space,
IRQ routes are a limited resource that user space has to manage
carefully.

By providing a direct injection path, we can both avoid using up limited
resources and simplify the necessary steps for user land. The API
already provides a channel (flags) to revoke an injected but not yet
delivered message which will become important for in-kernel MSI-X vector
masking support.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 Documentation/virtual/kvm/api.txt |   23 +++
 include/linux/kvm.h   |   15 +++
 virt/kvm/kvm_main.c   |   18 ++
 3 files changed, 56 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 7945b0b..f4c3de3 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1383,6 +1383,29 @@ The following flags are defined:
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
 
+4.59 KVM_SET_MSI
+
+Capability: KVM_CAP_SET_MSI
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_msi (in)
+Returns: 0 on success, -1 on error
+
+Directly inject a MSI message. Only valid with in-kernel irqchip that handles
+MSI messages.
+
+struct kvm_msi {
+   __u32 address_lo;
+   __u32 address_hi;
+   __u32 data;
+   __u32 flags;
+   __u8  pad[16];
+};
+
+The following flags are defined:
+
+#define KVM_MSI_FLAG_RAISE (1 << 0)
+
 4.62 KVM_CREATE_SPAPR_TCE
 
 Capability: KVM_CAP_SPAPR_TCE
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 6884054..83875ed 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -557,6 +557,9 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_PPC_HIOR 67
 #define KVM_CAP_PPC_PAPR 68
 #define KVM_CAP_S390_GMAP 71
+#ifdef __KVM_HAVE_MSI
+#define KVM_CAP_SET_MSI 72
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -636,6 +639,16 @@ struct kvm_clock_data {
__u32 pad[9];
 };
 
+#define KVM_MSI_FLAG_RAISE (1  0)
+
+struct kvm_msi {
+   __u32 address_lo;
+   __u32 address_hi;
+   __u32 data;
+   __u32 flags;
+   __u8  pad[16];
+};
+
 /*
  * ioctls for VM fds
  */
@@ -696,6 +709,8 @@ struct kvm_clock_data {
 /* Available with KVM_CAP_TSC_CONTROL */
 #define KVM_SET_TSC_KHZ   _IO(KVMIO,  0xa2)
 #define KVM_GET_TSC_KHZ   _IO(KVMIO,  0xa3)
+/* Available with KVM_CAP_SET_MSI */
+#define KVM_SET_MSI   _IOW(KVMIO,  0xa4, struct kvm_msi)
 
 /*
  * ioctls for vcpu fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d9cfb78..0e3a947 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2058,6 +2058,24 @@ static long kvm_vm_ioctl(struct file *filp,
mutex_unlock(kvm-lock);
break;
 #endif
+#ifdef __KVM_HAVE_MSI
+   case KVM_SET_MSI: {
+   struct kvm_kernel_irq_routing_entry route;
+   struct kvm_msi msi;
+
+   r = -EFAULT;
+   if (copy_from_user(msi, argp, sizeof msi))
+   goto out;
+   route.msi.address_lo = msi.address_lo;
+   route.msi.address_hi = msi.address_hi;
+   route.msi.data = msi.data;
+   r = 0;
+   if (msi.flags  KVM_MSI_FLAG_RAISE)
+   r =  kvm_set_msi(route, kvm,
+KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+   break;
+   }
+#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
if (r == -ENOTTY)
-- 
1.7.3.4
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH 28/45] qemu-kvm: msix: Drop tracking of used vectors

2011-10-21 Thread Jan Kiszka
On 2011-10-21 09:54, Michael S. Tsirkin wrote:
 On Fri, Oct 21, 2011 at 09:09:10AM +0200, Jan Kiszka wrote:
 On 2011-10-21 00:02, Michael S. Tsirkin wrote:
 Yes. But this still makes an API for acquiring per-vector resources a 
 requirement.

 Yes, but a different one than current use/unuse.

 What's wrong with use/unuse as an API? It's already in place
 and virtio calls it.

 Not for that purpose.
 It remains a useless API in the absence of KVM's
 requirements.

 
 Sorry, I don't understand. This can acquire whatever resources
 necessary. It does not seem to make sense to rip it out
 only to add a different one back in.
 

 And it will be an
 optional one, only for those devices that need to establish irq/eventfd
 channels.

 Jan

 Not sure this should be up to the device.

 The device provides the fd. At least it acquires and associates it.

 Jan
 
 It would surely be beneficial to be able to have a uniform
 API so that devices don't need to be recoded to be moved
 in this way.

The point is that the current API is useless for devices that do not
have to declare any vector to the core. By forcing them to call into
that API, we solve no current problem automatically. We rather need
associate_vector_with_x (and the reverse). And that only for devices that
have different backends than user space models.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH] KVM: Introduce direct MSI message injection for in-kernel irqchips

2011-10-21 Thread Sasha Levin
On Fri, 2011-10-21 at 11:19 +0200, Jan Kiszka wrote:
 Currently, MSI messages can only be injected to in-kernel irqchips by
 defining a corresponding IRQ route for each message. This is not only
 unhandy if the MSI messages are generated on the fly by user space,
 IRQ routes are a limited resource that user space as to manage
 carefully.
 
 By providing a direct injection with, we can both avoid using up limited
 resources and simplify the necessary steps for user land. The API
 already provides a channel (flags) to revoke an injected but not yet
 delivered message which will become important for in-kernel MSI-X vector
 masking support.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  Documentation/virtual/kvm/api.txt |   23 +++
  include/linux/kvm.h   |   15 +++
  virt/kvm/kvm_main.c   |   18 ++
  3 files changed, 56 insertions(+), 0 deletions(-)
 
 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index 7945b0b..f4c3de3 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -1383,6 +1383,29 @@ The following flags are defined:
  If datamatch flag is set, the event will be signaled only if the written 
 value
  to the registered address is equal to datamatch in struct kvm_ioeventfd.
  
 +4.59 KVM_SET_MSI
 +
 +Capability: KVM_CAP_SET_MSI
 +Architectures: x86 ia64
 +Type: vm ioctl
 +Parameters: struct kvm_msi (in)
 +Returns: 0 on success, -1 on error
 +
 +Directly inject a MSI message. Only valid with in-kernel irqchip that handles
 +MSI messages.
 +
 +struct kvm_msi {
 + __u32 address_lo;
 + __u32 address_hi;
 + __u32 data;
 + __u32 flags;
 + __u8  pad[16];
 +};
 +
 +The following flags are defined:
 +
 +#define KVM_MSI_FLAG_RAISE (1  0)
 +
  4.62 KVM_CREATE_SPAPR_TCE
  
  Capability: KVM_CAP_SPAPR_TCE
 diff --git a/include/linux/kvm.h b/include/linux/kvm.h
 index 6884054..83875ed 100644
 --- a/include/linux/kvm.h
 +++ b/include/linux/kvm.h
 @@ -557,6 +557,9 @@ struct kvm_ppc_pvinfo {
  #define KVM_CAP_PPC_HIOR 67
  #define KVM_CAP_PPC_PAPR 68
  #define KVM_CAP_S390_GMAP 71
 +#ifdef __KVM_HAVE_MSI
 +#define KVM_CAP_SET_MSI 72
 +#endif
  
  #ifdef KVM_CAP_IRQ_ROUTING
  
 @@ -636,6 +639,16 @@ struct kvm_clock_data {
   __u32 pad[9];
  };
  
 +#define KVM_MSI_FLAG_RAISE (1  0)
 +
 +struct kvm_msi {
 + __u32 address_lo;
 + __u32 address_hi;
 + __u32 data;
 + __u32 flags;
 + __u8  pad[16];
 +};
 +

How about defining it as:

struct kvm_msi {
struct msi_msg msi;
__u32 flags;
__u8 pad[16];
};

It would allow keeping everything in a msi_msg all the way from
userspace up to kvm_set_msi()

-- 

Sasha.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Convert one .img disk created with sparse=true to no sparse

2011-10-21 Thread Stephane CHAZELAS
2011-10-21, 07:28(+10), Simon Wilson:
[...]
 If working with a NON-sparse VM img (i.e. originally created with  
 virt-install --nonsparse), does a cp without --sparse=never retain the  
 non-sparse nature of the file, or does it have to be specified?
[...]

See the man page for your cp command. The GNU implementation
of cp tries to replicate the sparseness of the files by
default, so if the source file wasn't sparse, the destination
shouldn't be either.

If in doubt, you can always use dd/cat/pv to avoid the extra
fancy hole punching that cp might or might not do.

-- 
Stephane

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] kvm tools: Allow piping debug output to file descriptor

2011-10-21 Thread Sasha Levin
This patch makes debug output go to a 'debug_fd' instead of stdout.

Doing so allows us to send the output to a different console when
required.

This patch also changes the behaviour of 'kvm debug' to show the debug
output in the console that executed the debug command instead of in the
console of the guest.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/builtin-debug.c   |   13 ++-
 tools/kvm/builtin-run.c |8 +++-
 tools/kvm/include/kvm/kvm-cpu.h |2 +
 tools/kvm/kvm-cpu.c |   78 ++
 4 files changed, 66 insertions(+), 35 deletions(-)

diff --git a/tools/kvm/builtin-debug.c b/tools/kvm/builtin-debug.c
index f744a7e..045dc2c 100644
--- a/tools/kvm/builtin-debug.c
+++ b/tools/kvm/builtin-debug.c
@@ -4,11 +4,14 @@
 #include kvm/kvm.h
 #include kvm/parse-options.h
 #include kvm/kvm-ipc.h
+#include kvm/read-write.h
 
 #include stdio.h
 #include string.h
 #include signal.h
 
+#define BUFFER_SIZE 100
+
 static bool all;
 static int instance;
 static const char *instance_name;
@@ -47,13 +50,21 @@ void kvm_debug_help(void)
 
 static int do_debug(const char *name, int sock)
 {
+   char buff[BUFFER_SIZE];
struct debug_cmd cmd = {KVM_IPC_DEBUG, 0};
int r;
 
-   r = write(sock, cmd, sizeof(cmd));
+   r = xwrite(sock, cmd, sizeof(cmd));
if (r  0)
return r;
 
+   do {
+   r = xread(sock, buff, BUFFER_SIZE);
+   if (r  0)
+   return 0;
+   printf(%.*s, r, buff);
+   } while (r  0);
+
return 0;
 }
 
diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index bbe5a35..2792650 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -456,11 +456,12 @@ static int printout_done;
 static void handle_sigusr1(int sig)
 {
struct kvm_cpu *cpu = current_kvm_cpu;
+   int fd = kvm_cpu__get_debug_fd();
 
if (!cpu)
return;
 
-   printf(\n #\n # vCPU #%ld's dump:\n #\n, cpu-cpu_id);
+   dprintf(fd, \n #\n # vCPU #%ld's dump:\n #\n, cpu-cpu_id);
kvm_cpu__show_registers(cpu);
kvm_cpu__show_code(cpu);
kvm_cpu__show_page_tables(cpu);
@@ -496,6 +497,8 @@ static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
continue;
 
printout_done = 0;
+
+   kvm_cpu__set_debug_fd(fd);
pthread_kill(cpu-thread, SIGUSR1);
/*
 * Wait for the vCPU to dump state before signalling
@@ -506,6 +509,8 @@ static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
mb();
}
 
+   close(fd);
+
serial8250__inject_sysrq(kvm);
 }
 
@@ -539,6 +544,7 @@ panic_kvm:
fprintf(stderr, KVM exit code: 0x%Lu\n,
current_kvm_cpu-kvm_run-hw.hardware_exit_reason);
 
+   kvm_cpu__set_debug_fd(STDOUT_FILENO);
kvm_cpu__show_registers(current_kvm_cpu);
kvm_cpu__show_code(current_kvm_cpu);
kvm_cpu__show_page_tables(current_kvm_cpu);
diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h
index 95f3f9d..01540ac 100644
--- a/tools/kvm/include/kvm/kvm-cpu.h
+++ b/tools/kvm/include/kvm/kvm-cpu.h
@@ -37,6 +37,8 @@ void kvm_cpu__run(struct kvm_cpu *vcpu);
 void kvm_cpu__reboot(void);
 int kvm_cpu__start(struct kvm_cpu *cpu);
 
+int kvm_cpu__get_debug_fd(void);
+void kvm_cpu__set_debug_fd(int fd);
 void kvm_cpu__show_code(struct kvm_cpu *vcpu);
 void kvm_cpu__show_registers(struct kvm_cpu *vcpu);
 void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu);
diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c
index d7ae5ce..0ad6f3b 100644
--- a/tools/kvm/kvm-cpu.c
+++ b/tools/kvm/kvm-cpu.c
@@ -19,6 +19,18 @@
 extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS];
 extern __thread struct kvm_cpu *current_kvm_cpu;
 
+static int debug_fd;
+
+void kvm_cpu__set_debug_fd(int fd)
+{
+   debug_fd = fd;
+}
+
+int kvm_cpu__get_debug_fd(void)
+{
+   return debug_fd;
+}
+
 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
 {
return vcpu-sregs.cr0  0x01;
@@ -216,13 +228,13 @@ void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
 
 static void print_dtable(const char *name, struct kvm_dtable *dtable)
 {
-   printf( %s %016llx  %08hx\n,
+   dprintf(debug_fd,  %s %016llx  %08hx\n,
name, (u64) dtable-base, (u16) dtable-limit);
 }
 
 static void print_segment(const char *name, struct kvm_segment *seg)
 {
-   printf( %s   %04hx  %016llx  %08x  %02hhx%x %x   %x  %x %x 
%x %x\n,
+   dprintf(debug_fd,  %s   %04hx  %016llx  %08x  %02hhx%x %x  
 %x  %x %x %x %x\n,
name, (u16) seg-selector, (u64) seg-base, (u32) seg-limit,
(u8) seg-type, seg-present, seg-dpl, seg-db, seg-s, 
seg-l, seg-g, seg-avl);
 }
@@ -254,14 +266,14 @@ void kvm_cpu__show_registers(struct 

Re: [RFC][PATCH] KVM: Introduce direct MSI message injection for in-kernel irqchips

2011-10-21 Thread Michael S. Tsirkin
On Fri, Oct 21, 2011 at 11:19:19AM +0200, Jan Kiszka wrote:
 Currently, MSI messages can only be injected to in-kernel irqchips by
 defining a corresponding IRQ route for each message. This is not only
 unhandy if the MSI messages are generated on the fly by user space,
 IRQ routes are a limited resource that user space as to manage
 carefully.
 
 By providing a direct injection with, we can both avoid using up limited
 resources and simplify the necessary steps for user land. The API
 already provides a channel (flags) to revoke an injected but not yet
 delivered message which will become important for in-kernel MSI-X vector
 masking support.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com

I would love to see how you envision extending this to add the masking
support at least at the API level, not necessarily the supporting code.

It would seem hard to use flags field for that since MSIX mask is per
device per vector, not per message.
Which gets us back to resource per vector which userspace has to manage
...

interrupt remapping is also per device, so it isn't any easier
with this API.

 ---
  Documentation/virtual/kvm/api.txt |   23 +++
  include/linux/kvm.h   |   15 +++
  virt/kvm/kvm_main.c   |   18 ++
  3 files changed, 56 insertions(+), 0 deletions(-)
 
 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index 7945b0b..f4c3de3 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -1383,6 +1383,29 @@ The following flags are defined:
  If datamatch flag is set, the event will be signaled only if the written 
 value
  to the registered address is equal to datamatch in struct kvm_ioeventfd.
  
 +4.59 KVM_SET_MSI
 +
 +Capability: KVM_CAP_SET_MSI
 +Architectures: x86 ia64
 +Type: vm ioctl
 +Parameters: struct kvm_msi (in)
 +Returns: 0 on success, -1 on error
 +
 +Directly inject a MSI message. Only valid with in-kernel irqchip that handles
 +MSI messages.
 +
 +struct kvm_msi {
 + __u32 address_lo;
 + __u32 address_hi;
 + __u32 data;
 + __u32 flags;
 + __u8  pad[16];
 +};
 +
 +The following flags are defined:
 +
 +#define KVM_MSI_FLAG_RAISE (1  0)
 +
  4.62 KVM_CREATE_SPAPR_TCE
  
  Capability: KVM_CAP_SPAPR_TCE
 diff --git a/include/linux/kvm.h b/include/linux/kvm.h
 index 6884054..83875ed 100644
 --- a/include/linux/kvm.h
 +++ b/include/linux/kvm.h
 @@ -557,6 +557,9 @@ struct kvm_ppc_pvinfo {
  #define KVM_CAP_PPC_HIOR 67
  #define KVM_CAP_PPC_PAPR 68
  #define KVM_CAP_S390_GMAP 71
 +#ifdef __KVM_HAVE_MSI
 +#define KVM_CAP_SET_MSI 72
 +#endif
  
  #ifdef KVM_CAP_IRQ_ROUTING
  
 @@ -636,6 +639,16 @@ struct kvm_clock_data {
   __u32 pad[9];
  };
  
 +#define KVM_MSI_FLAG_RAISE (1  0)
 +
 +struct kvm_msi {
 + __u32 address_lo;
 + __u32 address_hi;
 + __u32 data;
 + __u32 flags;
 + __u8  pad[16];
 +};
 +
  /*
   * ioctls for VM fds
   */
 @@ -696,6 +709,8 @@ struct kvm_clock_data {
  /* Available with KVM_CAP_TSC_CONTROL */
  #define KVM_SET_TSC_KHZ   _IO(KVMIO,  0xa2)
  #define KVM_GET_TSC_KHZ   _IO(KVMIO,  0xa3)
 +/* Available with KVM_CAP_SET_MSI */
 +#define KVM_SET_MSI   _IOW(KVMIO,  0xa4, struct kvm_msi)
  
  /*
   * ioctls for vcpu fds
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index d9cfb78..0e3a947 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -2058,6 +2058,24 @@ static long kvm_vm_ioctl(struct file *filp,
   mutex_unlock(kvm-lock);
   break;
  #endif
 +#ifdef __KVM_HAVE_MSI
 + case KVM_SET_MSI: {
 + struct kvm_kernel_irq_routing_entry route;
 + struct kvm_msi msi;
 +
 + r = -EFAULT;
 + if (copy_from_user(msi, argp, sizeof msi))
 + goto out;
 + route.msi.address_lo = msi.address_lo;
 + route.msi.address_hi = msi.address_hi;
 + route.msi.data = msi.data;
 + r = 0;
 + if (msi.flags  KVM_MSI_FLAG_RAISE)
 + r =  kvm_set_msi(route, kvm,
 +  KVM_USERSPACE_IRQ_SOURCE_ID, 1);
 + break;
 + }
 +#endif
   default:
   r = kvm_arch_vm_ioctl(filp, ioctl, arg);
   if (r == -ENOTTY)
 -- 
 1.7.3.4
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Convert one .img disk created with sparse=true to no sparse

2011-10-21 Thread Gonzalo Marcote Peña
Thank you Stefan.
That's exactly what i needed.
It worked perfectly.


2011/10/20 Simon Wilson si...@simonandkate.net:
 - Message from Stefan Hajnoczi stefa...@gmail.com -
   Date: Thu, 20 Oct 2011 07:43:45 -0700
   From: Stefan Hajnoczi stefa...@gmail.com
 Subject: Re: Convert one .img disk created with sparse=true to no sparse
     To: Gonzalo Marcote Peña gonzalomarc...@gmail.com
     Cc: kvm@vger.kernel.org


 On Thu, Oct 20, 2011 at 4:21 AM, Gonzalo Marcote Peña
 gonzalomarc...@gmail.com wrote:

 Hi.
 I have one guest that I created with virt-install comand option
 'sparse=true'.
 As i want to use now this guest for I/O tasks (DDBB) and i want to
 improve performance, I want to convert the disk.img from sparse to no
 sparse (obviusly without format it).
 How can I do this task without loosinf the disk image data?.

 If you are using a raw image file:
 $ cp --sparse=never old-sparse.img new-allocated.img

 This copies the data into the new file and does not try to make zero
 regions sparse.  You can then replace the old file with the new file.

 Stefan
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html


 - End message from Stefan Hajnoczi stefa...@gmail.com -

 If working with a NON-sparse VM img (i.e. originally created with
 virt-install --nonsparse), does a cp without --sparse=never retain the
 non-sparse nature of the file, or does it have to be specified?

 Simon


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm tools: Simplify msi message handling

2011-10-21 Thread Sasha Levin
This patch simplifies passing around msi messages by using
'struct kvm_irq_routing_msi' for storing of msi messages instead
of passing all msi parameters around.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/hw/pci-shmem.c|5 +
 tools/kvm/include/kvm/irq.h |3 ++-
 tools/kvm/include/kvm/pci.h |6 ++
 tools/kvm/irq.c |6 ++
 tools/kvm/virtio/pci.c  |   10 ++
 5 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/tools/kvm/hw/pci-shmem.c b/tools/kvm/hw/pci-shmem.c
index 2907a66..780a377 100644
--- a/tools/kvm/hw/pci-shmem.c
+++ b/tools/kvm/hw/pci-shmem.c
@@ -124,10 +124,7 @@ int pci_shmem__get_local_irqfd(struct kvm *kvm)
return fd;
 
if (pci_shmem_pci_device.msix.ctrl  PCI_MSIX_FLAGS_ENABLE) {
-   gsi = irq__add_msix_route(kvm,
- msix_table[0].low,
- msix_table[0].high,
- msix_table[0].data);
+   gsi = irq__add_msix_route(kvm, msix_table[0].msg);
} else {
gsi = pci_shmem_pci_device.irq_line;
}
diff --git a/tools/kvm/include/kvm/irq.h b/tools/kvm/include/kvm/irq.h
index 401bee9..3df795d 100644
--- a/tools/kvm/include/kvm/irq.h
+++ b/tools/kvm/include/kvm/irq.h
@@ -4,6 +4,7 @@
 #include linux/types.h
 #include linux/rbtree.h
 #include linux/list.h
+#include linux/kvm.h
 
 struct kvm;
 
@@ -24,6 +25,6 @@ int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line);
 struct rb_node *irq__get_pci_tree(void);
 
 void irq__init(struct kvm *kvm);
-int irq__add_msix_route(struct kvm *kvm, u32 low, u32 high, u32 data);
+int irq__add_msix_route(struct kvm *kvm, struct kvm_irq_routing_msi *msg);
 
 #endif
diff --git a/tools/kvm/include/kvm/pci.h b/tools/kvm/include/kvm/pci.h
index 5ee8005..61753a0 100644
--- a/tools/kvm/include/kvm/pci.h
+++ b/tools/kvm/include/kvm/pci.h
@@ -2,7 +2,7 @@
 #define KVM__PCI_H
 
 #include linux/types.h
-
+#include linux/kvm.h
 #include linux/pci_regs.h
 
 /*
@@ -26,9 +26,7 @@ struct pci_config_address {
 };
 
 struct msix_table {
-   u32 low;
-   u32 high;
-   u32 data;
+   struct kvm_irq_routing_msi msg;
u32 ctrl;
 };
 
diff --git a/tools/kvm/irq.c b/tools/kvm/irq.c
index e35bf18..f1002d8 100644
--- a/tools/kvm/irq.c
+++ b/tools/kvm/irq.c
@@ -167,7 +167,7 @@ void irq__init(struct kvm *kvm)
die(Failed setting GSI routes);
 }
 
-int irq__add_msix_route(struct kvm *kvm, u32 low, u32 high, u32 data)
+int irq__add_msix_route(struct kvm *kvm, struct kvm_irq_routing_msi *msg)
 {
int r;
 
@@ -175,9 +175,7 @@ int irq__add_msix_route(struct kvm *kvm, u32 low, u32 high, 
u32 data)
(struct kvm_irq_routing_entry) {
.gsi = gsi,
.type = KVM_IRQ_ROUTING_MSI,
-   .u.msi.address_lo = low,
-   .u.msi.address_hi = high,
-   .u.msi.data = data,
+   .u.msi = *msg,
};
 
r = ioctl(kvm-vm_fd, KVM_SET_GSI_ROUTING, irq_routing);
diff --git a/tools/kvm/virtio/pci.c b/tools/kvm/virtio/pci.c
index f01851b..73d55a9 100644
--- a/tools/kvm/virtio/pci.c
+++ b/tools/kvm/virtio/pci.c
@@ -126,20 +126,14 @@ static bool virtio_pci__specific_io_out(struct kvm *kvm, 
struct virtio_pci *vpci
case VIRTIO_MSI_CONFIG_VECTOR:
vec = vpci-config_vector = ioport__read16(data);
 
-   gsi = irq__add_msix_route(kvm,
- vpci-msix_table[vec].low,
- vpci-msix_table[vec].high,
- vpci-msix_table[vec].data);
+   gsi = irq__add_msix_route(kvm, 
vpci-msix_table[vec].msg);
 
vpci-config_gsi = gsi;
break;
case VIRTIO_MSI_QUEUE_VECTOR: {
vec = vpci-vq_vector[vpci-queue_selector] = 
ioport__read16(data);
 
-   gsi = irq__add_msix_route(kvm,
- vpci-msix_table[vec].low,
- vpci-msix_table[vec].high,
- vpci-msix_table[vec].data);
+   gsi = irq__add_msix_route(kvm, 
vpci-msix_table[vec].msg);
vpci-gsis[vpci-queue_selector] = gsi;
break;
}
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH] KVM: Introduce direct MSI message injection for in-kernel irqchips

2011-10-21 Thread Jan Kiszka
On 2011-10-21 13:06, Michael S. Tsirkin wrote:
 On Fri, Oct 21, 2011 at 11:19:19AM +0200, Jan Kiszka wrote:
 Currently, MSI messages can only be injected to in-kernel irqchips by
 defining a corresponding IRQ route for each message. This is not only
 unhandy if the MSI messages are generated on the fly by user space,
 IRQ routes are a limited resource that user space as to manage
 carefully.

 By providing a direct injection with, we can both avoid using up limited
 resources and simplify the necessary steps for user land. The API
 already provides a channel (flags) to revoke an injected but not yet
 delivered message which will become important for in-kernel MSI-X vector
 masking support.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 
 I would love to see how you envision extending this to add the masking
 support at least at the API level, not necessarily the supporting code.
 
 It would seem hard to use flags field for that since MSIX mask is per
 device per vector, not per message.
 Which gets us back to resource per vector which userspace has to manage
 ...
 
 interrupt remapping is also per device, so it isn't any easier
 with this API.

Yes, we will need an additional field to associate the message with its
source device. Could be a PCI address or a handle (like the one assigned
devices get) returned on MSI-X kernel region setup. We will need a flag
to declare that address/handle valid, also to tell apart platform MSI
messages (e.g. coming from HPET on x86). I see no obstacles ATM that
prevent doing that on top of this API, do you?

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH 28/45] qemu-kvm: msix: Drop tracking of used vectors

2011-10-21 Thread Michael S. Tsirkin
On Fri, Oct 21, 2011 at 11:27:48AM +0200, Jan Kiszka wrote:
 On 2011-10-21 09:54, Michael S. Tsirkin wrote:
  On Fri, Oct 21, 2011 at 09:09:10AM +0200, Jan Kiszka wrote:
  On 2011-10-21 00:02, Michael S. Tsirkin wrote:
  Yes. But this still makes an API for acquiring per-vector resources a 
  requirement.
 
  Yes, but a different one than current use/unuse.
 
  What's wrong with use/unuse as an API? It's already in place
  and virtio calls it.
 
  Not for that purpose.
  It remains a useless API in the absence of KVM's
  requirements.
 
  
  Sorry, I don't understand. This can acquire whatever resources
  necessary. It does not seem to make sense to rip it out
  only to add a different one back in.
  
 
  And it will be an
  optional one, only for those devices that need to establish irq/eventfd
  channels.
 
  Jan
 
  Not sure this should be up to the device.
 
  The device provides the fd. At least it acquires and associates it.
 
  Jan
  
  It would surely be beneficial to be able to have a uniform
  API so that devices don't need to be recoded to be moved
  in this way.
 
 The point is that the current API is useless for devices that do not
 have to declare any vector to the core.

Don't assigned devices want this as well?
They handle 0-address vectors specially, and
this hack absolutely doesn't belong in pci core ...

 By forcing them to call into
 that API, we solve no current problem automatically. We rather need
 associate_vector_with_x (and the reverse). And that only for device that
 have different backends than user space models.
 
 Jan

I'll need to think about this, would prefer this series not
to get blocked on this issue. We more or less agreed
to add _use_all/unuse_all for now?

 -- 
 Siemens AG, Corporate Technology, CT T DE IT 1
 Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH] KVM: Introduce direct MSI message injection for in-kernel irqchips

2011-10-21 Thread Michael S. Tsirkin
On Fri, Oct 21, 2011 at 01:51:15PM +0200, Jan Kiszka wrote:
 On 2011-10-21 13:06, Michael S. Tsirkin wrote:
  On Fri, Oct 21, 2011 at 11:19:19AM +0200, Jan Kiszka wrote:
  Currently, MSI messages can only be injected to in-kernel irqchips by
  defining a corresponding IRQ route for each message. This is not only
  unhandy if the MSI messages are generated on the fly by user space,
  IRQ routes are a limited resource that user space as to manage
  carefully.
 
  By providing a direct injection with, we can both avoid using up limited
  resources and simplify the necessary steps for user land. The API
  already provides a channel (flags) to revoke an injected but not yet
  delivered message which will become important for in-kernel MSI-X vector
  masking support.
 
  Signed-off-by: Jan Kiszka jan.kis...@siemens.com
  
  I would love to see how you envision extending this to add the masking
  support at least at the API level, not necessarily the supporting code.
  
  It would seem hard to use flags field for that since MSIX mask is per
  device per vector, not per message.
  Which gets us back to resource per vector which userspace has to manage
  ...
  
  interrupt remapping is also per device, so it isn't any easier
  with this API.
 
 Yes, we will need an additional field to associate the message with its
 source device. Could be a PCI address or a handle (like the one assigned
 devices get) returned on MSI-X kernel region setup. We will need a flag
 to declare that address/handle valid, also to tell apart platform MSI
 messages (e.g. coming from HPET on x86).

I have not thought about remapping a lot yet:
HPET interrupts are not subject to remapping?

 I see no obstacles ATM that
 prevent doing that on top of this API, do you?
 
 Jan

For masking, I think I do. We need to maintain the pending bit
and the io notifiers in kernel, per vector.
An MSI injected with just an address/data pair, without
vector/device info, can't be masked properly.

We get back to maintaining some handle per vector, right?

 -- 
 Siemens AG, Corporate Technology, CT T DE IT 1
 Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[no subject]

2011-10-21 Thread Tobias Daub

subscribe kvm

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH] KVM: Introduce direct MSI message injection for in-kernel irqchips

2011-10-21 Thread Jan Kiszka
On 2011-10-21 14:04, Michael S. Tsirkin wrote:
 On Fri, Oct 21, 2011 at 01:51:15PM +0200, Jan Kiszka wrote:
 On 2011-10-21 13:06, Michael S. Tsirkin wrote:
 On Fri, Oct 21, 2011 at 11:19:19AM +0200, Jan Kiszka wrote:
 Currently, MSI messages can only be injected to in-kernel irqchips by
 defining a corresponding IRQ route for each message. This is not only
 unhandy if the MSI messages are generated on the fly by user space,
 IRQ routes are a limited resource that user space has to manage
 carefully.

 By providing a direct injection path, we can both avoid using up limited
 resources and simplify the necessary steps for user land. The API
 already provides a channel (flags) to revoke an injected but not yet
 delivered message which will become important for in-kernel MSI-X vector
 masking support.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com

 I would love to see how you envision extending this to add the masking
 support at least at the API level, not necessarily the supporting code.

 It would seem hard to use flags field for that since MSIX mask is per
 device per vector, not per message.
 Which gets us back to resource per vector which userspace has to manage
 ...

 interrupt remapping is also per device, so it isn't any easier
 with this API.

 Yes, we will need an additional field to associate the message with its
 source device. Could be a PCI address or a handle (like the one assigned
 devices get) returned on MSI-X kernel region setup. We will need a flag
 to declare that address/handle valid, also to tell apart platform MSI
 messages (e.g. coming from HPET on x86).
 
 I have not thought about remapping a lot yet:
 HPET interrupts are not subject to remapping?

Looks like it is, at least on VT-d: The related VT-d document knows two
non-PCI source IDs, namely legacy pin interrupts and other MSIs. So we
may want a more generic source ID that, for MSI-X in-kernel masking, can
then be associated with a device vector for which we accelerate mask
management.

 
 I see no obstacles ATM that
 prevent doing that on top of this API, do you?

 Jan
 
 For masking, I think I do. We need to maintain the pending bit
 and the io notifiers in kernel, per vector.
 An MSI injected with just an address/data pair, without
 vector/device info, can't be masked properly.
 
 We get back to maintaining some handle per vector, right?

First of all, the common case for in-kernel MSI-X mask management will
be MSI sources that are _not_ injected as address-data pair from user
space but come from in-kernel sources (irqfd or host IRQs, ie. assigned
devices). In contrast, this API here is targeting MSI messages generated
in the hypervisor process (ie. current QEMU device emulation).

Still, the new interface should allow for injecting the other vectors as
well without requiring additional coordination of an in-kernel MSI-X
page vs. user space's view on it. For that reason we need a per vector
handle for that special case. But that will naturally derive from
defining a generic MSI-X in-kernel mask management API. You will have to
specify which device shall be accelerated and how many vectors it has
(at maximum). So a directly injected MSI message for those devices will
have to specify that source tuple (device, vector), but only in that
special case.

Maybe I will sit down now and create a draft for a MSI-X mask
acceleration API. That may help feeling better about this proposal. :)

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC 0/2] KVM: remove host and guest pv mmu support

2011-10-21 Thread Marcelo Tosatti
On Thu, Oct 20, 2011 at 05:44:23PM -0700, Chris Wright wrote:
 This feature hasn't been in use for some years now.  The host side bits
 are deprecated for almost a year.  The guest side would only get used
 on old hosts, and it's slower than shadow or hw assisted paging.
 
 Time to remove it.
 
  Documentation/feature-removal-schedule.txt |9 --
  arch/x86/include/asm/kvm_host.h|   13 --
  arch/x86/kernel/kvm.c  |  181 
 
  arch/x86/kvm/mmu.c |  135 -
  arch/x86/kvm/x86.c |   12 --
  5 files changed, 0 insertions(+), 350 deletions(-)

Looks good to me. 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v2 PATCH 0/4] Support sending gratuitous by guest

2011-10-21 Thread Jason Wang
We only track the primary MAC address in qemu and send RARP packets after
migration to notify the switch to update its MAC address table. This
may not work when the guest has a complicated network configuration such
as tagged VLANs or IPv6; those connections may be lost or stall after
migration.

One method to handle them is snooping the network traffic in qemu and
recording the MAC addresses in use, but this method would hurt performance
and is impossible for network backends such as vhost.

So in order to solve this issue, the best method is to let the guest,
instead of qemu, send the gratuitous packets. This series first adds a
model specific function which lets a nic model implement its own
announce function and then implements a virtio-net specific function to
let the guest send the gratuitous packets.

Only basic tests were done.

Comments are welcome.

Thanks

---

Jason Wang (4):
  announce self after vm start
  net: export announce_self_create()
  net: model specific announcing support
  virtio-net: notify guest to annouce itself


 hw/virtio-net.c |   20 +++-
 hw/virtio-net.h |2 ++
 migration.c |1 -
 net.c   |   31 +++
 net.h   |3 +++
 savevm.c|   40 +---
 vl.c|1 +
 7 files changed, 61 insertions(+), 37 deletions(-)

-- 
Jason Wang
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v2 PATCH 1/4] announce self after vm start

2011-10-21 Thread Jason Wang
We send gratuitous packets to let the switch update its MAC address
table. This is currently only done after migration, because the guest may
move to a host that is connected to a different switch port.

Unfortunately this kind of notification is also needed when continuing a
stopped vm, as the MAC address table entry may no longer exist because of
aging. This patch solves this by calling qemu_announce_self() in
vm_start() instead of in process_incoming_migration(). With this,
gratuitous packets are sent each time the vm starts.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 migration.c |1 -
 vl.c|1 +
 2 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/migration.c b/migration.c
index 77a51ad..3326b02 100644
--- a/migration.c
+++ b/migration.c
@@ -67,7 +67,6 @@ void process_incoming_migration(QEMUFile *f)
 fprintf(stderr, load of migration failed\n);
 exit(0);
 }
-qemu_announce_self();
 DPRINTF(successfully loaded vm state\n);
 
 if (autostart) {
diff --git a/vl.c b/vl.c
index dbf7778..e4408e0 100644
--- a/vl.c
+++ b/vl.c
@@ -1262,6 +1262,7 @@ void vm_start(void)
 vm_state_notify(1, RUN_STATE_RUNNING);
 resume_all_vcpus();
 monitor_protocol_event(QEVENT_RESUME, NULL);
+qemu_announce_self();
 }
 }
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v2 PATCH 2/4] net: export announce_self_create()

2011-10-21 Thread Jason Wang
Export and move announce_self_create() to net.c so that it can be used by
model specific announcing functions.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 net.c|   31 +++
 net.h|1 +
 savevm.c |   32 
 3 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/net.c b/net.c
index d05930c..516ff9e 100644
--- a/net.c
+++ b/net.c
@@ -42,6 +42,37 @@ static QTAILQ_HEAD(, VLANClientState) non_vlan_clients;
 
 int default_net = 1;
 
+#ifndef ETH_P_RARP
+#define ETH_P_RARP 0x8035
+#endif
+#define ARP_HTYPE_ETH 0x0001
+#define ARP_PTYPE_IP 0x0800
+#define ARP_OP_REQUEST_REV 0x3
+
+int announce_self_create(uint8_t *buf, uint8_t *mac_addr)
+{
+/* Ethernet header. */
+memset(buf, 0xff, 6); /* destination MAC addr */
+memcpy(buf + 6, mac_addr, 6); /* source MAC addr */
+*(uint16_t *)(buf + 12) = htons(ETH_P_RARP); /* ethertype */
+
+/* RARP header. */
+*(uint16_t *)(buf + 14) = htons(ARP_HTYPE_ETH); /* hardware addr space */
+*(uint16_t *)(buf + 16) = htons(ARP_PTYPE_IP); /* protocol addr space */
+*(buf + 18) = 6; /* hardware addr length (ethernet) */
+*(buf + 19) = 4; /* protocol addr length (IPv4) */
+*(uint16_t *)(buf + 20) = htons(ARP_OP_REQUEST_REV); /* opcode */
+memcpy(buf + 22, mac_addr, 6); /* source hw addr */
+memset(buf + 28, 0x00, 4); /* source protocol addr */
+memcpy(buf + 32, mac_addr, 6); /* target hw addr */
+memset(buf + 38, 0x00, 4); /* target protocol addr */
+
+/* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
+memset(buf + 42, 0x00, 18);
+
+return 60; /* len (FCS will be added by hardware) */
+}
+
 /***/
 /* network device redirectors */
 
diff --git a/net.h b/net.h
index 9f633f8..4943d4b 100644
--- a/net.h
+++ b/net.h
@@ -178,5 +178,6 @@ int do_netdev_del(Monitor *mon, const QDict *qdict, QObject 
**ret_data);
 void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd);
 
 int net_handle_fd_param(Monitor *mon, const char *param);
+int announce_self_create(uint8_t *buf, uint8_t *mac_addr);
 
 #endif
diff --git a/savevm.c b/savevm.c
index bf4d0e7..8293ee6 100644
--- a/savevm.c
+++ b/savevm.c
@@ -85,38 +85,6 @@
 
 #define SELF_ANNOUNCE_ROUNDS 5
 
-#ifndef ETH_P_RARP
-#define ETH_P_RARP 0x8035
-#endif
-#define ARP_HTYPE_ETH 0x0001
-#define ARP_PTYPE_IP 0x0800
-#define ARP_OP_REQUEST_REV 0x3
-
-static int announce_self_create(uint8_t *buf,
-   uint8_t *mac_addr)
-{
-/* Ethernet header. */
-memset(buf, 0xff, 6); /* destination MAC addr */
-memcpy(buf + 6, mac_addr, 6); /* source MAC addr */
-*(uint16_t *)(buf + 12) = htons(ETH_P_RARP); /* ethertype */
-
-/* RARP header. */
-*(uint16_t *)(buf + 14) = htons(ARP_HTYPE_ETH); /* hardware addr space */
-*(uint16_t *)(buf + 16) = htons(ARP_PTYPE_IP); /* protocol addr space */
-*(buf + 18) = 6; /* hardware addr length (ethernet) */
-*(buf + 19) = 4; /* protocol addr length (IPv4) */
-*(uint16_t *)(buf + 20) = htons(ARP_OP_REQUEST_REV); /* opcode */
-memcpy(buf + 22, mac_addr, 6); /* source hw addr */
-memset(buf + 28, 0x00, 4); /* source protocol addr */
-memcpy(buf + 32, mac_addr, 6); /* target hw addr */
-memset(buf + 38, 0x00, 4); /* target protocol addr */
-
-/* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
-memset(buf + 42, 0x00, 18);
-
-return 60; /* len (FCS will be added by hardware) */
-}
-
 static void qemu_announce_self_iter(NICState *nic, void *opaque)
 {
 uint8_t buf[60];

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v2 PATCH 3/4] net: model specific announcing support

2011-10-21 Thread Jason Wang
This patch introduces a function pointer in NetClientInfo which is
called during self announcement to do the model specific announcement,
such as sending gratuitous packets. The previous method is kept as a
fallback when the model specific announcement fails or is not implemented.

The first user would be virtio-net.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 net.h|2 ++
 savevm.c |8 +---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net.h b/net.h
index 4943d4b..1845f01 100644
--- a/net.h
+++ b/net.h
@@ -46,6 +46,7 @@ typedef ssize_t (NetReceive)(VLANClientState *, const uint8_t 
*, size_t);
 typedef ssize_t (NetReceiveIOV)(VLANClientState *, const struct iovec *, int);
 typedef void (NetCleanup) (VLANClientState *);
 typedef void (LinkStatusChanged)(VLANClientState *);
+typedef int (NetAnnounce)(VLANClientState *);
 
 typedef struct NetClientInfo {
 net_client_type type;
@@ -57,6 +58,7 @@ typedef struct NetClientInfo {
 NetCleanup *cleanup;
 LinkStatusChanged *link_status_changed;
 NetPoll *poll;
+NetAnnounce *announce;
 } NetClientInfo;
 
 struct VLANClientState {
diff --git a/savevm.c b/savevm.c
index 8293ee6..de6a01a 100644
--- a/savevm.c
+++ b/savevm.c
@@ -89,10 +89,12 @@ static void qemu_announce_self_iter(NICState *nic, void 
*opaque)
 {
 uint8_t buf[60];
 int len;
+NetAnnounce *func = nic-nc.info-announce;
 
-len = announce_self_create(buf, nic-conf-macaddr.a);
-
-qemu_send_packet_raw(nic-nc, buf, len);
+if (func == NULL || func(nic-nc) != 0) {
+len = announce_self_create(buf, nic-conf-macaddr.a);
+qemu_send_packet_raw(nic-nc, buf, len);
+}
 }
 
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v2 PATCH 4/4] virtio-net: notify guest to annouce itself

2011-10-21 Thread Jason Wang
It's hard to track all MAC addresses and their usage (vlan, bondings,
ipv6) in qemu to send gratuitous packets from the qemu side, so the better
choice is to let the guest do it.

The patch introduces a new rw config status bit of virtio-net,
VIRTIO_NET_S_ANNOUNCE, which is used to notify the guest to announce itself
(such as by sending gratuitous packets) through a config update
interrupt. When the guest has done the announcement, it should clear that
bit.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 hw/virtio-net.c |   20 +++-
 hw/virtio-net.h |2 ++
 2 files changed, 21 insertions(+), 1 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 8c2f460..7f844e7 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -95,6 +95,10 @@ static void virtio_net_set_config(VirtIODevice *vdev, const 
uint8_t *config)
 memcpy(n-mac, netcfg.mac, ETH_ALEN);
 qemu_format_nic_info_str(n-nic-nc, n-mac);
 }
+
+if (memcmp(netcfg.status, n-status, sizeof(n-status))) {
+memcpy(n-status, netcfg.status, sizeof(n-status));
+}
 }
 
 static bool virtio_net_started(VirtIONet *n, uint8_t status)
@@ -227,7 +231,7 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, 
uint32_t features)
 {
 VirtIONet *n = to_virtio_net(vdev);
 
-features |= (1  VIRTIO_NET_F_MAC);
+features |= (1  VIRTIO_NET_F_MAC | 1  VIRTIO_NET_F_GUEST_ANNOUNCE);
 
 if (peer_has_vnet_hdr(n)) {
 tap_using_vnet_hdr(n-nic-nc.peer, 1);
@@ -983,6 +987,19 @@ static void virtio_net_cleanup(VLANClientState *nc)
 n-nic = NULL;
 }
 
+static int virtio_net_announce(VLANClientState *nc)
+{
+VirtIONet *n = DO_UPCAST(NICState, nc, nc)-opaque;
+
+if (n-vdev.guest_features  (0x1  VIRTIO_NET_F_GUEST_ANNOUNCE)) {
+n-status |= VIRITO_NET_S_ANNOUNCE;
+virtio_notify_config(n-vdev);
+return 0;
+}
+
+return 1;
+}
+
 static NetClientInfo net_virtio_info = {
 .type = NET_CLIENT_TYPE_NIC,
 .size = sizeof(NICState),
@@ -990,6 +1007,7 @@ static NetClientInfo net_virtio_info = {
 .receive = virtio_net_receive,
 .cleanup = virtio_net_cleanup,
 .link_status_changed = virtio_net_set_link_status,
+.announce = virtio_net_announce,
 };
 
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index 4468741..c47bd52 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -44,8 +44,10 @@
 #define VIRTIO_NET_F_CTRL_RX18  /* Control channel RX mode support */
 #define VIRTIO_NET_F_CTRL_VLAN  19  /* Control channel VLAN filtering */
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20   /* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Guest can announce itself */
 
 #define VIRTIO_NET_S_LINK_UP1   /* Link is up */
+#define VIRITO_NET_S_ANNOUNCE   2   /* Announcement is needed */
 
 #define TX_TIMER_INTERVAL 15 /* 150 us */
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v2 PATCH 5/4 PATCH] virtio-net: send gratuitous packet when needed

2011-10-21 Thread Jason Wang
This lets the virtio-net driver send gratuitous packets, triggered by a new
config bit - VIRTIO_NET_S_ANNOUNCE - checked on each config update
interrupt. When this bit is set by the backend, the driver schedules
a work item to send gratuitous packets through NETDEV_NOTIFY_PEERS.

This feature is negotiated through bit VIRTIO_NET_F_GUEST_ANNOUNCE.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 drivers/net/virtio_net.c   |   31 ++-
 include/linux/virtio_net.h |2 ++
 2 files changed, 32 insertions(+), 1 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b8225f3..1cdecf7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -71,6 +71,9 @@ struct virtnet_info {
/* Work struct for refilling if we run low on memory. */
struct delayed_work refill;
 
+   /* Work struct for send gratituous packet. */
+   struct work_struct announce;
+
/* Chain pages by the private ptr. */
struct page *pages;
 
@@ -507,6 +510,13 @@ static void refill_work(struct work_struct *work)
schedule_delayed_work(vi-refill, HZ/2);
 }
 
+static void announce_work(struct work_struct *work)
+{
+   struct virtnet_info *vi = container_of(work, struct virtnet_info,
+  announce);
+   netif_notify_peers(vi-dev);
+}
+
 static int virtnet_poll(struct napi_struct *napi, int budget)
 {
struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
@@ -923,11 +933,22 @@ static void virtnet_update_status(struct virtnet_info *vi)
  v, sizeof(v));
 
/* Ignore unknown (future) status bits */
-   v = VIRTIO_NET_S_LINK_UP;
+   v = VIRTIO_NET_S_LINK_UP | VIRTIO_NET_S_ANNOUNCE;
 
if (vi-status == v)
return;
 
+   if (v  VIRTIO_NET_S_ANNOUNCE) {
+   if ((v  VIRTIO_NET_S_LINK_UP) 
+   virtio_has_feature(vi-vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+   schedule_work(vi-announce);
+   v = ~VIRTIO_NET_S_ANNOUNCE;
+vi-vdev-config-set(vi-vdev,
+  offsetof(struct virtio_net_config,
+  status),
+  v, sizeof(v));
+   }
+
vi-status = v;
 
if (vi-status  VIRTIO_NET_S_LINK_UP) {
@@ -937,6 +958,7 @@ static void virtnet_update_status(struct virtnet_info *vi)
netif_carrier_off(vi-dev);
netif_stop_queue(vi-dev);
}
+
 }
 
 static void virtnet_config_changed(struct virtio_device *vdev)
@@ -1016,6 +1038,8 @@ static int virtnet_probe(struct virtio_device *vdev)
goto free;
 
INIT_DELAYED_WORK(vi-refill, refill_work);
+   if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+   INIT_WORK(vi-announce, announce_work);
sg_init_table(vi-rx_sg, ARRAY_SIZE(vi-rx_sg));
sg_init_table(vi-tx_sg, ARRAY_SIZE(vi-tx_sg));
 
@@ -1077,6 +1101,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 unregister:
unregister_netdev(dev);
cancel_delayed_work_sync(vi-refill);
+   if (virtio_has_feature(vi-vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+   cancel_work_sync(vi-announce);
 free_vqs:
vdev-config-del_vqs(vdev);
 free_stats:
@@ -1118,6 +1144,8 @@ static void __devexit virtnet_remove(struct virtio_device 
*vdev)
 
unregister_netdev(vi-dev);
cancel_delayed_work_sync(vi-refill);
+   if(virtio_has_feature(vi-vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+   cancel_work_sync(vi-announce);
 
/* Free unused buffers in both send and recv, if any. */
free_unused_bufs(vi);
@@ -1144,6 +1172,7 @@ static unsigned int features[] = {
VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
+   VIRTIO_NET_F_GUEST_ANNOUNCE,
 };
 
 static struct virtio_driver virtio_net_driver = {
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 970d5a2..44a38d6 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -49,8 +49,10 @@
 #define VIRTIO_NET_F_CTRL_RX   18  /* Control channel RX mode support */
 #define VIRTIO_NET_F_CTRL_VLAN 19  /* Control channel VLAN filtering */
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20  /* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Guest can send gratituous packet */
 
 #define VIRTIO_NET_S_LINK_UP   1   /* Link is up */
+#define VIRTIO_NET_S_ANNOUNCE   2   /* Announcement is needed */
 
 struct virtio_net_config {
/* The config defining mac address (if VIRTIO_NET_F_MAC) */

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at