Re: [PATCH] KVM: forward declare structs in kvm_types.h

2014-08-29 Thread Christian Borntraeger
On 29/08/14 14:01, Paolo Bonzini wrote:
 Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid
 'struct foo' declared inside parameter list warnings (and consequent
 breakage due to conflicting types).
 
 Move them from individual files to a generic place in linux/kvm_types.h.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com

I can confirm that s390 still builds and works.
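(Editorial aside, not part of the original mail: a minimal illustration, simplified and not the actual kernel headers, of the pattern this patch centralizes — forward declarations in one shared header let arch headers use pointers to these types in prototypes without repeating the declarations per architecture. The prototypes below are illustrative only.)

/* include/linux/kvm_types.h style: opaque forward declarations,
 * visible to every architecture. */
struct kvm;
struct kvm_vcpu;
struct kvm_one_reg;

/* asm/kvm_host.h style: prototypes can take pointers to these types
 * without "'struct foo' declared inside parameter list" warnings and
 * without including the headers that define the structs. */
int kvm_arch_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arch_unmap_hva(struct kvm *kvm, unsigned long hva);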

 ---
  arch/arm/include/asm/kvm_host.h |  7 ++-
  arch/arm64/include/asm/kvm_host.h   |  6 ++
  arch/ia64/include/asm/kvm_host.h|  3 ---
  arch/mips/include/asm/kvm_host.h|  5 -
  arch/powerpc/include/asm/kvm_host.h |  5 -
  arch/s390/include/asm/kvm_host.h|  5 +++--
  arch/x86/include/asm/kvm_host.h |  4 
  include/linux/kvm_types.h   | 11 +++
  8 files changed, 18 insertions(+), 28 deletions(-)
 
 diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
 index 84291feee9e1..aea259e9431f 100644
 --- a/arch/arm/include/asm/kvm_host.h
 +++ b/arch/arm/include/asm/kvm_host.h
 @@ -19,6 +19,8 @@
  #ifndef __ARM_KVM_HOST_H__
  #define __ARM_KVM_HOST_H__
 
 +#include <linux/types.h>
 +#include <linux/kvm_types.h>
  #include <asm/kvm.h>
  #include <asm/kvm_asm.h>
  #include <asm/kvm_mmio.h>
 @@ -40,7 +42,6 @@
 
  #include kvm/arm_vgic.h
 
 -struct kvm_vcpu;
  u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
  int kvm_target_cpu(void);
  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 @@ -149,20 +150,17 @@ struct kvm_vcpu_stat {
   u32 halt_wakeup;
  };
 
 -struct kvm_vcpu_init;
  int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
   const struct kvm_vcpu_init *init);
  int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
  unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
  int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 -struct kvm_one_reg;
  int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
  int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
  u64 kvm_call_hyp(void *hypfn, ...);
  void force_vm_exit(const cpumask_t *mask);
 
  #define KVM_ARCH_WANT_MMU_NOTIFIER
 -struct kvm;
  int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
  int kvm_unmap_hva_range(struct kvm *kvm,
   unsigned long start, unsigned long end);
 @@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 
  int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
  unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
 -struct kvm_one_reg;
  int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg 
 *);
  int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg 
 *);
 
 diff --git a/arch/arm64/include/asm/kvm_host.h 
 b/arch/arm64/include/asm/kvm_host.h
 index 94d8a3c9b644..b5045e3e05f8 100644
 --- a/arch/arm64/include/asm/kvm_host.h
 +++ b/arch/arm64/include/asm/kvm_host.h
 @@ -22,6 +22,8 @@
  #ifndef __ARM64_KVM_HOST_H__
  #define __ARM64_KVM_HOST_H__
 
 +#include <linux/types.h>
 +#include <linux/kvm_types.h>
  #include <asm/kvm.h>
  #include <asm/kvm_asm.h>
  #include <asm/kvm_mmio.h>
 @@ -41,7 +43,6 @@
 
  #define KVM_VCPU_MAX_FEATURES 3
 
 -struct kvm_vcpu;
  int kvm_target_cpu(void);
  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
  int kvm_arch_dev_ioctl_check_extension(long ext);
 @@ -164,18 +165,15 @@ struct kvm_vcpu_stat {
   u32 halt_wakeup;
  };
 
 -struct kvm_vcpu_init;
  int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
   const struct kvm_vcpu_init *init);
  int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
  unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
  int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 -struct kvm_one_reg;
  int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
  int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 
  #define KVM_ARCH_WANT_MMU_NOTIFIER
 -struct kvm;
  int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
  int kvm_unmap_hva_range(struct kvm *kvm,
   unsigned long start, unsigned long end);
 diff --git a/arch/ia64/include/asm/kvm_host.h 
 b/arch/ia64/include/asm/kvm_host.h
 index 353167d95c66..4729752b7256 100644
 --- a/arch/ia64/include/asm/kvm_host.h
 +++ b/arch/ia64/include/asm/kvm_host.h
 @@ -234,9 +234,6 @@ struct kvm_vm_data {
  #define KVM_REQ_PTC_G   32
  #define KVM_REQ_RESUME   33
 
 -struct kvm;
 -struct kvm_vcpu;
 -
  struct kvm_mmio_req {
   uint64_t addr;  /*  physical address*/
   uint64_t size;  /*  size in bytes   */
 diff --git a/arch/mips/include/asm/kvm_host.h 
 b/arch/mips/include/asm/kvm_host.h
 index b4d900acbdb9..0b24d6622ec1 100644
 --- a/arch/mips/include/asm/kvm_host.h
 +++ b/arch/mips/include/asm/kvm_host.h
 @@ -96,11 +96,6 @@
  #define CAUSEB_DC   27
  #define CAUSEF_DC   

Re: [Qemu-devel] [question] virtio-blk performance degradationhappened with virito-serial

2014-09-01 Thread Christian Borntraeger
On 01/09/14 14:52, Zhang Haoyu wrote:
 Hi, all

 I started a VM with virtio-serial (default number of ports: 31) and found that 
 virtio-blk performance degraded by about 25%; this problem can 
 be reproduced 100% of the time.
 without virtio-serial:
 4k-read-random 1186 IOPS
 with virtio-serial:
 4k-read-random 871 IOPS

 but if I use the max_ports=2 option to limit the max number of virtio-serial 
 ports, then the I/O performance degradation is not so serious, about 5%.

 And, ide performance degradation does not happen with virtio-serial.

 Pretty sure it's related to MSI vectors in use.  It's possible that
 the virtio-serial device takes up all the available vectors in the guest,
 leaving old-style IRQs for the virtio-blk device.

 I don't think so.
 I use iometer to test the 64k-read (or write) sequential case. If I disable 
 virtio-serial dynamically via device manager -> virtio-serial -> disable,
 then the performance improves by about 25% immediately; if I then re-enable 
 virtio-serial via device manager -> virtio-serial -> enable,
 the performance drops back again, very obviously.
 To add to that:
 although virtio-serial is enabled, I don't use it at all; the degradation 
 still happens.

This is just wild guessing:
If virtio-blk and virtio-serial share an IRQ, the guest operating system has to 
check each virtqueue for activity. Maybe there is some inefficiency doing that.
AFAIK virtio-serial registers 64 virtqueues (for 31 ports + console) even if 
everything is unused.

Christian
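(Editorial aside: to make the scanning concern above concrete, here is a toy sketch in C — made-up names, not the actual Linux virtio_pci driver. With a shared legacy interrupt the handler cannot tell which queue fired, so every virtqueue of every device on the line gets scanned; with MSI-X each queue has its own vector and only the active queue is serviced.)

#include <stdbool.h>
#include <stddef.h>

/* Toy model only; real drivers read the device's ISR and used rings. */
struct toy_vq { bool has_work; };

struct toy_virtio_dev {
	struct toy_vq *vqs;
	size_t nvqs;    /* virtio-serial: ~64 vqs for 31 ports + console */
};

/* One shared INTx line: a single interrupt means scanning every queue of
 * every device sharing it, even queues that are never used. */
void toy_shared_irq_handler(struct toy_virtio_dev *devs, size_t ndevs)
{
	for (size_t d = 0; d < ndevs; d++)
		for (size_t q = 0; q < devs[d].nvqs; q++)
			if (devs[d].vqs[q].has_work) {
				/* process completions for this queue */
			}
}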




Re: [Qemu-devel] [question] virtio-blk performance degradationhappened with virito-serial

2014-09-01 Thread Christian Borntraeger
On 01/09/14 15:12, Paolo Bonzini wrote:
 On 01/09/2014 15:09, Christian Borntraeger wrote:
 This is just wild guessing:
 If virtio-blk and virtio-serial share an IRQ, the guest operating system has 
 to check each virtqueue for activity. Maybe there is some inefficiency doing 
 that.
 AFAIK virtio-serial registers 64 virtqueues (on 31 ports + console) even if 
 everything is unused.
 
 That could be the case if MSI is disabled.
 
 Paolo
 

Do the Windows virtio drivers enable MSIs in their INF file?

Christian



Re: [Qemu-devel] [question] virtio-blk performance degradationhappened with virito-serial

2014-09-01 Thread Christian Borntraeger
On 01/09/14 15:29, Paolo Bonzini wrote:
 On 01/09/2014 15:22, Christian Borntraeger wrote:
 If virtio-blk and virtio-serial share an IRQ, the guest operating system 
 has to check each virtqueue for activity. Maybe there is some inefficiency 
 doing that.
 AFAIK virtio-serial registers 64 virtqueues (on 31 ports + console) even 
 if everything is unused.

 That could be the case if MSI is disabled.

 Do the windows virtio drivers enable MSIs, in their inf file?
 
 It depends on the version of the drivers, but it is a reasonable guess
 at what differs between Linux and Windows.  Haoyu, can you give us the
 output of lspci from a Linux guest?
 
 Paolo

Zhang Haoyu, which virtio drivers did you use?

I just checked the Fedora virtio driver. The INF file does not contain the MSI 
enablement as described in
http://msdn.microsoft.com/en-us/library/windows/hardware/ff544246%28v=vs.85%29.aspx
That would explain the performance issues - given that the linked information is 
still accurate.



Christian







Re: [Qemu-devel] [question] virtio-blk performance degradationhappened with virito-serial

2014-09-01 Thread Christian Borntraeger
On 01/09/14 16:03, Christian Borntraeger wrote:
 On 01/09/14 15:29, Paolo Bonzini wrote:
 On 01/09/2014 15:22, Christian Borntraeger wrote:
 If virtio-blk and virtio-serial share an IRQ, the guest operating system 
 has to check each virtqueue for activity. Maybe there is some 
 inefficiency doing that.
 AFAIK virtio-serial registers 64 virtqueues (on 31 ports + console) even 
 if everything is unused.

 That could be the case if MSI is disabled.

 Do the windows virtio drivers enable MSIs, in their inf file?

 It depends on the version of the drivers, but it is a reasonable guess
 at what differs between Linux and Windows.  Haoyu, can you give us the
 output of lspci from a Linux guest?

 Paolo
 
 Zhang Haoyu, which virtio drivers did you use?
 
 I just checked the Fedora virtio driver. The INF file does not contain the 
 MSI enablement as described in
 http://msdn.microsoft.com/en-us/library/windows/hardware/ff544246%28v=vs.85%29.aspx
 That would explain the performance issues - given that the link information 
 is still true.

Sorry, I looked at the wrong INF file. The Fedora driver does use MSI for serial 
and block.



[GIT PULL 0/2] KVM: s390/mm: Two fixes for master (3.17)

2014-09-02 Thread Christian Borntraeger
Paolo,

this request is against kvm/master and contains two fixes for guests
that use storage keys. 


The following changes since commit ab3f285f227fec62868037e9b1b1fd18294a83b8:

  KVM: s390/mm: try a cow on read only pages for key ops (2014-08-25 14:35:28 
+0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git  
tags/kvm-s390-master-20140902

for you to fetch changes up to 1951497d90d6754201af3e65241a06f9ef6755cd:

  KVM: s390/mm: Fix guest storage key corruption in ptep_set_access_flags 
(2014-09-02 10:30:43 +0200)


KVM: s390/mm: Fix two guest storage key corruptions on paging

Here are two patches that fix issues that were introduced with
commit 0944fe3f4a32 ("s390/mm: implement software referenced bits").
This commit introduced additional invalid->valid transitions that
we need to handle to transfer the storage key from/to the pgste.


Christian Borntraeger (2):
  KVM: s390/mm: Fix storage key corruption during swapping
  KVM: s390/mm: Fix guest storage key corruption in ptep_set_access_flags

 arch/s390/include/asm/pgtable.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)



[GIT PULL 2/2] KVM: s390/mm: Fix guest storage key corruption in ptep_set_access_flags

2014-09-02 Thread Christian Borntraeger
commit 0944fe3f4a32 ("s390/mm: implement software referenced bits")
triggered another paging/storage key corruption. There is an
unhandled invalid->valid pte change where we have to set the real
storage key from the pgste.
When doing paging, a guest page might be in swapcache or swap, and when
faulted in it might be read-only and, due to a parallel scan, old.
A do_wp_page will make it writable and young. Due to software
reference tracking this page was invalid and now becomes valid.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Acked-by: Martin Schwidefsky schwidef...@de.ibm.com
Cc: sta...@vger.kernel.org # v3.12+
---
 arch/s390/include/asm/pgtable.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 32686e8..5efb2fe 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1331,6 +1331,7 @@ static inline int ptep_set_access_flags(struct 
vm_area_struct *vma,
ptep_flush_direct(vma->vm_mm, address, ptep);
 
if (mm_has_pgste(vma->vm_mm)) {
+   pgste_set_key(ptep, pgste, entry, vma->vm_mm);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
} else
-- 
1.8.4.2



[GIT PULL 1/2] KVM: s390/mm: Fix storage key corruption during swapping

2014-09-02 Thread Christian Borntraeger
Since 3.12, or more precisely commit 0944fe3f4a32 ("s390/mm:
implement software referenced bits"), guest storage keys get
corrupted during paging. This commit added another valid->invalid
transition for page tables - namely ptep_test_and_clear_young.
We have to transfer the storage key into the pgste in that case.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Acked-by: Martin Schwidefsky schwidef...@de.ibm.com
Cc: sta...@vger.kernel.org # v3.12+
---
 arch/s390/include/asm/pgtable.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b76317c..32686e8 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1127,7 +1127,7 @@ static inline int ptep_test_and_clear_young(struct 
vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
 {
pgste_t pgste;
-   pte_t pte;
+   pte_t pte, oldpte;
int young;
 
if (mm_has_pgste(vma->vm_mm)) {
@@ -1135,12 +1135,13 @@ static inline int ptep_test_and_clear_young(struct 
vm_area_struct *vma,
pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
}
 
-   pte = *ptep;
+   oldpte = pte = *ptep;
ptep_flush_direct(vma->vm_mm, addr, ptep);
young = pte_young(pte);
pte = pte_mkold(pte);
 
if (mm_has_pgste(vma->vm_mm)) {
+   pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm);
pgste = pgste_set_pte(ptep, pgste, pte);
pgste_set_unlock(ptep, pgste);
} else
-- 
1.8.4.2



[PATCH 2/3] KVM: remove redundant assigment of return value in kvm_dev_ioctl

2014-09-04 Thread Christian Borntraeger
The first statement of kvm_dev_ioctl is
long r = -EINVAL;

No need to reassign the same value.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 virt/kvm/kvm_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0a824a0..5ea65d2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2610,9 +2610,8 @@ static long kvm_dev_ioctl(struct file *filp,
long r = -EINVAL;
 
switch (ioctl) {
case KVM_GET_API_VERSION:
-   r = -EINVAL;
if (arg)
goto out;
r = KVM_API_VERSION;
break;
@@ -2622,9 +2621,8 @@ static long kvm_dev_ioctl(struct file *filp,
case KVM_CHECK_EXTENSION:
r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
break;
case KVM_GET_VCPU_MMAP_SIZE:
-   r = -EINVAL;
if (arg)
goto out;
r = PAGE_SIZE; /* struct kvm_run */
 #ifdef CONFIG_X86
-- 
1.8.4.2



[PATCH 1/3] KVM: remove redundant check of in_spin_loop

2014-09-04 Thread Christian Borntraeger
The expression `vcpu->spin_loop.in_spin_loop' is always true,
because it is evaluated only when the condition
`!vcpu->spin_loop.in_spin_loop' is false.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 virt/kvm/kvm_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7176929..0a824a0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1768,10 +1768,9 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct 
kvm_vcpu *vcpu)
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
bool eligible;
 
eligible = !vcpu->spin_loop.in_spin_loop ||
-   (vcpu->spin_loop.in_spin_loop &&
-    vcpu->spin_loop.dy_eligible);
+   vcpu->spin_loop.dy_eligible;
 
if (vcpu->spin_loop.in_spin_loop)
kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
 
-- 
1.8.4.2



[PATCH 0/3] cleanup of redundant statements

2014-09-04 Thread Christian Borntraeger
Paolo,

I was playing with some static code checkers. Here is some fallout
from the kvm common code. Only minor things that are not real errors,
just redundant statements.

One could argue here and there that these statements make the code easier
to understand. So, please have a look and either drop or apply the patches.

Christian Borntraeger (3):
  KVM: remove redundant check of in_spin_loop
  KVM: remove redundant assigment of return value in kvm_dev_ioctl
  KVM: remove redundant assignments in __kvm_set_memory_region

 virt/kvm/kvm_main.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

-- 
1.8.4.2



[PATCH 3/3] KVM: remove redundant assignments in __kvm_set_memory_region

2014-09-04 Thread Christian Borntraeger
__kvm_set_memory_region sets r to -EINVAL very early.
Doing it again is not necessary. The same is true later on, where
r is assigned -ENOMEM twice.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 virt/kvm/kvm_main.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5ea65d2..2d868ad 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -776,9 +776,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
slot = id_to_memslot(kvm->memslots, mem->slot);
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
 
-   r = -EINVAL;
if (npages > KVM_MEM_MAX_NR_PAGES)
goto out;
 
if (!npages)
@@ -790,9 +789,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
new.base_gfn = base_gfn;
new.npages = npages;
new.flags = mem->flags;
 
-   r = -EINVAL;
if (npages) {
if (!old.npages)
change = KVM_MR_CREATE;
else { /* Modify an existing slot. */
@@ -846,9 +844,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
 
if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-   r = -ENOMEM;
slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
GFP_KERNEL);
if (!slots)
goto out_free;
-- 
1.8.4.2



[GIT PULL 0/7] KVM: s390: Fixes and features for next (3.18)

2014-09-10 Thread Christian Borntraeger
Paolo,

please have a look at the next bunch of s390 patches and consider applying them:

The following changes since commit fd2752352bbc98850d83b5448a288d8991590317:

  KVM: x86: use guest maxphyaddr to check MTRR values (2014-08-29 18:56:24 
+0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git  
tags/kvm-s390-next-20140910

for you to fetch changes up to bfac1f59a1afb13a3cf225bffd04be99a49c51a6:

  KVM: s390/interrupt: remove double assignment (2014-09-10 12:19:45 +0200)


KVM: s390: Fixes and features for next (3.18)

1. Crypto/CPACF support: To enable the MSA4 instructions we have to
   provide a common control structure for each SIE control block
2. Two cleanups found by a static code checker: one redundant assignment
   and one useless if
3. Fix the page handling of the diag10 ballooning interface. If the
   guest freed the pages at absolute 0 some checks and frees were
   incorrect
4. Limit guests to 16TB
5. Add __must_check to interrupt injection code


Christian Borntraeger (6):
  KVM: s390: add __must_check to interrupt deliver functions
  KVM: s390: Limit guest size to 16TB
  KVM: s390: unintended fallthrough for external call
  KVM: s390: get rid of constant condition in ipte_unlock_simple
  KVM: s390/cmm: Fix prefix handling for diag 10 balloon
  KVM: s390/interrupt: remove double assignment

Tony Krowiak (1):
  KVM: CPACF: Enable MSA4 instructions for kvm guest

 arch/s390/include/asm/kvm_host.h | 14 +-
 arch/s390/kvm/diag.c | 26 ++
 arch/s390/kvm/gaccess.c  |  3 +--
 arch/s390/kvm/interrupt.c| 14 +++---
 arch/s390/kvm/kvm-s390.c | 35 ++-
 arch/s390/kvm/kvm-s390.h |  2 +-
 6 files changed, 74 insertions(+), 20 deletions(-)



[GIT PULL 3/7] KVM: s390: Limit guest size to 16TB

2014-09-10 Thread Christian Borntraeger
Currently we fill up a full 5-level page table to hold the guest
mapping. Since commit "support gmap page tables with less than 5
levels" we can do better.
Having more than 4 TB might be useful for some testing scenarios,
so let's just limit ourselves to a 16TB guest size.
Having more than that is totally untested as I do not have enough
swap space/memory.

We continue to allow ucontrol guests the full size.
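(Editorial aside, not part of the patch: a quick, illustrative sanity check of the limit used in the hunk below.)

#include <stdio.h>

int main(void)
{
	/* gmap_alloc() below is passed (1UL << 44) - 1 as the highest
	 * guest address: 2^44 bytes = 16 * 2^40 bytes = 16 TiB. */
	unsigned long long limit = (1ULL << 44) - 1;

	printf("max guest address: %#llx (%llu TiB addressable)\n",
	       limit, (limit + 1) >> 40);
	return 0;
}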

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Acked-by: Cornelia Huck cornelia.h...@de.ibm.com
Cc: Martin Schwidefsky schwidef...@de.ibm.com
---
 arch/s390/kvm/kvm-s390.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2037738..b95d4a4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -458,7 +458,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (type & KVM_VM_S390_UCONTROL) {
kvm->arch.gmap = NULL;
} else {
-   kvm->arch.gmap = gmap_alloc(current->mm, -1UL);
+   kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
if (!kvm->arch.gmap)
goto out_nogmap;
kvm->arch.gmap->private = kvm;
-- 
1.8.4.2



[GIT PULL 4/7] KVM: s390: unintended fallthrough for external call

2014-09-10 Thread Christian Borntraeger
We must not fall through if the conditions for external call are not met.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
Cc: sta...@vger.kernel.org
---
 arch/s390/kvm/interrupt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index d56da1d..4abf819 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -86,6 +86,7 @@ static int __must_check __interrupt_is_deliverable(struct 
kvm_vcpu *vcpu,
return 0;
if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
return 1;
+   return 0;
case KVM_S390_INT_EMERGENCY:
if (psw_extint_disabled(vcpu))
return 0;
-- 
1.8.4.2



[GIT PULL 5/7] KVM: s390: get rid of constant condition in ipte_unlock_simple

2014-09-10 Thread Christian Borntraeger
Due to the earlier check we know that ipte_lock_count must be 0.
No need to add a useless if. Let's make it clear that we are
always going to wake up when we execute that code.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Acked-by: Heiko Carstens heiko.carst...@de.ibm.com
---
 arch/s390/kvm/gaccess.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4653ac6..0f961a1 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -254,8 +254,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
new = old = ACCESS_ONCE(*ic);
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
-   if (!ipte_lock_count)
-   wake_up(&vcpu->kvm->arch.ipte_wq);
+   wake_up(&vcpu->kvm->arch.ipte_wq);
 out:
mutex_unlock(&ipte_mutex);
 }
-- 
1.8.4.2



[GIT PULL 6/7] KVM: s390/cmm: Fix prefix handling for diag 10 balloon

2014-09-10 Thread Christian Borntraeger
The old handling of prefix pages was broken in the diag10 ballooner.
We now rely on gmap_discard to check for start > end and do a
slow path if the prefix swap pages are affected:
1. discard the pages from start to prefix
2. discard the absolute 0 pages
3. discard the pages after prefix swap to end

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
---
 arch/s390/kvm/diag.c | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index b374b6c..9254aff 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -28,22 +28,32 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
 
-   if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end
+   if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
|| start < 2 * PAGE_SIZE)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
vcpu->stat.diagnose_10++;
 
-   /* we checked for start > end above */
-   if (end < prefix || start >= prefix + 2 * PAGE_SIZE) {
+   /*
+* We checked for start >= end above, so lets check for the
+* fast path (no prefix swap page involved)
+*/
+   if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
gmap_discard(vcpu->arch.gmap, start, end);
} else {
-   if (start < prefix)
-   gmap_discard(vcpu->arch.gmap, start, prefix);
-   if (end >= prefix)
-   gmap_discard(vcpu->arch.gmap,
-prefix + 2 * PAGE_SIZE, end);
+   /*
+* This is slow path.  gmap_discard will check for start
+* so lets split this into before prefix, prefix, after
+* prefix and let gmap_discard make some of these calls
+* NOPs.
+*/
+   gmap_discard(vcpu->arch.gmap, start, prefix);
+   if (start <= prefix)
+   gmap_discard(vcpu->arch.gmap, 0, 4096);
+   if (end > prefix + 4096)
+   gmap_discard(vcpu->arch.gmap, 4096, 8192);
+   gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
}
return 0;
 }
-- 
1.8.4.2



[GIT PULL 2/7] KVM: s390: add __must_check to interrupt deliver functions

2014-09-10 Thread Christian Borntraeger
We now propagate interrupt injection errors back to the ioctl. We
should mark functions that might fail with __must_check.
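(Editorial aside, not part of the patch: a tiny self-contained illustration of what __must_check does. The kernel gets it from its compiler headers; the local #define below only makes the snippet stand alone.)

/* Illustration only; the kernel defines this in its compiler headers. */
#define __must_check __attribute__((warn_unused_result))

static int __must_check deliver_irq(int irq)
{
	return irq < 0 ? -22 /* -EINVAL */ : 0;
}

int main(void)
{
	deliver_irq(-1);        /* gcc/clang warn: result ignored */
	return deliver_irq(1);  /* fine: the return value is consumed */
}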

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Acked-by: Jens Freimann jf...@linux.vnet.ibm.com
---
 arch/s390/kvm/interrupt.c | 12 ++--
 arch/s390/kvm/kvm-s390.h  |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 60a5cf4..d56da1d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -28,7 +28,7 @@
 #define IOINT_AI_MASK 0x0400
 #define PFAULT_INIT 0x0600
 
-static int deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
 
 static int is_ioint(u64 type)
 {
@@ -77,7 +77,7 @@ static u64 int_word_to_isc_bits(u32 int_word)
return (0x80 >> isc) << 24;
 }
 
-static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
+static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
  struct kvm_s390_interrupt_info *inti)
 {
switch (inti-type) {
@@ -225,7 +225,7 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
}
 }
 
-static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
+static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
  struct kvm_s390_pgm_info *pgm_info)
 {
int rc = 0;
@@ -307,7 +307,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
return rc;
 }
 
-static int __do_deliver_interrupt(struct kvm_vcpu *vcpu,
+static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
   struct kvm_s390_interrupt_info *inti)
 {
const unsigned short table[] = { 2, 4, 4, 6 };
@@ -508,7 +508,7 @@ static int __do_deliver_interrupt(struct kvm_vcpu *vcpu,
return rc;
 }
 
-static int deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 {
int rc;
 
@@ -657,7 +657,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
  &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
 }
 
-int kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 99abcb5..b1a7766 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -138,7 +138,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm 
*kvm)
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
 enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
-int kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_float_irqs(struct kvm *kvm);
 int __must_check kvm_s390_inject_vm(struct kvm *kvm,
-- 
1.8.4.2



[GIT PULL 7/7] KVM: s390/interrupt: remove double assignment

2014-09-10 Thread Christian Borntraeger
r is already initialized to 0.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
---
 arch/s390/kvm/interrupt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 4abf819..4cad00a 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1352,7 +1352,6 @@ static int flic_set_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
r = enqueue_floating_irq(dev, attr);
break;
case KVM_DEV_FLIC_CLEAR_IRQS:
-   r = 0;
kvm_s390_clear_float_irqs(dev->kvm);
break;
case KVM_DEV_FLIC_APF_ENABLE:
-- 
1.8.4.2



blk-mq crash under KVM in multiqueue block code (with virtio-blk and ext4)

2014-09-11 Thread Christian Borntraeger
Folks,

we have seen the following bug with 3.16 as a KVM guest. I suspect the blk-mq 
rework that happened between 3.15 and 3.16, but it could be something completely 
different.


[   65.992022] Unable to handle kernel pointer dereference in virtual kernel 
address space
[   65.992187] failing address: d000 TEID: d803
[   65.992363] Fault in home space mode while using kernel ASCE.
[   65.992365] AS:00a7c007 R3:0024 
[   65.993754] Oops: 0038 [#1] SMP 
[   65.993923] Modules linked in: iscsi_tcp libiscsi_tcp libiscsi 
scsi_transport_iscsi virtio_balloon vhost_net vhost macvtap macvlan kvm 
dm_multipath virtio_net virtio_blk sunrpc
[   65.994274] CPU: 0 PID: 44 Comm: kworker/u6:2 Not tainted 
3.16.0-20140814.0.c66c84c.fc18-s390xfrob #1
[   65.996043] Workqueue: writeback bdi_writeback_workfn (flush-251:32)
[   65.996222] task: 0225 ti: 02258000 task.ti: 
02258000
[   65.996228] Krnl PSW : 0704f0018000 003ed114 
(blk_mq_tag_to_rq+0x20/0x38)
[   65.997299]R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:3 PM:0 
EA:3
   Krnl GPRS: 0040  01619000 
004e
[   65.997301]004e  0001 
00a0de18
[   65.997302]77ffbe18 77ffbd50 6d72d620 
004f
[   65.997304]01a99400 0080 003eddee 
77ffbc28
[   65.997864] Krnl Code: 003ed106: e3102034lg  
%r1,48(%r2)
  003ed10c: 91082044tm  
68(%r2),8
 #003ed110: a7840009brc 8,3ed122
 003ed114: e34016880004lg  
%r4,1672(%r1)
  003ed11a: 59304100c   
%r3,256(%r4)
  003ed11e: a7840003brc 8,3ed124
  003ed122: 07febcr 15,%r14
  003ed124: b9040024lgr %r2,%r4
[   65.998221] Call Trace:
[   65.998224] ([0001] 0x1)
[   65.998227]  [003f17b6] blk_mq_tag_busy_iter+0x7a/0xc4
[   65.998228]  [003edcd6] blk_mq_rq_timer+0x96/0x13c
[   65.999226]  [0013ee60] call_timer_fn+0x40/0x110
[   65.999230]  [0013f642] run_timer_softirq+0x2de/0x3d0
[   65.999238]  [00135b70] __do_softirq+0x124/0x2ac
[   65.999241]  [00136000] irq_exit+0xc4/0xe4
[   65.999435]  [0010bc08] do_IRQ+0x64/0x84
[   66.437533]  [0067ccd8] ext_skip+0x42/0x46
[   66.437541]  [003ed7b4] __blk_mq_alloc_request+0x58/0x1e8
[   66.437544] ([003ed788] __blk_mq_alloc_request+0x2c/0x1e8)
[   66.437547]  [003eef82] blk_mq_map_request+0xc2/0x208
[   66.437549]  [003ef860] blk_sq_make_request+0xac/0x350
[   66.437721]  [003e2d6c] generic_make_request+0xc4/0xfc
[   66.437723]  [003e2e56] submit_bio+0xb2/0x1a8
[   66.438373]  [0031e8aa] ext4_io_submit+0x52/0x80
[   66.438375]  [0031ccfa] ext4_writepages+0x7c6/0xd0c
[   66.438378]  [002aea20] __writeback_single_inode+0x54/0x274
[   66.438379]  [002b0134] writeback_sb_inodes+0x28c/0x4ec
[   66.438380]  [002b042e] __writeback_inodes_wb+0x9a/0xe4
[   66.438382]  [002b06a2] wb_writeback+0x22a/0x358
[   66.438383]  [002b0cd0] bdi_writeback_workfn+0x354/0x538
[   66.438618]  [0014e3aa] process_one_work+0x1aa/0x418
[   66.438621]  [0014ef94] worker_thread+0x48/0x524
[   66.438625]  [001560ca] kthread+0xee/0x108
[   66.438627]  [0067c76e] kernel_thread_starter+0x6/0xc
[   66.438628]  [0067c768] kernel_thread_starter+0x0/0xc
[   66.438629] Last Breaking-Event-Address:
[   66.438631]  [003edde8] blk_mq_timeout_check+0x6c/0xb8

I looked into the dump, and the full function is  (annotated by me to match the 
source code)
r2= tags
r3= tag (4e)
Dump of assembler code for function blk_mq_tag_to_rq:
   0x003ed0f4 +0: lg  %r1,96(%r2) # r1 
has now tags->rqs
   0x003ed0fa +6: sllg%r2,%r3,3   # r2 
has tag*8
   0x003ed100 +12:lg  %r2,0(%r2,%r1)  # r2 
now has rq (=tags->rqs[tag])
   0x003ed106 +18:lg  %r1,48(%r2) # r1 
now has rq->q
   0x003ed10c +24:tm  68(%r2),8   # test 
for rq->cmd_flags & REQ_FLUSH_SEQ
   0x003ed110 +28:je  0x3ed122 blk_mq_tag_to_rq+46  #  if 
not goto 3ed122
   0x003ed114 +32:lg  %r4,1672(%r1)   # r4 = 
rq->q->flush_rq   CRASHES as rq->q points to 
   0x003ed11a +38:c   %r3,256(%r4)# 
compare tag with rq->q->flush_rq->tag
   0x003ed11e +42:je
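(Editorial aside: for readers who do not read s390 assembly, a rough C sketch of what the annotated lookup above boils down to. The toy_* names and layout are made up; only the control flow mirrors the disassembly.)

#include <stddef.h>

/* Heavily simplified stand-ins for the real blk-mq structures. */
struct toy_request;

struct toy_request_queue {
	struct toy_request *flush_rq;
};

struct toy_request {
	struct toy_request_queue *q;
	unsigned int cmd_flags;
	int tag;
};

struct toy_tags {
	struct toy_request **rqs;
};

#define TOY_REQ_FLUSH_SEQ (1u << 0)

/* What the annotated disassembly walks through:
 *   rq = tags->rqs[tag];
 *   if (rq->cmd_flags & REQ_FLUSH_SEQ && rq->q->flush_rq->tag == tag)
 *           return rq->q->flush_rq;      -- rq->q is the wild pointer here
 *   return rq;
 * If the timeout path sees a request whose cmd_flags/q were never
 * (re)initialized, the flush test can pass and rq->q->flush_rq faults. */
struct toy_request *toy_tag_to_rq(struct toy_tags *tags, int tag)
{
	struct toy_request *rq = tags->rqs[tag];

	if ((rq->cmd_flags & TOY_REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
		return rq->q->flush_rq;
	return rq;
}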

Re: blk-mq crash under KVM in multiqueue block code (with virtio-blk and ext4)

2014-09-12 Thread Christian Borntraeger
On 09/11/2014 12:26 PM, Christian Borntraeger wrote:
 Folks,
 
 we have seen the following bug with 3.16 as a KVM guest. It suspect the 
 blk-mq rework that happened between 3.15 and 3.16, but it can be something 
 completely different.
 
 
 [   65.992022] Unable to handle kernel pointer dereference in virtual kernel 
 address space
 [   65.992187] failing address: d000 TEID: d803
 [   65.992363] Fault in home space mode while using kernel ASCE.
 [   65.992365] AS:00a7c007 R3:0024 
 [   65.993754] Oops: 0038 [#1] SMP 
 [   65.993923] Modules linked in: iscsi_tcp libiscsi_tcp libiscsi 
 scsi_transport_iscsi virtio_balloon vhost_net vhost macvtap macvlan kvm 
 dm_multipath virtio_net virtio_blk sunrpc
 [   65.994274] CPU: 0 PID: 44 Comm: kworker/u6:2 Not tainted 
 3.16.0-20140814.0.c66c84c.fc18-s390xfrob #1
 [   65.996043] Workqueue: writeback bdi_writeback_workfn (flush-251:32)
 [   65.996222] task: 0225 ti: 02258000 task.ti: 
 02258000
 [   65.996228] Krnl PSW : 0704f0018000 003ed114 
 (blk_mq_tag_to_rq+0x20/0x38)
 [   65.997299]R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:3 PM:0 
 EA:3
Krnl GPRS: 0040  01619000 
 004e
 [   65.997301]004e  0001 
 00a0de18
 [   65.997302]77ffbe18 77ffbd50 6d72d620 
 004f
 [   65.997304]01a99400 0080 003eddee 
 77ffbc28
 [   65.997864] Krnl Code: 003ed106: e3102034lg  
 %r1,48(%r2)
   003ed10c: 91082044tm  
 68(%r2),8
  #003ed110: a7840009brc 
 8,3ed122
  003ed114: e34016880004lg  
 %r4,1672(%r1)
   003ed11a: 59304100c   
 %r3,256(%r4)
   003ed11e: a7840003brc 
 8,3ed124
   003ed122: 07febcr 
 15,%r14
   003ed124: b9040024lgr 
 %r2,%r4
 [   65.998221] Call Trace:
 [   65.998224] ([0001] 0x1)
 [   65.998227]  [003f17b6] blk_mq_tag_busy_iter+0x7a/0xc4
 [   65.998228]  [003edcd6] blk_mq_rq_timer+0x96/0x13c
 [   65.999226]  [0013ee60] call_timer_fn+0x40/0x110
 [   65.999230]  [0013f642] run_timer_softirq+0x2de/0x3d0
 [   65.999238]  [00135b70] __do_softirq+0x124/0x2ac
 [   65.999241]  [00136000] irq_exit+0xc4/0xe4
 [   65.999435]  [0010bc08] do_IRQ+0x64/0x84
 [   66.437533]  [0067ccd8] ext_skip+0x42/0x46
 [   66.437541]  [003ed7b4] __blk_mq_alloc_request+0x58/0x1e8
 [   66.437544] ([003ed788] __blk_mq_alloc_request+0x2c/0x1e8)
 [   66.437547]  [003eef82] blk_mq_map_request+0xc2/0x208

I am currently asking myself if blk_mq_map_request should protect against 
softirq here, but I can't say for sure, as I have never looked into that code 
before.

Christian

 [   66.437549]  [003ef860] blk_sq_make_request+0xac/0x350
 [   66.437721]  [003e2d6c] generic_make_request+0xc4/0xfc
 [   66.437723]  [003e2e56] submit_bio+0xb2/0x1a8
 [   66.438373]  [0031e8aa] ext4_io_submit+0x52/0x80
 [   66.438375]  [0031ccfa] ext4_writepages+0x7c6/0xd0c
 [   66.438378]  [002aea20] __writeback_single_inode+0x54/0x274
 [   66.438379]  [002b0134] writeback_sb_inodes+0x28c/0x4ec
 [   66.438380]  [002b042e] __writeback_inodes_wb+0x9a/0xe4
 [   66.438382]  [002b06a2] wb_writeback+0x22a/0x358
 [   66.438383]  [002b0cd0] bdi_writeback_workfn+0x354/0x538
 [   66.438618]  [0014e3aa] process_one_work+0x1aa/0x418
 [   66.438621]  [0014ef94] worker_thread+0x48/0x524
 [   66.438625]  [001560ca] kthread+0xee/0x108
 [   66.438627]  [0067c76e] kernel_thread_starter+0x6/0xc
 [   66.438628]  [0067c768] kernel_thread_starter+0x0/0xc
 [   66.438629] Last Breaking-Event-Address:
 [   66.438631]  [003edde8] blk_mq_timeout_check+0x6c/0xb8
 
 I looked into the dump, and the full function is  (annotated by me to match 
 the source code)
 r2= tags
 r3= tag (4e)
 Dump of assembler code for function blk_mq_tag_to_rq:
0x003ed0f4 +0: lg  %r1,96(%r2)   # r1 
 has now tags-rqs
0x003ed0fa +6: sllg%r2,%r3,3 # r2 
 has tag*8
0x003ed100 +12:lg  %r2,0(%r2,%r1)
 # r2 now has rq (=tags-rqs[tag])
0x003ed106 +18:lg  %r1,48(%r2)   # r1 
 now has rq-q
0x003ed10c +24:tm  68(%r2),8 # test 
 for rq-cmd_flags  REQ_FLUSH_SEQ
0x003ed110 +28:je  0x3ed122

Re: blk-mq crash under KVM in multiqueue block code (with virtio-blk and ext4)

2014-09-12 Thread Christian Borntraeger
On 09/12/2014 01:54 PM, Ming Lei wrote:
 On Thu, Sep 11, 2014 at 6:26 PM, Christian Borntraeger
 borntrae...@de.ibm.com wrote:
 Folks,

 we have seen the following bug with 3.16 as a KVM guest. It suspect the 
 blk-mq rework that happened between 3.15 and 3.16, but it can be something 
 completely different.

 
 Care to share how you reproduce the issue?

Host with 16GB RAM and 32GB swap. 15 guests, all with 2 GB RAM (and a varying 
number of CPUs). All do heavy file I/O.
It did not happen with 3.15/3.15 in guest/host and does happen with 3.16/3.16, 
so our next step is to check 3.15/3.16 and 3.16/3.15 to identify whether it is 
host memory management or the guest block layer.

Christian

 
 [   65.992022] Unable to handle kernel pointer dereference in virtual kernel 
 address space
 [   65.992187] failing address: d000 TEID: d803
 [   65.992363] Fault in home space mode while using kernel ASCE.
 [   65.992365] AS:00a7c007 R3:0024
 [   65.993754] Oops: 0038 [#1] SMP
 [   65.993923] Modules linked in: iscsi_tcp libiscsi_tcp libiscsi 
 scsi_transport_iscsi virtio_balloon vhost_net vhost macvtap macvlan kvm 
 dm_multipath virtio_net virtio_blk sunrpc
 [   65.994274] CPU: 0 PID: 44 Comm: kworker/u6:2 Not tainted 
 3.16.0-20140814.0.c66c84c.fc18-s390xfrob #1
 [   65.996043] Workqueue: writeback bdi_writeback_workfn (flush-251:32)
 [   65.996222] task: 0225 ti: 02258000 task.ti: 
 02258000
 [   65.996228] Krnl PSW : 0704f0018000 003ed114 
 (blk_mq_tag_to_rq+0x20/0x38)
 [   65.997299]R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:3 PM:0 
 EA:3
Krnl GPRS: 0040  01619000 
 004e
 [   65.997301]004e  0001 
 00a0de18
 [   65.997302]77ffbe18 77ffbd50 6d72d620 
 004f
 [   65.997304]01a99400 0080 003eddee 
 77ffbc28
 [   65.997864] Krnl Code: 003ed106: e3102034lg  
 %r1,48(%r2)
   003ed10c: 91082044tm  
 68(%r2),8
  #003ed110: a7840009brc 
 8,3ed122
  003ed114: e34016880004lg  
 %r4,1672(%r1)
   003ed11a: 59304100c   
 %r3,256(%r4)
   003ed11e: a7840003brc 
 8,3ed124
   003ed122: 07febcr 
 15,%r14
   003ed124: b9040024lgr 
 %r2,%r4
 [   65.998221] Call Trace:
 [   65.998224] ([0001] 0x1)
 [   65.998227]  [003f17b6] blk_mq_tag_busy_iter+0x7a/0xc4
 [   65.998228]  [003edcd6] blk_mq_rq_timer+0x96/0x13c
 [   65.999226]  [0013ee60] call_timer_fn+0x40/0x110
 [   65.999230]  [0013f642] run_timer_softirq+0x2de/0x3d0
 [   65.999238]  [00135b70] __do_softirq+0x124/0x2ac
 [   65.999241]  [00136000] irq_exit+0xc4/0xe4
 [   65.999435]  [0010bc08] do_IRQ+0x64/0x84
 [   66.437533]  [0067ccd8] ext_skip+0x42/0x46
 [   66.437541]  [003ed7b4] __blk_mq_alloc_request+0x58/0x1e8
 [   66.437544] ([003ed788] __blk_mq_alloc_request+0x2c/0x1e8)
 [   66.437547]  [003eef82] blk_mq_map_request+0xc2/0x208
 [   66.437549]  [003ef860] blk_sq_make_request+0xac/0x350
 [   66.437721]  [003e2d6c] generic_make_request+0xc4/0xfc
 [   66.437723]  [003e2e56] submit_bio+0xb2/0x1a8
 [   66.438373]  [0031e8aa] ext4_io_submit+0x52/0x80
 [   66.438375]  [0031ccfa] ext4_writepages+0x7c6/0xd0c
 [   66.438378]  [002aea20] __writeback_single_inode+0x54/0x274
 [   66.438379]  [002b0134] writeback_sb_inodes+0x28c/0x4ec
 [   66.438380]  [002b042e] __writeback_inodes_wb+0x9a/0xe4
 [   66.438382]  [002b06a2] wb_writeback+0x22a/0x358
 [   66.438383]  [002b0cd0] bdi_writeback_workfn+0x354/0x538
 [   66.438618]  [0014e3aa] process_one_work+0x1aa/0x418
 [   66.438621]  [0014ef94] worker_thread+0x48/0x524
 [   66.438625]  [001560ca] kthread+0xee/0x108
 [   66.438627]  [0067c76e] kernel_thread_starter+0x6/0xc
 [   66.438628]  [0067c768] kernel_thread_starter+0x0/0xc
 [   66.438629] Last Breaking-Event-Address:
 [   66.438631]  [003edde8] blk_mq_timeout_check+0x6c/0xb8

 I looked into the dump, and the full function is  (annotated by me to match 
 the source code)
 r2= tags
 r3= tag (4e)
 Dump of assembler code for function blk_mq_tag_to_rq:
0x003ed0f4 +0: lg  %r1,96(%r2) # r1 
 has now tags-rqs
0x003ed0fa +6: sllg%r2,%r3,3   # r2 
 has tag*8
0x003ed100 +12:lg  %r2,0(%r2,%r1)  # r2 
 now has rq (=tags-rqs

Re: blk-mq crash under KVM in multiqueue block code (with virtio-blk and ext4)

2014-09-17 Thread Christian Borntraeger
On 09/12/2014 10:09 PM, Christian Borntraeger wrote:
 On 09/12/2014 01:54 PM, Ming Lei wrote:
 On Thu, Sep 11, 2014 at 6:26 PM, Christian Borntraeger
 borntrae...@de.ibm.com wrote:
 Folks,

 we have seen the following bug with 3.16 as a KVM guest. It suspect the 
 blk-mq rework that happened between 3.15 and 3.16, but it can be something 
 completely different.


 Care to share how you reproduce the issue?
 
 Host with 16GB RAM 32GB swap. 15 guest all with 2 GB RAM (and varying amount 
 of CPUs). All do heavy file I/O.
 It did not happen with 3.15/3.15 in guest/host and does happen with 
 3.16/3.16. So our next step is to check
 3.15/3.16 and 3.16/3.15 to identify if its host memory mgmt or guest block 
 layer.

The crashes happen pretty randomly, but when they happen it seems to be the 
same trace as below. This makes memory corruption by the host VM less likely and 
something wrong in blk-mq more likely, I guess.


 
 Christian
 

 [   65.992022] Unable to handle kernel pointer dereference in virtual 
 kernel address space
 [   65.992187] failing address: d000 TEID: d803
 [   65.992363] Fault in home space mode while using kernel ASCE.
 [   65.992365] AS:00a7c007 R3:0024
 [   65.993754] Oops: 0038 [#1] SMP
 [   65.993923] Modules linked in: iscsi_tcp libiscsi_tcp libiscsi 
 scsi_transport_iscsi virtio_balloon vhost_net vhost macvtap macvlan kvm 
 dm_multipath virtio_net virtio_blk sunrpc
 [   65.994274] CPU: 0 PID: 44 Comm: kworker/u6:2 Not tainted 
 3.16.0-20140814.0.c66c84c.fc18-s390xfrob #1
 [   65.996043] Workqueue: writeback bdi_writeback_workfn (flush-251:32)
 [   65.996222] task: 0225 ti: 02258000 task.ti: 
 02258000
 [   65.996228] Krnl PSW : 0704f0018000 003ed114 
 (blk_mq_tag_to_rq+0x20/0x38)
 [   65.997299]R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:3 
 PM:0 EA:3
Krnl GPRS: 0040  
 01619000 004e
 [   65.997301]004e  
 0001 00a0de18
 [   65.997302]77ffbe18 77ffbd50 
 6d72d620 004f
 [   65.997304]01a99400 0080 
 003eddee 77ffbc28
 [   65.997864] Krnl Code: 003ed106: e3102034lg  
 %r1,48(%r2)
   003ed10c: 91082044tm  
 68(%r2),8
  #003ed110: a7840009brc 
 8,3ed122
  003ed114: e34016880004lg  
 %r4,1672(%r1)
   003ed11a: 59304100c   
 %r3,256(%r4)
   003ed11e: a7840003brc 
 8,3ed124
   003ed122: 07febcr 
 15,%r14
   003ed124: b9040024lgr 
 %r2,%r4
 [   65.998221] Call Trace:
 [   65.998224] ([0001] 0x1)
 [   65.998227]  [003f17b6] blk_mq_tag_busy_iter+0x7a/0xc4
 [   65.998228]  [003edcd6] blk_mq_rq_timer+0x96/0x13c
 [   65.999226]  [0013ee60] call_timer_fn+0x40/0x110
 [   65.999230]  [0013f642] run_timer_softirq+0x2de/0x3d0
 [   65.999238]  [00135b70] __do_softirq+0x124/0x2ac
 [   65.999241]  [00136000] irq_exit+0xc4/0xe4
 [   65.999435]  [0010bc08] do_IRQ+0x64/0x84
 [   66.437533]  [0067ccd8] ext_skip+0x42/0x46
 [   66.437541]  [003ed7b4] __blk_mq_alloc_request+0x58/0x1e8
 [   66.437544] ([003ed788] __blk_mq_alloc_request+0x2c/0x1e8)
 [   66.437547]  [003eef82] blk_mq_map_request+0xc2/0x208
 [   66.437549]  [003ef860] blk_sq_make_request+0xac/0x350
 [   66.437721]  [003e2d6c] generic_make_request+0xc4/0xfc
 [   66.437723]  [003e2e56] submit_bio+0xb2/0x1a8
 [   66.438373]  [0031e8aa] ext4_io_submit+0x52/0x80
 [   66.438375]  [0031ccfa] ext4_writepages+0x7c6/0xd0c
 [   66.438378]  [002aea20] __writeback_single_inode+0x54/0x274
 [   66.438379]  [002b0134] writeback_sb_inodes+0x28c/0x4ec
 [   66.438380]  [002b042e] __writeback_inodes_wb+0x9a/0xe4
 [   66.438382]  [002b06a2] wb_writeback+0x22a/0x358
 [   66.438383]  [002b0cd0] bdi_writeback_workfn+0x354/0x538
 [   66.438618]  [0014e3aa] process_one_work+0x1aa/0x418
 [   66.438621]  [0014ef94] worker_thread+0x48/0x524
 [   66.438625]  [001560ca] kthread+0xee/0x108
 [   66.438627]  [0067c76e] kernel_thread_starter+0x6/0xc
 [   66.438628]  [0067c768] kernel_thread_starter+0x0/0xc
 [   66.438629] Last Breaking-Event-Address:
 [   66.438631]  [003edde8] blk_mq_timeout_check+0x6c/0xb8

 I looked into the dump, and the full function is  (annotated by me to match 
 the source code)
 r2= tags
 r3= tag (4e)
 Dump of assembler code for function blk_mq_tag_to_rq

Re: [PATCH] blk-mq: Avoid race condition with uninitialized requests

2014-09-19 Thread Christian Borntraeger
On 09/18/2014 11:04 AM, David Hildenbrand wrote:
 This patch should fix the bug reported in https://lkml.org/lkml/2014/9/11/249.
 
 We have to initialize at least the atomic_flags and the cmd_flags when
 allocating storage for the requests.
 
 Otherwise blk_mq_timeout_check() might dereference uninitialized pointers when
 racing with the creation of a request.
 
 Also move the reset of cmd_flags for the initializing code to the point where 
 a
 request is freed. So we will never end up with pending flush request 
 indicators
 that might trigger dereferences of invalid pointers in blk_mq_timeout_check().
 
 Cc: sta...@vger.kernel.org
 Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com

So far our testers were not able to reproduce the problem with this patch 
applied; we will continue testing over the weekend. I will give an ACK/NACK on 
Monday then.

Thanks


 ---
  block/blk-mq.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)
 
 diff --git a/block/blk-mq.c b/block/blk-mq.c
 index 383ea0c..eed6340 100644
 --- a/block/blk-mq.c
 +++ b/block/blk-mq.c
 @@ -203,7 +203,6 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, 
 int rw)
   if (tag != BLK_MQ_TAG_FAIL) {
   rq = data->hctx->tags->rqs[tag];
 
 - rq->cmd_flags = 0;
   if (blk_mq_tag_busy(data->hctx)) {
   rq->cmd_flags = REQ_MQ_INFLIGHT;
   atomic_inc(&data->hctx->nr_active);
 @@ -258,6 +257,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx 
 *hctx,
 
   if (rq->cmd_flags & REQ_MQ_INFLIGHT)
   atomic_dec(&hctx->nr_active);
 + rq->cmd_flags = 0;
 
   clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
   blk_mq_put_tag(hctx, tag, &ctx->last_tag);
 @@ -1404,6 +1404,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct 
 blk_mq_tag_set *set,
   left -= to_do * rq_size;
   for (j = 0; j < to_do; j++) {
   tags->rqs[i] = p;
 + tags->rqs[i]->atomic_flags = 0;
 + tags->rqs[i]->cmd_flags = 0;
   if (set->ops->init_request) {
   if (set->ops->init_request(set->driver_data,
   tags->rqs[i], hctx_idx, i,
 



Re: [[RFC] KVM-S390: Provide guest TOD Clock Get/Set Controls

2014-09-19 Thread Christian Borntraeger
On 09/19/2014 04:19 PM, Jason J. Herne wrote:
 From: Jason J. Herne jjhe...@us.ibm.com
 
 Enable KVM_SET_CLOCK and KVM_GET_CLOCK Ioctls on S390 for managing guest TOD
 clock value.
 

Just some education. On s390 the guest visible TOD clock is the host TOD clock + 
a hypervisor-programmable offset in the control block. There is only one TOD per 
system, so the offset must be the same for every CPU.

 we add the KVM_CLOCK_FORWARD_ONLY flag to indicate to KVM_SET_CLOCK that the
 given clock value should only be set if it is >= the current guest TOD clock
   host TOD, 
(right?)

The alternative scheme would be to simply get/set the guest TOD time. This 
works perfectly for migration, but for managedsave the guest time is in the past.
Your approach has the advantage that after managedsave the guest will (most of 
the time) have the host time of the target system, avoiding a guest time that 
is in the past (e.g. after 1 week of managedsave the guest would live 
in the past).

Question for Paolo (and maybe others): does it make sense to reuse/extend the 
existing ioctl? (I think so, but defining a new one could also be OK.)

Christian



 value. This guarantees a monotonically increasing time.
 
 NOTE: In the event that the KVM_CLOCK_FORWARD_ONLY flag is set and the given
 time would cause the guest time to jump backward, then we set the guest TOD
 clock equal to the host TOD clock. Does this behavior make sense, or is it too
 weird? I could believe that other architectures might not want this exact
 behavior. Instead they might prefer to implement the function such that an
 error code is returned instead of syncing the guest time to host time? In that
 case S390 would need another bit KVM_CLOCK_SET_TO_HOST which we could call to
 sync host time when the preferred guest time value would otherwise violate
 the monotonic property of the KVM_CLOCK_FORWARD_ONLY flag.
 
 Signed-off-by: Jason J. Herne jjhe...@us.ibm.com
 ---
  Documentation/virtual/kvm/api.txt |  5 +++
  arch/s390/kvm/kvm-s390.c  | 80 
 +++
  include/uapi/linux/kvm.h  |  3 ++
  3 files changed, 88 insertions(+)
 
 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index beae3fd..615c2e4 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -779,6 +779,11 @@ struct kvm_clock_data {
   __u32 pad[9];
  };
 
 +S390: KVM_CLOCK_FORWARD_ONLY is used by KVM_SET_CLOCK to indicate that the 
 guest
 +TOD clock should not be allowed to jump back in time. This flag guarantees a
 +monotonically increasing guest clock. If the clock value specified would 
 cause
 +the guest to jump back in time then the guest TOD clock is set to the host
 +TOD clock value.
 
  4.31 KVM_GET_VCPU_EVENTS
 
 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
 index 81b0e11..2450db3 100644
 --- a/arch/s390/kvm/kvm-s390.c
 +++ b/arch/s390/kvm/kvm-s390.c
 @@ -31,6 +31,7 @@
  #include <asm/switch_to.h>
  #include <asm/facility.h>
  #include <asm/sclp.h>
 +#include <asm/timex.h>
  #include "kvm-s390.h"
  #include "gaccess.h"
 
 @@ -169,6 +170,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
 ext)
   case KVM_CAP_S390_IRQCHIP:
   case KVM_CAP_VM_ATTRIBUTES:
   case KVM_CAP_MP_STATE:
 + case KVM_CAP_ADJUST_CLOCK:
   r = 1;
   break;
   case KVM_CAP_NR_VCPUS:
 @@ -338,6 +340,63 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct 
 kvm_device_attr *attr)
   return ret;
  }
 
 +static int kvm_s390_get_guest_tod(struct kvm *kvm, struct kvm_clock_data 
 *user_clock)
 +{
 + u64 current_host_tod;
 + u64 epoch = 0;
 + struct kvm_vcpu *vcpu;
 + unsigned int vcpu_idx;
 + int r;
 +
 + /* All vcpu's epochs are in sync. Just Grab the 1st one */
 + kvm_for_each_vcpu(vcpu_idx, vcpu, kvm)
 + {
 + epoch = vcpu->arch.sie_block->epoch;
 + break;
 + }
 +
 + r = store_tod_clock(&current_host_tod);
 + if (r)
 + return r;
 +
 + user_clock->clock = current_host_tod + epoch;
 + return 0;
 +}
 +
 +/*
 +Set the guest's effective TOD clock to the given value. The guest's
 +TOD clock is determined by the following formula: gtod = host_tod + epoch.
 +NOTE: Even though the epoch value is associated with a vcpu, there is only
 +one TOD clock and epoch value per guest.  All vcpu's epoch values must be 
 kept
 +synchronized.
 +NOTE: The KVM_CLOCK_FORWARD_ONLY flag is used to indicate that the guest 
 clock
 +should only be set to the provided value if doing so does not cause guest 
 time
 +to jump backwards. In this case we zero the epoch thereby making the guest 
 TOD
 +clock equal to the host TOD clock.
 +*/
 +static int kvm_s390_set_guest_tod(struct kvm *kvm, struct kvm_clock_data 
 *user_clock)
 +{
 + u64 current_host_tod, epoch;
 + struct kvm_vcpu *vcpu;
 + unsigned int 

Re: [[RFC] KVM-S390: Provide guest TOD Clock Get/Set Controls

2014-09-22 Thread Christian Borntraeger
On 09/19/2014 10:38 PM, Alexander Graf wrote:
 
 
 On 19.09.14 20:51, Christian Borntraeger wrote:
 On 09/19/2014 04:19 PM, Jason J. Herne wrote:
 From: Jason J. Herne jjhe...@us.ibm.com

 Enable KVM_SET_CLOCK and KVM_GET_CLOCK Ioctls on S390 for managing guest TOD
 clock value.


 Just some education. On s390 the guest visible TOD clock is the host TOD 
 clock + hypervisor programmable offset in the control block. There is only 
 one TOD per system, so the offset must be the same for every CPU.
 
 Can that offset be negative?

The offset is a u64, but the usual sum rules apply. The carry is ignored, and 
by using a large value you can have an effectively negative offset.
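(Editorial aside: a tiny illustration of that wraparound, with made-up numbers, not values from the patch.)

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* An epoch of -5 stored as a u64: adding it moves the clock back
	 * by 5, because the carry out of the 64-bit sum is simply dropped. */
	uint64_t host_tod  = 1000;
	uint64_t epoch     = (uint64_t)-5;   /* 0xFFFFFFFFFFFFFFFB */
	uint64_t guest_tod = host_tod + epoch;

	printf("guest TOD = %llu\n", (unsigned long long)guest_tod); /* 995 */
	return 0;
}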

 

 we add the KVM_CLOCK_FORWARD_ONLY flag to indicate to KVM_SET_CLOCK that the
 given clock value should only be set if it is = the current guest TOD clock
host TOD, 
 (right?)

 The alternative scheme would be to simply get/set the guest TOD time. This 
 works perfect for migration, but for managedsave the guest time is in the 
 past.
 Your approach has the advantange that after managedsave the guest will (most 
 of the time) have the host time of the target system, avoiding that the 
 guest has a time that is in the past (e.g. after 1 week managedsave the 
 guest would live in the past).
 
 But that's what users will expect, no? When you save an image in the
 past, it should resume at that very point in time.

Actually, I would expect something different (more or less something like 
standby/resume).

In fact Jason's code that we have internally in testing takes the simple 
approach:
1. source reads guest time at migration end
2. target sets guest time from source

So we have the guarantee that the time will never move backwards. It also works 
quite well for migration. As a bonus, we could really reuse the existing ioctl. 
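
For illustration only (this is not Jason's patch, and the helper names are
invented), the simple scheme maps onto the existing ioctls roughly like this:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* source side, at the end of migration */
static int save_guest_clock(int vm_fd, struct kvm_clock_data *data)
{
        return ioctl(vm_fd, KVM_GET_CLOCK, data);
}

/* target side, before resuming the guest */
static int restore_guest_clock(int vm_fd, struct kvm_clock_data *data)
{
        return ioctl(vm_fd, KVM_SET_CLOCK, data);
}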

I asked Jason to explore alternatives, though: I think it is somehow wrong if 
you save a guest into an image file, open that one month later, and the guest 
will always be 1 month behind unless it uses some kind of ntp. If everybody 
agrees that this is fine, I will queue up Jason's initial patch (simple get/set). 
The only question is then: shall we use an s390-specific ioctl (e.g. via VM 
attribute) or just use the existing KVM_SET_CLOCK?
But maybe let's answer the first question before we decide on this.

 
 Also I personally don't care whether the interface is "delta to now" or
 "this is the time". In general, delta to now is safer because you
 can't possibly run back in time. But you also definitely want to check
 out the way PPC does it - it also accommodates for the time we spend
 inside the migration path itself.
 
 
 Alex
 

 Question for Paolo (and maybe others): does it make sense to reuse/extend the 
 existing ioctl? (I think so, but defining a new one could also be ok.)

 Christian



 value. This guarantees a monotonically increasing time.

 NOTE: In the event that the KVM_CLOCK_FORWARD_ONLY flag is set and the given
 time would cause the guest time to jump backward, then we set the guest TOD
 clock equal to the host TOD clock. Does this behavior make sense, or is it too
 weird? I could believe that other architectures might not want this exact
 behavior. Instead they might prefer to implement the function such that an
 error code is returned instead of syncing the guest time to host time? In that
 case S390 would need another bit, KVM_CLOCK_SET_TO_HOST, which we could call to
 sync host time when the preferred guest time value would otherwise violate
 the monotonic property of the KVM_CLOCK_FORWARD_ONLY flag.
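
 A rough sketch of the decision rule described above (not the actual patch
 code; kernel-style u64/u32 typedefs assumed):

 static void set_guest_tod_sketch(u64 *guest_tod, u64 requested,
                                  u64 host_tod, u32 flags)
 {
         if ((flags & KVM_CLOCK_FORWARD_ONLY) && requested < *guest_tod)
                 *guest_tod = host_tod;  /* refuse to jump back; sync to host instead */
         else
                 *guest_tod = requested;
 }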

 Signed-off-by: Jason J. Herne jjhe...@us.ibm.com
 ---
  Documentation/virtual/kvm/api.txt |  5 +++
  arch/s390/kvm/kvm-s390.c  | 80 
 +++
  include/uapi/linux/kvm.h  |  3 ++
  3 files changed, 88 insertions(+)

 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index beae3fd..615c2e4 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -779,6 +779,11 @@ struct kvm_clock_data {
 __u32 pad[9];
  };

 +S390: KVM_CLOCK_FORWARD_ONLY is used by KVM_SET_CLOCK to indicate that the guest
 +TOD clock should not be allowed to jump back in time. This flag guarantees a
 +monotonically increasing guest clock. If the clock value specified would cause
 +the guest to jump back in time then the guest TOD clock is set to the host
 +TOD clock value.

  4.31 KVM_GET_VCPU_EVENTS

 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
 index 81b0e11..2450db3 100644
 --- a/arch/s390/kvm/kvm-s390.c
 +++ b/arch/s390/kvm/kvm-s390.c
 @@ -31,6 +31,7 @@
  #include asm/switch_to.h
  #include asm/facility.h
  #include asm/sclp.h
 +#include asm/timex.h
  #include kvm-s390.h
  #include gaccess.h

 @@ -169,6 +170,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
 ext)
 case KVM_CAP_S390_IRQCHIP:
 case

Re: [PATCH] kvm: don't take vcpu mutex for obviously invalid vcpu ioctls

2014-09-22 Thread Christian Borntraeger
On 09/22/2014 12:50 PM, Paolo Bonzini wrote:
 Il 20/09/2014 01:03, David Matlack ha scritto:
 vcpu ioctls can hang the calling thread if issued while a vcpu is
 running. If we know ioctl is going to be rejected as invalid anyway,
 we can fail before trying to take the vcpu mutex.

 This patch does not change functionality, it just makes invalid ioctls
 fail faster.

 Signed-off-by: David Matlack dmatl...@google.com
 ---
  virt/kvm/kvm_main.c | 4 
  1 file changed, 4 insertions(+)

 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index 96ec622..f9234e5 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -52,6 +52,7 @@
  
  #include asm/processor.h
  #include asm/io.h
 +#include asm/ioctl.h
  #include asm/uaccess.h
  #include asm/pgtable.h
  
 @@ -1975,6 +1976,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
  if (vcpu-kvm-mm != current-mm)
  return -EIO;
  
 +if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
 +return -EINVAL;
 +
  #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
  /*
   * Special cases: vcpu ioctls that are asynchronous to vcpu execution,

 
 Thanks, applying this patch.

Isn't that the wrong trade-off?

We now have an extra condition check for every valid ioctl, to make an error 
case go faster.
I know, the extra check is just 1 or 2 cycles if branch prediction is right, 
but still.

Christian



Re: [PATCH] blk-mq: Avoid race condition with uninitialized requests

2014-09-22 Thread Christian Borntraeger
On 09/18/2014 11:04 AM, David Hildenbrand wrote:
 This patch should fix the bug reported in https://lkml.org/lkml/2014/9/11/249.
 
 We have to initialize at least the atomic_flags and the cmd_flags when
 allocating storage for the requests.
 
 Otherwise blk_mq_timeout_check() might dereference uninitialized pointers when
 racing with the creation of a request.
 
 Also move the reset of cmd_flags from the initializing code to the point where a
 request is freed. So we will never end up with pending flush request indicators
 that might trigger dereferences of invalid pointers in blk_mq_timeout_check().
 
 Cc: sta...@vger.kernel.org
 Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com

Acked-by: Christian Borntraeger borntrae...@de.ibm.com

Can you please add
Reported-by: Paulo De Rezende Pinatti ppina...@linux.vnet.ibm.com
Tested-by: Paulo De Rezende Pinatti ppina...@linux.vnet.ibm.com

as Paulo did the testing work?

We think this patch is fine and should go upstream.



Christian


 ---
  block/blk-mq.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)
 
 diff --git a/block/blk-mq.c b/block/blk-mq.c
 index 383ea0c..eed6340 100644
 --- a/block/blk-mq.c
 +++ b/block/blk-mq.c
 @@ -203,7 +203,6 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, 
 int rw)
   if (tag != BLK_MQ_TAG_FAIL) {
   rq = data-hctx-tags-rqs[tag];
 
 - rq-cmd_flags = 0;
   if (blk_mq_tag_busy(data-hctx)) {
   rq-cmd_flags = REQ_MQ_INFLIGHT;
   atomic_inc(data-hctx-nr_active);
 @@ -258,6 +257,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx 
 *hctx,
 
   if (rq-cmd_flags  REQ_MQ_INFLIGHT)
   atomic_dec(hctx-nr_active);
 + rq-cmd_flags = 0;
 
   clear_bit(REQ_ATOM_STARTED, rq-atomic_flags);
   blk_mq_put_tag(hctx, tag, ctx-last_tag);
 @@ -1404,6 +1404,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct 
 blk_mq_tag_set *set,
   left -= to_do * rq_size;
   for (j = 0; j  to_do; j++) {
   tags-rqs[i] = p;
 + tags-rqs[i]-atomic_flags = 0;
 + tags-rqs[i]-cmd_flags = 0;
   if (set-ops-init_request) {
   if (set-ops-init_request(set-driver_data,
   tags-rqs[i], hctx_idx, i,
 



Re: [PATCH] kvm: don't take vcpu mutex for obviously invalid vcpu ioctls

2014-09-22 Thread Christian Borntraeger
On 09/22/2014 04:31 PM, Paolo Bonzini wrote:
 Il 22/09/2014 15:45, Christian Borntraeger ha scritto:
 We now have an extra condition check for every valid ioctl, to make an error 
 case go faster.
 I know, the extra check is just a 1 or 2 cycles if branch prediction is 
 right, but still.
 
 I applied the patch because the delay could be substantial,

I know, but only for seriously misbehaving userspace, no? See, my comment is 
really a minor one - let's say 2 more cycles for something that exited to 
userspace - nobody would even notice. I am just disturbed by the fact that we 
care about something that is not slow path but broken beyond repair (why does 
userspace call a non-KVM ioctl on an fd of a vcpu from a different thread - 
otherwise the mutex would be free?).

Please, can we have an explanation, e.g. something like
"while using trinity to fuzz KVM, we noticed long stalls on invalid ioctls. 
Let's bail out early on invalid ioctls." or similar?


 depending on what the other VCPU is doing.
 Perhaps something like this would be
 better?
 
 (Untested, but Tested-by/Reviewed-bys are welcome).

Given that it makes sense to improve handling of a misbehaving userspace, I 
prefer David's variant as the patch is smaller and easier to get right. No need 
to revert, but a better explanation of the use case would be appreciated.

Christian 
 
 Paolo
 
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index 84e24b210273..ed31760d79fe 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -117,12 +117,10 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
  /*
   * Switches to specified vcpu, until a matching vcpu_put()
   */
 -int vcpu_load(struct kvm_vcpu *vcpu)
 +static void __vcpu_load(struct kvm_vcpu *vcpu)
  {
   int cpu;
 
 - if (mutex_lock_killable(vcpu-mutex))
 - return -EINTR;
   if (unlikely(vcpu-pid != current-pids[PIDTYPE_PID].pid)) {
   /* The thread running this VCPU changed. */
   struct pid *oldpid = vcpu-pid;
 @@ -136,6 +134,14 @@ int vcpu_load(struct kvm_vcpu *vcpu)
   preempt_notifier_register(vcpu-preempt_notifier);
   kvm_arch_vcpu_load(vcpu, cpu);
   put_cpu();
 +}
 +
 +int vcpu_load(struct kvm_vcpu *vcpu)
 +{
 + if (mutex_lock_killable(vcpu-mutex))
 + return -EINTR;
 +
 + __vcpu_load(vcpu);
   return 0;
  }
 
 @@ -1989,9 +1995,6 @@ static long kvm_vcpu_ioctl(struct file *filp,
   if (vcpu-kvm-mm != current-mm)
   return -EIO;
 
 - if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
 - return -EINVAL;
 -
  #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
   /*
* Special cases: vcpu ioctls that are asynchronous to vcpu execution,
 @@ -2001,8 +2004,21 @@ static long kvm_vcpu_ioctl(struct file *filp,
   return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
  #endif
 
 + if (!mutex_trylock(vcpu-mutex)) {
 + /*
 +  * Before a potentially long sleep, check if we'd exit anyway.
 +  * The common case is for the mutex not to be contended, when
 +  * this does not add overhead.
 +  */
 + if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
 + return -EINVAL;
 +
 + if (mutex_lock_killable(vcpu-mutex))
 + return -EINTR;
 + }
 +
 
 - r = vcpu_load(vcpu);
 + r = __vcpu_load(vcpu);
   if (r)
   return r;
   switch (ioctl) {
 



Re: [PATCH] kvm: don't take vcpu mutex for obviously invalid vcpu ioctls

2014-09-23 Thread Christian Borntraeger
On 09/23/2014 08:49 AM, Gleb Natapov wrote:
 On Mon, Sep 22, 2014 at 09:29:19PM +0200, Paolo Bonzini wrote:
 Il 22/09/2014 21:20, Christian Borntraeger ha scritto:
 while using trinity to fuzz KVM, we noticed long stalls on invalid ioctls. 
 Lets bail out early on invalid ioctls. or similar?

 Okay.  David, can you explain how you found it so that I can make up my
 mind?

 Gleb and Marcelo, a fourth and fifth opinion? :)

 I agree with Christian that simpler fix is better here.
 The overhead is minimal. If we ever notice this overhead
 we can revert the patch all together since the problem it
 fixes can only be inflicted on userspace by itself and there
 are myriads other ways userspace can hurt itself.


Yes. David's explanation also makes sense as a commit message. Paolo, if you use 
David's patch with a better description of the why, I am fine with this patch.

Christian



[GIT PULL 0/2] KVM: s390: Small enhancements for 3.18 (next)

2014-10-01 Thread Christian Borntraeger
Paolo,

let's use the additional week that Linus gave us for growing next by
adding two small enhancements.

The following changes since commit cec26bc3c125b5dd12a02f04133cd91eae3f1622:

  KVM: PPC: BOOK3S: HV: CMA: Reserve cma region only in hypervisor mode 
(2014-09-29 15:36:33 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git  
tags/kvm-s390-next-20141001

for you to fetch changes up to ce2e4f0b75a567d25375b52476662c724304e476:

  KVM: s390: count vcpu wakeups in stat.halt_wakeup (2014-10-01 14:42:14 +0200)


KVM: s390: Small enhancements for 3.18 (next)

Provide TOD CLOCK steering to the guest and add cpu wakeup counter.


Christian Borntraeger (1):
  KVM: s390/facilities: allow TOD-CLOCK steering facility bit

David Hildenbrand (1):
  KVM: s390: count vcpu wakeups in stat.halt_wakeup

 arch/s390/include/asm/kvm_host.h | 1 +
 arch/s390/kvm/interrupt.c| 1 +
 arch/s390/kvm/kvm-s390.c | 3 ++-
 3 files changed, 4 insertions(+), 1 deletion(-)



[GIT PULL 2/2] KVM: s390: count vcpu wakeups in stat.halt_wakeup

2014-10-01 Thread Christian Borntraeger
From: David Hildenbrand d...@linux.vnet.ibm.com

This patch introduces the halt_wakeup counter used by common code and uses it to
count vcpu wakeups done in s390 arch specific code.

Acked-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/kvm_host.h | 1 +
 arch/s390/kvm/interrupt.c| 1 +
 arch/s390/kvm/kvm-s390.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 1a6f6fd..2175f911 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -192,6 +192,7 @@ struct kvm_vcpu_stat {
u32 exit_stop_request;
u32 exit_validity;
u32 exit_instruction;
+   u32 halt_wakeup;
u32 instruction_lctl;
u32 instruction_lctlg;
u32 instruction_stctl;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 4cad00a..a398384 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -626,6 +626,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 */
vcpu-preempted = true;
wake_up_interruptible(vcpu-wq);
+   vcpu-stat.halt_wakeup++;
}
 }
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0d5aa88..55aade4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ exit_instruction, VCPU_STAT(exit_instruction) },
{ exit_program_interruption, VCPU_STAT(exit_program_interruption) },
{ exit_instr_and_program_int, VCPU_STAT(exit_instr_and_program) },
+   { halt_wakeup, VCPU_STAT(halt_wakeup) },
{ instruction_lctlg, VCPU_STAT(instruction_lctlg) },
{ instruction_lctl, VCPU_STAT(instruction_lctl) },
{ instruction_stctl, VCPU_STAT(instruction_stctl) },
-- 
1.9.3



[GIT PULL 1/2] KVM: s390/facilities: allow TOD-CLOCK steering facility bit

2014-10-01 Thread Christian Borntraeger
There is nothing to do for KVM to support TOD-CLOCK steering.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
---
 arch/s390/kvm/kvm-s390.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 56a411c..0d5aa88 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1786,7 +1786,7 @@ static int __init kvm_s390_init(void)
return -ENOMEM;
}
memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
-   vfacilities[0] = 0xff82fff3f4fc2000UL;
+   vfacilities[0] = 0xff82fffbf47c2000UL;
vfacilities[1] = 0x005cUL;
return 0;
 }
-- 
1.9.3



Re: [GIT PULL 1/2] KVM: s390/facilities: allow TOD-CLOCK steering facility bit

2014-10-01 Thread Christian Borntraeger
On 10/01/2014 04:17 PM, Alexander Graf wrote:
 
 
 On 01.10.14 16:02, Christian Borntraeger wrote:
 There is nothing to do for KVM to support TOD-CLOCK steering.

 Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
 Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
 ---
  arch/s390/kvm/kvm-s390.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
 index 56a411c..0d5aa88 100644
 --- a/arch/s390/kvm/kvm-s390.c
 +++ b/arch/s390/kvm/kvm-s390.c
 @@ -1786,7 +1786,7 @@ static int __init kvm_s390_init(void)
  return -ENOMEM;
  }
  memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
 -vfacilities[0] = 0xff82fff3f4fc2000UL;
 +vfacilities[0] = 0xff82fffbf47c2000UL;
 
 Can we please convert this into something readable soon? :)

It will be sooner when you send patches ;-)
The facility numbers are documented in the POP (chapter 4, last page) in IBM 
notation (bit 0 is the MSB).
It probably makes sense to do this for the non-KVM part as well. When you grep
for test_facility under arch/s390 there are lots of numerical values.
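
For readers unfamiliar with the notation, a minimal sketch of what MSB-first
bit numbering means when testing a facility bit (illustrative only, not the
kernel's actual test_facility() implementation):

/* bit 0 is the most significant bit of the first 64-bit word */
#define FAC_MASK(nr)    (1UL << (63 - ((nr) & 63)))

static inline int fac_bit_set(const unsigned long *fac_list, int nr)
{
        return (fac_list[nr / 64] & FAC_MASK(nr)) != 0;
}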

Hmm, maybe we can find somebody that wants to increase the patch counter?

Christian


 
 
 Alex
 
  vfacilities[1] = 0x005cUL;
  return 0;
  }

 



Re: [GIT PULL 1/2] KVM: s390/facilities: allow TOD-CLOCK steering facility bit

2014-10-02 Thread Christian Borntraeger
Am 02.10.2014 10:22, schrieb Heiko Carstens:
 On Wed, Oct 01, 2014 at 08:27:38PM +0200, Christian Borntraeger wrote:
 On 10/01/2014 04:17 PM, Alexander Graf wrote:


 On 01.10.14 16:02, Christian Borntraeger wrote:
 There is nothing to do for KVM to support TOD-CLOCK steering.

 Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
 Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
 ---
  arch/s390/kvm/kvm-s390.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
 index 56a411c..0d5aa88 100644
 --- a/arch/s390/kvm/kvm-s390.c
 +++ b/arch/s390/kvm/kvm-s390.c
 @@ -1786,7 +1786,7 @@ static int __init kvm_s390_init(void)
return -ENOMEM;
}
memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
 -  vfacilities[0] = 0xff82fff3f4fc2000UL;
 +  vfacilities[0] = 0xff82fffbf47c2000UL;

 Can we please convert this into something readable soon? :)

 It will be sooner when you send patches ;-)
 The facility numbers are documented in the POP (chapter 4 last page) in
 IBM notation (bit0 is the MSB)
 It probably makes sense to do this for the non-KVM part as well. When you 
 grep
 for test_facility under arch/s390 there are lots of numerical value.
 
 These numbers _are_ a wart and were the source of a couple of bugs e.g.
 in our ALS code already.
 However converting these bitfields to something readable doesn't seem
 to be easy, since I'd like to have variable size array initializers which
 set the bits depending on the symbolic name (e.g. set bits 19, 20 and 139
 and automatically choose the correct size of the array):
 ..something like INIT_FACILITY_ARRAY(FAC19, FAC20, FAC139)
 
 And of course this should work for asm code as well.
 
 Hmm, maybe we can find somebody that wants to increase the patch counter?
 
 If you think this is trivial, please send a patch which does this.
 

Unfortunately it's not.

Well, a simple
#define KVM_FAC0 (FAC_N3 | FAC_ZARCH | ...)
would probably be trivial (but error prone). Doing it for the whole tree 
(including head.S) will be harder. 
Especially the initial design approach is not trivial (where to put the macros, 
what they should look like...). I think we would have to come up with a skeleton 
and then the replacements themselves might be trivial. For KVM we probably want 
to defer that until we have Michael's CPU model support ready, though.
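
A very rough sketch of the direction (hypothetical names; this is neither
Heiko's INIT_FACILITY_ARRAY nor a proposed patch, and it ignores the asm/head.S
requirement):

#define FAC_BIT(nr)     (1UL << (63 - ((nr) & 63)))
#define FAC_WORD(nr)    ((nr) >> 6)

static void set_facility_bit(unsigned long *list, int nr)
{
        list[FAC_WORD(nr)] |= FAC_BIT(nr);
}

static void init_facility_list(unsigned long *list)
{
        /* Heiko's example bits, purely illustrative */
        set_facility_bit(list, 19);
        set_facility_bit(list, 20);
        set_facility_bit(list, 139);
}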

Christian



Re: [RFC] KVM-S390: Provide guest TOD Clock Get/Set Controls

2014-10-09 Thread Christian Borntraeger
Am 08.10.2014 16:55, schrieb Alexander Graf:
 
 
 On 08.10.14 16:09, Jason Herne wrote:
 Christian Borntraeger borntrae...@de.ibm.com wrote on 09/22/2014
 05:08:34 AM:
 ...
 Actually, I would expect something different (more or less something
 like standby/resume).

 In fact Jason's code that we have internally in testing is doing the
 simple approach
 1. source reads guest time at migration end
 2. target sets guest time from source

 So we have the guarantee that the time will never move backwards. It
 also works quite well for migration. As a bonus, we could really
 reuse the existing ioctl.

 I asked Jason to explore alternatives, though: I think it is somehow
 wrong, if you save a guest into an image file, open that one month
 later and the guest will always be 1 month behind unless it uses
 some kind of ntp. If everybody agrees that this is fine, I will
 queue up Jason's initial patch (simple get/set).
 The only question is then: shall we use an s390 specific ioctl (e.g.
 via VM attribute) or just use the existing KVM_SET_CLOCK.
 But maybe lets answer the first question before we decide on this.

 Ping. Does anyone feel strongly about this issue? I'm interested in
 opinions so we can get s390 TOD clock migration working :).

 We need to decide which interface to use, s390 specific ioctl or
 KVM_SET_CLOCK.
 
 I don't have any particular preference. If anything, I'm leaning towards
 KVM_SET_CLOCK.
 
 Then we need to decide if we're going to snap a guest clock forward
 on the resume of a suspend to disk type operation. The alternative
 is to fix up the guest TOD value such that the guest notices no
 change of time, which as Christian points out, seems wrong. Unless we
 really want to show no time change and force the guest to use ntp to
 figure out that he is behind.
 
 Just do it the same as x86 :).

Yes, that is usually always the right thing to do with Linux :-)

Jason, can you post the minimal patch that uses SET_CLOCK/GET_CLOCK to set/get 
bits 0-63 of the TOD? (Also apply it internally so that we can test it for 
some days. It's too late for this merge window anyway.)

If we want some different scheme, we can certainly discuss extensions via the 
flags (and pad) in the future. So this interface is certainly not a dead end if 
we need more.

I have 2 possible extensions in mind:
1. the thing that we discussed; let's see if we need a fix or not
2. making KVM on s390x ready for 2042 and beyond (there is no architecture yet, 
but STCKE stores a byte of zeroes to the left of the TOD clock value, so I 
guess if this is extended at some point we might want an additional flag plus a 
maximum of 1 additional byte. There is plenty of pad space, so this is fine.)
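
Purely hypothetical sketch of what (2) could look like on top of the existing
struct kvm_clock_data layout (the flag name and the placement in pad are
invented for illustration, not a proposed ABI):

#include <linux/types.h>

#define KVM_CLOCK_TOD_EXT       (1U << 1)       /* invented flag, illustration only */

struct kvm_clock_data_sketch {
        __u64 clock;    /* bits 0-63 of the TOD, as today */
        __u32 flags;    /* would carry KVM_CLOCK_TOD_EXT */
        __u32 pad[9];   /* e.g. pad[0] & 0xff: the extra STCKE-style high-order byte */
};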

Christian



Re: A question about HTL VM-Exit handling time

2014-10-28 Thread Christian Borntraeger
Am 21.10.2014 20:39, schrieb Paolo Bonzini:
 
 
 On 10/16/2014 10:15 AM, Wu, Feng wrote:
 Hi folks,

 I ran a kernel build in the guest and used perf kvm to get some VM-Exit results 
 as follows:

 Analyze events for all VCPUs:

               VM-EXIT    Samples  Samples%     Time%   Min Time   Max Time  Avg time

             MSR_WRITE    3613908    57.53%    18.97%        5us     1362us    9.73us
                   HLT    1399747    22.28%    74.90%        5us   432448us   99.24us
             CR_ACCESS     961203    15.30%     3.28%        4us      188us    6.33us
    EXTERNAL_INTERRUPT     213821     3.40%     2.25%        4us     4089us   19.54us
         EXCEPTION_NMI      25152     0.40%     0.12%        4us       71us    9.05us
         EPT_MISCONFIG      20104     0.32%     0.15%        8us     5628us   13.74us
                 CPUID      19904     0.32%     0.07%        4us      220us    6.90us
        IO_INSTRUCTION      17097     0.27%     0.20%       13us     1008us   22.08us
     PAUSE_INSTRUCTION      10737     0.17%     0.05%        4us       53us    8.33us
              MSR_READ         48     0.00%     0.00%        4us        8us    5.62us

 Total Samples: 6281721, Total events handled time: 185457820.41us.

 I also did some other experiments with different workloads in the guest, and I 
 got the same results in terms of HLT VM-Exit handling time. Does anyone know 
 why the handling time for HLT VM-Exit is so high? I appreciate your help!
 
 432 ms sounds like a lot, but in general it is expected that HLT vmexits
 take a long time.  After an HLT vmexit, the VCPU will not be reentered
 until the next interrupt comes.  On hardware, the HLT instruction can
 also take many milliseconds.
 
 If this is an SMP guest, it's possible that the maximum time is
 registered on the APs before Linux boots.  With a UP guest I would
 expect a shorter maximum time, but still longer than other vmexits.

We have the same on s390 with wait state. The thing is, with an idle system and 
NOHZ the time spent in HLT/wait could be really long. So we might want to 
provide an option to filter this out.
(A similar filter already exists for the --duration option.)



[GIT PULL 0/9] KVM: s390: Fixes and cleanups for kvm/next (3.19)

2014-10-28 Thread Christian Borntraeger
Paolo,

the first bunch on s390 change for next:

The following changes since commit cac7f2429872d3733dc3f9915857b1691da2eb2f:

  Linux 3.18-rc2 (2014-10-26 16:48:41 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git  
tags/kvm-s390-next-20141028

for you to fetch changes up to a6cc3108567e0adc06c4a8031186f84ad1e1e194:

  KVM: s390: sigp: split handling of SIGP STOP (AND STORE STATUS) (2014-10-28 
13:09:14 +0100)


KVM: s390: Fixes and cleanups

1. A small fix regarding program check handling (cc stable as it
   overwrites the wrong guest memory)
2. Improve the ipte interlock scalability for older hardware
3. current-mm to mm cleanup (currently a no-op)
4. several SIGP rework patches (more to come)


David Hildenbrand (6):
  KVM: s390: sigp: dispatch orders with one target in a separate function
  KVM: s390: sigp: move target cpu checks into dispatcher
  KVM: s390: sigp: separate preparation handlers
  KVM: s390: sigp: instruction counters for all sigp orders
  KVM: s390: sigp: inject emergency calls in a separate function
  KVM: s390: sigp: split handling of SIGP STOP (AND STORE STATUS)

Jason J. Herne (1):
  KVM: s390: Cleanup usage of current-mm in set_guest_storage_key

Thomas Huth (2):
  KVM: s390: Make the simple ipte mutex specific to a VM instead of global
  KVM: s390: Fix size of monitor-class number field

 arch/s390/include/asm/kvm_host.h |   9 ++
 arch/s390/include/asm/sigp.h |   1 +
 arch/s390/kvm/gaccess.c  |  20 ++-
 arch/s390/kvm/interrupt.c|   2 +-
 arch/s390/kvm/kvm-s390.c |   8 ++
 arch/s390/kvm/sigp.c | 269 ---
 arch/s390/mm/pgtable.c   |   2 +-
 7 files changed, 168 insertions(+), 143 deletions(-)



[GIT PULL 2/9] KVM: s390: Cleanup usage of current-mm in set_guest_storage_key

2014-10-28 Thread Christian Borntraeger
From: Jason J. Herne jjhe...@us.ibm.com

In set_guest_storage_key, we really want to reference the mm struct given as
a parameter to the function. So replace the current-mm reference with the
mm struct passed in by the caller.

Signed-off-by: Jason J. Herne jjhe...@us.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/mm/pgtable.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 1b79ca6..cfecc24 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -936,7 +936,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned 
long addr,
 
down_read(mm-mmap_sem);
 retry:
-   ptep = get_locked_pte(current-mm, addr, ptl);
+   ptep = get_locked_pte(mm, addr, ptl);
if (unlikely(!ptep)) {
up_read(mm-mmap_sem);
return -EFAULT;
-- 
1.9.3



[GIT PULL 8/9] KVM: s390: sigp: inject emergency calls in a separate function

2014-10-28 Thread Christian Borntraeger
From: David Hildenbrand d...@linux.vnet.ibm.com

In preparation for further code changes, this patch moves the injection of
emergency calls into a separate function and uses it for the processing of
SIGP EMERGENCY CALL and SIGP CONDITIONAL EMERGENCY CALL.

Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
Acked-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/sigp.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 9ee63e4..1b330d4 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -46,7 +46,8 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, struct 
kvm_vcpu *dst_vcpu,
return rc;
 }
 
-static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+   struct kvm_vcpu *dst_vcpu)
 {
struct kvm_s390_interrupt s390int = {
.type = KVM_S390_INT_EMERGENCY,
@@ -62,6 +63,11 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, struct 
kvm_vcpu *dst_vcpu)
return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
 
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+   return __inject_sigp_emergency(vcpu, dst_vcpu);
+}
+
 static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
struct kvm_vcpu *dst_vcpu,
u16 asn, u64 *reg)
@@ -76,12 +82,12 @@ static int __sigp_conditional_emergency(struct kvm_vcpu 
*vcpu,
p_asn = dst_vcpu-arch.sie_block-gcr[4]  0x;  /* Primary ASN */
s_asn = dst_vcpu-arch.sie_block-gcr[3]  0x;  /* Secondary ASN */
 
-   /* Deliver the emergency signal? */
+   /* Inject the emergency signal? */
if (!(flags  CPUSTAT_STOPPED)
|| (psw-mask  psw_int_mask) != psw_int_mask
|| ((flags  CPUSTAT_WAIT)  psw-addr != 0)
|| (!(flags  CPUSTAT_WAIT)  (asn == p_asn || asn == s_asn))) {
-   return __sigp_emergency(vcpu, dst_vcpu);
+   return __inject_sigp_emergency(vcpu, dst_vcpu);
} else {
*reg = 0xUL;
*reg |= SIGP_STATUS_INCORRECT_STATE;
-- 
1.9.3



[GIT PULL 7/9] KVM: s390: sigp: instruction counters for all sigp orders

2014-10-28 Thread Christian Borntraeger
From: David Hildenbrand d...@linux.vnet.ibm.com

This patch introduces instruction counters for all known sigp orders and also a
separate one for unknown orders that are passed to user space.

Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/kvm_host.h | 7 +++
 arch/s390/kvm/kvm-s390.c | 7 +++
 arch/s390/kvm/sigp.c | 8 +++-
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 584b820..7e02d77 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -226,10 +226,17 @@ struct kvm_vcpu_stat {
u32 instruction_sigp_sense_running;
u32 instruction_sigp_external_call;
u32 instruction_sigp_emergency;
+   u32 instruction_sigp_cond_emergency;
+   u32 instruction_sigp_start;
u32 instruction_sigp_stop;
+   u32 instruction_sigp_stop_store_status;
+   u32 instruction_sigp_store_status;
u32 instruction_sigp_arch;
u32 instruction_sigp_prefix;
u32 instruction_sigp_restart;
+   u32 instruction_sigp_init_cpu_reset;
+   u32 instruction_sigp_cpu_reset;
+   u32 instruction_sigp_unknown;
u32 diagnose_10;
u32 diagnose_44;
u32 diagnose_9c;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3e83d4b..06878bd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -81,10 +81,17 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ instruction_sigp_sense_running, 
VCPU_STAT(instruction_sigp_sense_running) },
{ instruction_sigp_external_call, 
VCPU_STAT(instruction_sigp_external_call) },
{ instruction_sigp_emergency, VCPU_STAT(instruction_sigp_emergency) },
+   { instruction_sigp_cond_emergency, 
VCPU_STAT(instruction_sigp_cond_emergency) },
+   { instruction_sigp_start, VCPU_STAT(instruction_sigp_start) },
{ instruction_sigp_stop, VCPU_STAT(instruction_sigp_stop) },
+   { instruction_sigp_stop_store_status, 
VCPU_STAT(instruction_sigp_stop_store_status) },
+   { instruction_sigp_store_status, 
VCPU_STAT(instruction_sigp_store_status) },
{ instruction_sigp_set_arch, VCPU_STAT(instruction_sigp_arch) },
{ instruction_sigp_set_prefix, VCPU_STAT(instruction_sigp_prefix) },
{ instruction_sigp_restart, VCPU_STAT(instruction_sigp_restart) },
+   { instruction_sigp_cpu_reset, VCPU_STAT(instruction_sigp_cpu_reset) },
+   { instruction_sigp_init_cpu_reset, 
VCPU_STAT(instruction_sigp_init_cpu_reset) },
+   { instruction_sigp_unknown, VCPU_STAT(instruction_sigp_unknown) },
{ diagnose_10, VCPU_STAT(diagnose_10) },
{ diagnose_44, VCPU_STAT(diagnose_44) },
{ diagnose_9c, VCPU_STAT(diagnose_9c) },
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index a9e1739..9ee63e4 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -344,11 +344,12 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 
order_code,
rc = __sigp_stop(vcpu, dst_vcpu, ACTION_STOP_ON_STOP);
break;
case SIGP_STOP_AND_STORE_STATUS:
-   vcpu-stat.instruction_sigp_stop++;
+   vcpu-stat.instruction_sigp_stop_store_status++;
rc = __sigp_stop(vcpu, dst_vcpu, ACTION_STORE_ON_STOP |
 ACTION_STOP_ON_STOP);
break;
case SIGP_STORE_STATUS_AT_ADDRESS:
+   vcpu-stat.instruction_sigp_store_status++;
rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
 status_reg);
break;
@@ -357,6 +358,7 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 
order_code,
rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
break;
case SIGP_COND_EMERGENCY_SIGNAL:
+   vcpu-stat.instruction_sigp_cond_emergency++;
rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
  status_reg);
break;
@@ -365,6 +367,7 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 
order_code,
rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
break;
case SIGP_START:
+   vcpu-stat.instruction_sigp_start++;
rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
break;
case SIGP_RESTART:
@@ -372,12 +375,15 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 
order_code,
rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
break;
case SIGP_INITIAL_CPU_RESET:
+   vcpu-stat.instruction_sigp_init_cpu_reset++;
rc

[GIT PULL 1/9] KVM: s390: Make the simple ipte mutex specific to a VM instead of global

2014-10-28 Thread Christian Borntraeger
From: Thomas Huth th...@linux.vnet.ibm.com

The ipte-locking should be done for each VM separately, not globally.
This way we avoid possible congestion when the simple ipte-lock is used
and multiple VMs are running.

Suggested-by: Heiko Carstens heiko.carst...@de.ibm.com
Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com
Acked-by: Heiko Carstens heiko.carst...@de.ibm.com
Reviewed-by: Christian Borntraeger borntrae...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/kvm_host.h |  2 ++
 arch/s390/kvm/gaccess.c  | 20 +---
 arch/s390/kvm/kvm-s390.c |  1 +
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2175f911..584b820 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -434,6 +434,8 @@ struct kvm_arch{
int user_cpu_state_ctrl;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
+   int ipte_lock_count;
+   struct mutex ipte_mutex;
spinlock_t start_stop_lock;
struct kvm_s390_crypto crypto;
 };
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0f961a1..c1424e8 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -207,8 +207,6 @@ union raddress {
unsigned long pfra : 52; /* Page-Frame Real Address */
 };
 
-static int ipte_lock_count;
-static DEFINE_MUTEX(ipte_mutex);
 
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 {
@@ -216,16 +214,16 @@ int ipte_lock_held(struct kvm_vcpu *vcpu)
 
if (vcpu-arch.sie_block-eca  1)
return ic-kh != 0;
-   return ipte_lock_count != 0;
+   return vcpu-kvm-arch.ipte_lock_count != 0;
 }
 
 static void ipte_lock_simple(struct kvm_vcpu *vcpu)
 {
union ipte_control old, new, *ic;
 
-   mutex_lock(ipte_mutex);
-   ipte_lock_count++;
-   if (ipte_lock_count  1)
+   mutex_lock(vcpu-kvm-arch.ipte_mutex);
+   vcpu-kvm-arch.ipte_lock_count++;
+   if (vcpu-kvm-arch.ipte_lock_count  1)
goto out;
ic = vcpu-kvm-arch.sca-ipte_control;
do {
@@ -238,16 +236,16 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu)
new.k = 1;
} while (cmpxchg(ic-val, old.val, new.val) != old.val);
 out:
-   mutex_unlock(ipte_mutex);
+   mutex_unlock(vcpu-kvm-arch.ipte_mutex);
 }
 
 static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
 {
union ipte_control old, new, *ic;
 
-   mutex_lock(ipte_mutex);
-   ipte_lock_count--;
-   if (ipte_lock_count)
+   mutex_lock(vcpu-kvm-arch.ipte_mutex);
+   vcpu-kvm-arch.ipte_lock_count--;
+   if (vcpu-kvm-arch.ipte_lock_count)
goto out;
ic = vcpu-kvm-arch.sca-ipte_control;
do {
@@ -256,7 +254,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
} while (cmpxchg(ic-val, old.val, new.val) != old.val);
wake_up(vcpu-kvm-arch.ipte_wq);
 out:
-   mutex_unlock(ipte_mutex);
+   mutex_unlock(vcpu-kvm-arch.ipte_mutex);
 }
 
 static void ipte_lock_siif(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 55aade4..3e83d4b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -453,6 +453,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
spin_lock_init(kvm-arch.float_int.lock);
INIT_LIST_HEAD(kvm-arch.float_int.list);
init_waitqueue_head(kvm-arch.ipte_wq);
+   mutex_init(kvm-arch.ipte_mutex);
 
debug_register_view(kvm-arch.dbf, debug_sprintf_view);
VM_EVENT(kvm, 3, %s, vm created);
-- 
1.9.3



[GIT PULL 9/9] KVM: s390: sigp: split handling of SIGP STOP (AND STORE STATUS)

2014-10-28 Thread Christian Borntraeger
From: David Hildenbrand d...@linux.vnet.ibm.com

In preparation for further code changes (e.g. getting rid of action_flags),
this patch splits the handling of the two sigp orders SIGP STOP and SIGP STOP
AND STORE STATUS by introducing a separate handler function for SIGP STOP AND
STORE STATUS.

Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
Acked-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/sigp.c | 25 ++---
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 1b330d4..f7cd3f7 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -147,15 +147,27 @@ out:
return rc;
 }
 
-static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
-  int action)
+static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
 {
int rc;
 
-   rc = __inject_sigp_stop(dst_vcpu, action);
+   rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP);
VCPU_EVENT(vcpu, 4, sent sigp stop to cpu %x, dst_vcpu-vcpu_id);
 
-   if ((action  ACTION_STORE_ON_STOP) != 0  rc == -ESHUTDOWN) {
+   return rc;
+}
+
+static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
+   struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+   int rc;
+
+   rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP |
+ ACTION_STORE_ON_STOP);
+   VCPU_EVENT(vcpu, 4, sent sigp stop and store status to cpu %x,
+  dst_vcpu-vcpu_id);
+
+   if (rc == -ESHUTDOWN) {
/* If the CPU has already been stopped, we still have
 * to save the status when doing stop-and-store. This
 * has to be done after unlocking all spinlocks. */
@@ -347,12 +359,11 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 
order_code,
break;
case SIGP_STOP:
vcpu-stat.instruction_sigp_stop++;
-   rc = __sigp_stop(vcpu, dst_vcpu, ACTION_STOP_ON_STOP);
+   rc = __sigp_stop(vcpu, dst_vcpu);
break;
case SIGP_STOP_AND_STORE_STATUS:
vcpu-stat.instruction_sigp_stop_store_status++;
-   rc = __sigp_stop(vcpu, dst_vcpu, ACTION_STORE_ON_STOP |
-ACTION_STOP_ON_STOP);
+   rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
break;
case SIGP_STORE_STATUS_AT_ADDRESS:
vcpu-stat.instruction_sigp_store_status++;
-- 
1.9.3



[GIT PULL 4/9] KVM: s390: sigp: dispatch orders with one target in a separate function

2014-10-28 Thread Christian Borntraeger
From: David Hildenbrand d...@linux.vnet.ibm.com

All sigp orders except SIGP SET ARCHITECTURE target exactly one vcpu.

Let's move the dispatch code for these orders into a separate function to
prepare for cleaner target availability checks.

Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/sigp.c | 74 ++--
 1 file changed, 43 insertions(+), 31 deletions(-)

diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index cf243ba..5e259bd 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -349,32 +349,15 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 
cpu_addr)
return rc;
 }
 
-int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+  u16 cpu_addr, u32 parameter, u64 *status_reg)
 {
-   int r1 = (vcpu-arch.sie_block-ipa  0x00f0)  4;
-   int r3 = vcpu-arch.sie_block-ipa  0x000f;
-   u32 parameter;
-   u16 cpu_addr = vcpu-run-s.regs.gprs[r3];
-   u8 order_code;
int rc;
 
-   /* sigp in userspace can exit */
-   if (vcpu-arch.sie_block-gpsw.mask  PSW_MASK_PSTATE)
-   return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-
-   order_code = kvm_s390_get_base_disp_rs(vcpu);
-
-   if (r1 % 2)
-   parameter = vcpu-run-s.regs.gprs[r1];
-   else
-   parameter = vcpu-run-s.regs.gprs[r1 + 1];
-
-   trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
switch (order_code) {
case SIGP_SENSE:
vcpu-stat.instruction_sigp_sense++;
-   rc = __sigp_sense(vcpu, cpu_addr,
- vcpu-run-s.regs.gprs[r1]);
+   rc = __sigp_sense(vcpu, cpu_addr, status_reg);
break;
case SIGP_EXTERNAL_CALL:
vcpu-stat.instruction_sigp_external_call++;
@@ -395,25 +378,19 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
break;
case SIGP_STORE_STATUS_AT_ADDRESS:
rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
-vcpu-run-s.regs.gprs[r1]);
-   break;
-   case SIGP_SET_ARCHITECTURE:
-   vcpu-stat.instruction_sigp_arch++;
-   rc = __sigp_set_arch(vcpu, parameter);
+status_reg);
break;
case SIGP_SET_PREFIX:
vcpu-stat.instruction_sigp_prefix++;
-   rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
-  vcpu-run-s.regs.gprs[r1]);
+   rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, status_reg);
break;
case SIGP_COND_EMERGENCY_SIGNAL:
rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
- vcpu-run-s.regs.gprs[r1]);
+ status_reg);
break;
case SIGP_SENSE_RUNNING:
vcpu-stat.instruction_sigp_sense_running++;
-   rc = __sigp_sense_running(vcpu, cpu_addr,
- vcpu-run-s.regs.gprs[r1]);
+   rc = __sigp_sense_running(vcpu, cpu_addr, status_reg);
break;
case SIGP_START:
rc = sigp_check_callable(vcpu, cpu_addr);
@@ -432,7 +409,42 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
}
break;
default:
-   return -EOPNOTSUPP;
+   rc = -EOPNOTSUPP;
+   }
+
+   return rc;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+   int r1 = (vcpu-arch.sie_block-ipa  0x00f0)  4;
+   int r3 = vcpu-arch.sie_block-ipa  0x000f;
+   u32 parameter;
+   u16 cpu_addr = vcpu-run-s.regs.gprs[r3];
+   u8 order_code;
+   int rc;
+
+   /* sigp in userspace can exit */
+   if (vcpu-arch.sie_block-gpsw.mask  PSW_MASK_PSTATE)
+   return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+   order_code = kvm_s390_get_base_disp_rs(vcpu);
+
+   if (r1 % 2)
+   parameter = vcpu-run-s.regs.gprs[r1];
+   else
+   parameter = vcpu-run-s.regs.gprs[r1 + 1];
+
+   trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
+   switch (order_code) {
+   case SIGP_SET_ARCHITECTURE:
+   vcpu-stat.instruction_sigp_arch++;
+   rc = __sigp_set_arch(vcpu, parameter);
+   break;
+   default:
+   rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
+parameter,
+vcpu-run-s.regs.gprs[r1]);
}
 
if (rc  0)
-- 
1.9.3

[GIT PULL 3/9] KVM: s390: Fix size of monitor-class number field

2014-10-28 Thread Christian Borntraeger
From: Thomas Huth th...@linux.vnet.ibm.com

The monitor-class number field is only 16 bits, so we have to use
a u16 pointer to access it.

Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
CC: sta...@vger.kernel.org # v3.16+
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/interrupt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a398384..4fc3fed 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -270,7 +270,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu 
*vcpu,
break;
case PGM_MONITOR:
rc = put_guest_lc(vcpu, pgm_info-mon_class_nr,
- (u64 *)__LC_MON_CLASS_NR);
+ (u16 *)__LC_MON_CLASS_NR);
rc |= put_guest_lc(vcpu, pgm_info-mon_code,
   (u64 *)__LC_MON_CODE);
break;
-- 
1.9.3



[GIT PULL 6/9] KVM: s390: sigp: separate preparation handlers

2014-10-28 Thread Christian Borntraeger
From: David Hildenbrand d...@linux.vnet.ibm.com

In preparation for further code changes, this patch introduces separate handler
functions for:
- SIGP (RE)START - will not be allowed to terminate pending orders
- SIGP (INITIAL) CPU RESET - will be allowed to terminate certain pending orders
- unknown sigp orders

All sigp orders that require user space intervention are logged.

Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/sigp.h |  1 +
 arch/s390/kvm/sigp.c | 47 ++--
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 4957611..fad4ae2 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -10,6 +10,7 @@
 #define SIGP_RESTART 6
 #define SIGP_STOP_AND_STORE_STATUS9
 #define SIGP_INITIAL_CPU_RESET  11
+#define SIGP_CPU_RESET  12
 #define SIGP_SET_PREFIX 13
 #define SIGP_STORE_STATUS_AT_ADDRESS 14
 #define SIGP_SET_ARCHITECTURE   18
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 660a945..a9e1739 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -284,11 +284,12 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu,
return rc;
 }
 
-/* Test whether the destination CPU is available and not busy */
-static int sigp_check_callable(struct kvm_vcpu *vcpu, struct kvm_vcpu 
*dst_vcpu)
+static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
+  struct kvm_vcpu *dst_vcpu, u8 order_code)
 {
struct kvm_s390_local_interrupt *li = dst_vcpu-arch.local_int;
-   int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+   /* handle (RE)START in user space */
+   int rc = -EOPNOTSUPP;
 
spin_lock(li-lock);
if (li-action_bits  ACTION_STOP_ON_STOP)
@@ -298,6 +299,20 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, 
struct kvm_vcpu *dst_vcpu)
return rc;
 }
 
+static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
+   struct kvm_vcpu *dst_vcpu, u8 order_code)
+{
+   /* handle (INITIAL) CPU RESET in user space */
+   return -EOPNOTSUPP;
+}
+
+static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu)
+{
+   /* handle unknown orders in user space */
+   return -EOPNOTSUPP;
+}
+
 static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
   u16 cpu_addr, u32 parameter, u64 *status_reg)
 {
@@ -350,25 +365,27 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 
order_code,
rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
break;
case SIGP_START:
-   rc = sigp_check_callable(vcpu, dst_vcpu);
-   if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
-   rc = -EOPNOTSUPP;/* Handle START in user space */
+   rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
break;
case SIGP_RESTART:
vcpu-stat.instruction_sigp_restart++;
-   rc = sigp_check_callable(vcpu, dst_vcpu);
-   if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
-   VCPU_EVENT(vcpu, 4,
-  sigp restart %x to handle userspace,
-  cpu_addr);
-   /* user space must know about restart */
-   rc = -EOPNOTSUPP;
-   }
+   rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+   break;
+   case SIGP_INITIAL_CPU_RESET:
+   rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+   break;
+   case SIGP_CPU_RESET:
+   rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
break;
default:
-   rc = -EOPNOTSUPP;
+   rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
}
 
+   if (rc == -EOPNOTSUPP)
+   VCPU_EVENT(vcpu, 4,
+  sigp order %u - cpu %x: handled in user space,
+  order_code, dst_vcpu-vcpu_id);
+
return rc;
 }
 
-- 
1.9.3



[GIT PULL 5/9] KVM: s390: sigp: move target cpu checks into dispatcher

2014-10-28 Thread Christian Borntraeger
From: David Hildenbrand d...@linux.vnet.ibm.com

All sigp orders targeting one VCPU have to verify that the target is valid and
available.

Let's move the check from the single functions to the dispatcher. The
destination VCPU is directly passed as a pointer - instead of the cpu address of
the target.

Please note that all SIGP orders except SIGP SET ARCHITECTURE - even unknown
ones - will now check for the availability of the target VCPU. This is what the
architecture documentation specifies.

Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/sigp.c | 139 ++-
 1 file changed, 48 insertions(+), 91 deletions(-)

diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 5e259bd..660a945 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -20,20 +20,13 @@
 #include kvm-s390.h
 #include trace.h
 
-static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
u64 *reg)
 {
struct kvm_s390_local_interrupt *li;
-   struct kvm_vcpu *dst_vcpu = NULL;
int cpuflags;
int rc;
 
-   if (cpu_addr = KVM_MAX_VCPUS)
-   return SIGP_CC_NOT_OPERATIONAL;
-
-   dst_vcpu = kvm_get_vcpu(vcpu-kvm, cpu_addr);
-   if (!dst_vcpu)
-   return SIGP_CC_NOT_OPERATIONAL;
li = dst_vcpu-arch.local_int;
 
cpuflags = atomic_read(li-cpuflags);
@@ -48,44 +41,36 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
rc = SIGP_CC_STATUS_STORED;
}
 
-   VCPU_EVENT(vcpu, 4, sensed status of cpu %x rc %x, cpu_addr, rc);
+   VCPU_EVENT(vcpu, 4, sensed status of cpu %x rc %x, dst_vcpu-vcpu_id,
+  rc);
return rc;
 }
 
-static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
 {
struct kvm_s390_interrupt s390int = {
.type = KVM_S390_INT_EMERGENCY,
.parm = vcpu-vcpu_id,
};
-   struct kvm_vcpu *dst_vcpu = NULL;
int rc = 0;
 
-   if (cpu_addr  KVM_MAX_VCPUS)
-   dst_vcpu = kvm_get_vcpu(vcpu-kvm, cpu_addr);
-   if (!dst_vcpu)
-   return SIGP_CC_NOT_OPERATIONAL;
-
rc = kvm_s390_inject_vcpu(dst_vcpu, s390int);
if (!rc)
-   VCPU_EVENT(vcpu, 4, sent sigp emerg to cpu %x, cpu_addr);
+   VCPU_EVENT(vcpu, 4, sent sigp emerg to cpu %x,
+  dst_vcpu-vcpu_id);
 
return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
 
-static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
+   struct kvm_vcpu *dst_vcpu,
u16 asn, u64 *reg)
 {
-   struct kvm_vcpu *dst_vcpu = NULL;
const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
u16 p_asn, s_asn;
psw_t *psw;
u32 flags;
 
-   if (cpu_addr  KVM_MAX_VCPUS)
-   dst_vcpu = kvm_get_vcpu(vcpu-kvm, cpu_addr);
-   if (!dst_vcpu)
-   return SIGP_CC_NOT_OPERATIONAL;
flags = atomic_read(dst_vcpu-arch.sie_block-cpuflags);
psw = dst_vcpu-arch.sie_block-gpsw;
p_asn = dst_vcpu-arch.sie_block-gcr[4]  0x;  /* Primary ASN */
@@ -96,7 +81,7 @@ static int __sigp_conditional_emergency(struct kvm_vcpu 
*vcpu, u16 cpu_addr,
|| (psw-mask  psw_int_mask) != psw_int_mask
|| ((flags  CPUSTAT_WAIT)  psw-addr != 0)
|| (!(flags  CPUSTAT_WAIT)  (asn == p_asn || asn == s_asn))) {
-   return __sigp_emergency(vcpu, cpu_addr);
+   return __sigp_emergency(vcpu, dst_vcpu);
} else {
*reg = 0xUL;
*reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -104,23 +89,19 @@ static int __sigp_conditional_emergency(struct kvm_vcpu 
*vcpu, u16 cpu_addr,
}
 }
 
-static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __sigp_external_call(struct kvm_vcpu *vcpu,
+   struct kvm_vcpu *dst_vcpu)
 {
struct kvm_s390_interrupt s390int = {
.type = KVM_S390_INT_EXTERNAL_CALL,
.parm = vcpu-vcpu_id,
};
-   struct kvm_vcpu *dst_vcpu = NULL;
int rc;
 
-   if (cpu_addr  KVM_MAX_VCPUS)
-   dst_vcpu = kvm_get_vcpu(vcpu-kvm, cpu_addr);
-   if (!dst_vcpu)
-   return SIGP_CC_NOT_OPERATIONAL;
-
rc = kvm_s390_inject_vcpu(dst_vcpu, s390int);
if (!rc)
-   VCPU_EVENT(vcpu, 4, sent sigp ext call to cpu %x, cpu_addr);
+   VCPU_EVENT(vcpu, 4, sent sigp ext call to cpu %x

Re: [PATCH v12 0/6] arm/KVM: dirty page logging support for ARMv7 (3.17.0-rc1)

2014-10-30 Thread Christian Borntraeger
Am 23.10.2014 00:34, schrieb Mario Smarduch:
 This patch series introduces dirty page logging for ARMv7 and adds some 
 degree 
 of generic dirty logging support for x86, armv7 and later armv8.
 
 I implemented Alex's  suggestion after he took a look at the patches at kvm
 forum to simplify the generic/arch split - leaving mips, powerpc, s390, 
 (ia64 although broken) unchanged. x86/armv7 now share some dirty logging 
 code. 
 armv8 dirty log patches have been posted and tested, but for the time being armv8
 is non-generic as well.
 
 I briefly spoke to most of you at kvm forum, and this is the patch series
 I was referring to. The implementation changed from the previous version (patches
 1 and 2); those who acked the previous revision, please review again.
 
 Last 4 patches (ARM) have been rebased for a newer kernel, with no significant
 changes.
 
 Testing:
 - Generally live migration + checksumming of source/destination memory regions
   is used to validate correctness.
 - qemu machvirt, VExpress - Exynos 5440, FastModels - lmbench + dirty guest
   memory cycling.
 - ARMv8 Foundation Model/kvmtool - Due to slight overlap in 2nd stage handlers
   did a basic bringup using qemu.
 - x86_64 qemu  default machine model, tested migration on HP Z620, tested 
   convergence for several dirty page rates
 
 See https://github.com/mjsmar/arm-dirtylog-tests
 - Dirtlogtest-setup.pdf for ARMv7
 - https://github.com/mjsmar/arm-dirtylog-tests/tree/master/v7 - README
 
 The patch affects armv7,armv8, mips, ia64, powerpc, s390, x86_64. Patch
 series has been compiled for affected architectures:
 
 - x86_64 - defconfig 
 - ia64 - ia64-linux-gcc4.6.3 - defconfig, ia64 Kconfig defines BROKEN worked 
   around that to make sure new changes don't break build. Eventually build
   breaks due to other reasons.
 - mips - mips64-linux-gcc4.6.3 - malta_kvm_defconfig
 - ppc - powerpc64-linux-gcc4.6.3 - pseries_defconfig
 - s390 - s390x-linux-gcc4.6.3 - defconfig
 - armv8 - aarch64-linux-gnu-gcc4.8.1 - defconfig
 
 ARMv7 Dirty page logging implementation overview -
 - initially write protects VM RAM memory region - 2nd stage page tables
 - add support to read dirty page log and again write protect the dirty pages 
   - second stage page table for next pass.
 - second stage huge pages are dissolved into small page tables to keep track of
   dirty pages at page granularity. Tracking at huge page granularity limits
   migration to an almost idle system. Small page size logging supports higher 
   memory dirty rates.
 - In the event migration is canceled, normal behavior is resumed and huge pages
   are rebuilt over time.
 
 Changes since v11:
 - Implemented Alex's comments to simplify generic layer.
 
 Changes since v10:
 - addressed wanghaibin comments 
 - addressed Christoffers comments
 
 Changes since v9:
 - Split patches into generic and architecture specific variants for TLB flushing
   and dirty log read (patches 1,2 & 3,4,5,6)
 - rebased to 3.16.0-rc1
 - Applied Christoffers comments.
 
 Mario Smarduch (6):
   KVM: Add architecture-defined TLB flush support
   KVM: Add generic support for dirty page logging
   arm: KVM: Add ARMv7 API to flush TLBs
   arm: KVM: Add initial dirty page locking infrastructure
   arm: KVM: dirty log read write protect support
   arm: KVM: ARMv7 dirty page logging 2nd stage page fault
 
  arch/arm/include/asm/kvm_asm.h|1 +
  arch/arm/include/asm/kvm_host.h   |   14 +++
  arch/arm/include/asm/kvm_mmu.h|   20 
  arch/arm/include/asm/pgtable-3level.h |1 +
  arch/arm/kvm/Kconfig  |2 +
  arch/arm/kvm/Makefile |1 +
  arch/arm/kvm/arm.c|2 +
  arch/arm/kvm/interrupts.S |   11 ++
  arch/arm/kvm/mmu.c|  209 
 +++--
  arch/x86/include/asm/kvm_host.h   |3 -
  arch/x86/kvm/Kconfig  |1 +
  arch/x86/kvm/Makefile |1 +
  arch/x86/kvm/x86.c|   86 --
  include/linux/kvm_host.h  |4 +
  virt/kvm/Kconfig  |6 +
  virt/kvm/dirtylog.c   |  112 ++
  virt/kvm/kvm_main.c   |2 +
  17 files changed, 380 insertions(+), 96 deletions(-)
  create mode 100644 virt/kvm/dirtylog.c
 

Patches 1-3 seem to work fine on s390. The other patches are arm-only (well, I 
can't find 5 and 6) so I guess it's ok for s390.



Re: [PATCH 1/1] kvm-s390: Provide guest TOD Clock Get/Set Controls

2014-11-05 Thread Christian Borntraeger
Am 05.11.2014 11:07, schrieb Alexander Graf:
 
 
 On 27.10.14 16:44, Jason J. Herne wrote:
 From: Jason J. Herne jjhe...@linux.vnet.ibm.com

 Enable KVM_SET_CLOCK and KVM_GET_CLOCK ioctls on s390 for managing guest Time
 Of Day clock value.

 Signed-off-by: Jason J. Herne jjhe...@linux.vnet.ibm.com
 Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
 
 I like it.
 
 Reviewed-by: Alexander Graf ag...@suse.de

Paolo, are you ok with that patch as well? If yes I will send it with the next 
bunch of s390 patches.

 PS: I remember that you were considering some different take on the interface: 
IIRC you suggested to have the same format in kvm_clock_data->clock as x86, and 
that we might want to use a flag and a new field in the padding area that then 
contains the TOD value. Now looking again at Documentation/virtual/kvm/api.txt 
I actually prefer Jason's implementation since the api does not mention the 
value/format/offset. It seems to be ns since boot, correct?

So if any changes, I would prefer a small change to the documentation, that 
makes the meaning of clock explicit per architecture?

Christian



Re: [PATCH 1/1] kvm-s390: Provide guest TOD Clock Get/Set Controls

2014-11-05 Thread Christian Borntraeger
Am 05.11.2014 14:11, schrieb Paolo Bonzini:
 
 
 On 05/11/2014 13:28, Christian Borntraeger wrote:
 Am 05.11.2014 11:07, schrieb Alexander Graf:


 On 27.10.14 16:44, Jason J. Herne wrote:
 From: Jason J. Herne jjhe...@linux.vnet.ibm.com

 Enable KVM_SET_CLOCK and KVM_GET_CLOCK ioctls on s390 for
 managing guest Time Of Day clock value.

 Signed-off-by: Jason J. Herne jjhe...@linux.vnet.ibm.com 
 Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com

 I like it.

 Reviewed-by: Alexander Graf ag...@suse.de

 Paolo, are you ok with that patch as well? If yes I will send it with
 the next bunch of s390 patches.

 PS: I remember that you were considering some different take on the
 interface: IIRC you suggest to have the same format in
 kvm_clock_data-clock as x86, and that we might want to use a flag
 and a new field in the padding area that then contains the TOD value.
 Now looking again at Documentation/virtual/kvm/api.txt I actually
 prefer Jasons implementation since the api does not mention the
 value/format/offset. It seems to be ns since boot, correct?

 So if any changes, I would prefer a small change to the
 documentation, that makes the meaning of clock explicit per
 architecture?
 
 After a quick refresh on IRC, I remembered our previous discussion.
 
 I was a bit worried that the interface did not let us pass the extra
 byte for the stcke instruction's overflow counter.  The question then is
 whether to:
 
 1) keep an x86-consistent interface for KVM_GET/SET_CLOCK, and put the
 whole 16 byte stcke output in the padding
 
 2) put the 8-byte stck value (stcke bytes 1-8) in the value, and the
 overflow counter (stcke byte 0) in the padding (with the presence
 governed by a flag).  As you explained, bytes 9-13 are computed by the
 CPU and we do not care anyway of accuracy beyond 0.25 ns, while bytes
 14-15 are accessed separately via ONEREG.
 
 3) use ONEREG instead of KVM_GET/SET_CLOCK.  You can decide whether to
 use a 72 (or 96) bit value, or two separate 8+64 values.
 
 1 or 3 seem the cleanest.  On the other hand s390 doesn't have a use for
 a bootbased counter, which makes 1 much less interesting/useful than I
 imagined.
 
 PPC uses a combination of KVM_GET_SREGS and KVM_GET/SET_ONEREG for the
 closest equivalent (TBL/TBU), not KVM_GET/SET_CLOCK.  MIPS is also
 ONEREG-based.  This makes me lean towards 3.
 
 Of course 2 has code written, but it should be a small change to use
 ONEREG instead.  What do you think?
 

I think the change to a ONEREG should be trivial. (It will be synced between 
all guest CPUs, so we could also use a VM attribute, but a ONEREG should be ok 
as well.)

I think two registers (one 64-bit and another 8-bit register, which must be 0 all 
the time as of today) are preferred.

I think we could even defer the 2nd register until we know what the hardware 
folks will come up with before 2042. (stcke in the POP indicates an 8bit 
counter).

So Paolo, Alex two simple questions:

- ONEREG or VM attribute?
- Just one 64bit value today and the other one later or both now (64+8)

Christian



Re: [PATCH 1/1] kvm-s390: Provide guest TOD Clock Get/Set Controls

2014-11-05 Thread Christian Borntraeger
Am 05.11.2014 18:37, schrieb Alexander Graf:
 
 
 On 05.11.14 17:48, Christian Borntraeger wrote:
 Am 05.11.2014 14:11, schrieb Paolo Bonzini:


 On 05/11/2014 13:28, Christian Borntraeger wrote:
 Am 05.11.2014 11:07, schrieb Alexander Graf:


 On 27.10.14 16:44, Jason J. Herne wrote:
 From: Jason J. Herne jjhe...@linux.vnet.ibm.com

 Enable KVM_SET_CLOCK and KVM_GET_CLOCK ioctls on s390 for
 managing guest Time Of Day clock value.

 Signed-off-by: Jason J. Herne jjhe...@linux.vnet.ibm.com 
 Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com

 I like it.

 Reviewed-by: Alexander Graf ag...@suse.de

 Paolo, are you ok with that patch as well? If yes I will send it with
 the next bunch of s390 patches.

 PS: I remember that you were considering some different take on the
 interface: IIRC you suggest to have the same format in
 kvm_clock_data-clock as x86, and that we might want to use a flag
 and a new field in the padding area that then contains the TOD value.
 Now looking again at Documentation/virtual/kvm/api.txt I actually
 prefer Jasons implementation since the api does not mention the
 value/format/offset. It seems to be ns since boot, correct?

 So if any changes, I would prefer a small change to the
 documentation, that makes the meaning of clock explicit per
 architecture?

 After a quick refresh on IRC, I remembered our previous discussion.

 I was a bit worried that the interface did not let us pass the extra
 byte for the stcke instruction's overflow counter.  The question then is
 whether to:

 1) keep an x86-consistent interface for KVM_GET/SET_CLOCK, and put the
 whole 16 byte stcke output in the padding

 2) put the 8-byte stck value (stcke bytes 1-8) in the value, and the
 overflow counter (stcke byte 0) in the padding (with the presence
 governed by a flag).  As you explained, bytes 9-13 are computed by the
 CPU and we do not care anyway of accuracy beyond 0.25 ns, while bytes
 14-15 are accessed separately via ONEREG.

 3) use ONEREG instead of KVM_GET/SET_CLOCK.  You can decide whether to
 use a 72 (or 96) bit value, or two separate 8+64 values.

 1 or 3 seem the cleanest.  On the other hand s390 doesn't have a use for
 a bootbased counter, which makes 1 much less interesting/useful than I
 imagined.

 PPC uses a combination of KVM_GET_SREGS and KVM_GET/SET_ONEREG for the
 closest equivalent (TBL/TBU), not KVM_GET/SET_CLOCK.  MIPS is also
 ONEREG-based.  This makes me lean towards 3.

 Of course 2 has code written, but it should be a small change to use
 ONEREG instead.  What do you think?


 I think the change to a ONEREG should be trivial. (it will be synced between 
 all guest CPUs, so we could also use a VM attribute but a ONEREG should be 
 ok as well.

 I think two registers (one 64bit and another 8bit register (which must be 0 
 all the time as of today) is preferred.

 I think we could even defer the 2nd register until we know what the hardware 
 folks will come up with before 2042. (stcke in the POP indicates an 8bit 
 counter).

 So Paolo, Alex two simple questions:

 - ONEREG or VM attribute?
 
 On PPC we have core granularity, so while the interface is on one vcpu,
 it really only affects every 8th vcpu (or whatever you configure the
 number of threads as).
 
 So there is precedence for an interface that modifies other vcpus while
 the ONE_REG is only targeting a single vcpu.
 
 Whether you want to follow that approach or do it as VM attribute
 straight away, I don't mind much :).

given that top programmable field and epoch are available as ONEREG, lets do 
the same for TOD.

 
 - Just one 64bit value today and the other one later or both now (64+8)
 
 Make it both today with a check that the second one has to be 0 maybe?
 Then we only need to modify the kernel itself, not the API later.

Makes sense.



Re: [PATCH 1/1] kvm-s390: Provide guest TOD Clock Get/Set Controls

2014-11-06 Thread Christian Borntraeger
Am 05.11.2014 20:45, schrieb Paolo Bonzini:
 
 
 On 05/11/2014 18:56, Christian Borntraeger wrote:

 Whether you want to follow that approach or do it as VM attribute
 straight away, I don't mind much :).
 given that top programmable field and epoch are available as ONEREG, lets do 
 the same for TOD.
 
 Is the epoch per-cpu?

two answers :-)

- the implementation is one epoch per control block, so someone could do that 
per CPU... but:
- guest TOD == host TOD + epochdiff. The architecture mandates that there is only 
one TOD per system, so all guest TODs must be synced and so must be all 
epochdiffs.

Some background: we provided access to the epoch value about 2 years ago with 
other things as ONEREG. Assuming that all hosts are time synced, we could just 
migrate the epoch value.
Now: this is not the case all the time. Just migrating the epoch could result 
in time jumping forth and back.

The thing now is: QEMU cannot calculate a correction reliably, because it 
cannot rely on the value of the TOD (by using stck) since the kernel might do 
tricks with the host TOD value as soon as we enable time synching between z 
boxes.
(That's why normal userspace should not use stck either; it should use 
gettimeofday, because the kernel might have offsets etc. due to NTP or time 
synching between boxes.) So we finally came up with just migrating the guest 
visible TOD, which seems to work fine.

As a recap we have now:
#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)

and we would
add
#define KVM_REG_S390_TOD   (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3) 
#define KVM_REG_S390_TOD_INDEX (KVM_REG_S390 | KVM_REG_SIZE_U8 | 0x4)
(any better name?)
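
To make that concrete, userspace migration code could then read and write the
guest-visible TOD roughly like this (sketch only: KVM_REG_S390_TOD stands for
whatever register id is finally picked, vcpu_fd is any vcpu fd of the VM, and
error handling is omitted):

/*
 * Sketch of QEMU-like userspace using the proposed ONEREG to migrate the
 * guest TOD.  The value is synced across all VCPUs, so any vcpu fd works.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_guest_tod(int vcpu_fd, uint64_t *tod)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_TOD,		/* proposed name, see above */
		.addr = (uint64_t)(uintptr_t)tod,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}

static int set_guest_tod(int vcpu_fd, uint64_t tod)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_TOD,
		.addr = (uint64_t)(uintptr_t)&tod,
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}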

Makes sense?

Christian



Re: [PATCH 1/1] kvm-s390: Provide guest TOD Clock Get/Set Controls

2014-11-06 Thread Christian Borntraeger
Am 06.11.2014 09:43, schrieb Christian Borntraeger:
 Am 05.11.2014 20:45, schrieb Paolo Bonzini:


 On 05/11/2014 18:56, Christian Borntraeger wrote:

 Whether you want to follow that approach or do it as VM attribute
 straight away, I don't mind much :).
 given that top programmable field and epoch are available as ONEREG, lets 
 do the same for TOD.

 Is the epoch per-cpu?
 
 two answers :-)
 
 - the implementation is one epoch per control block, so someone could do that 
 per CPU...but:
 - guest TOD == host TOD + epochdiff. architecture mandates that there is only 
 one TOD per system, so all guest TODs must be synced and so must be all 
 epochdiffs
 
 Some background. We provided access to the epoch value about 2 years ago with 
 other things as ONEREG. Asumming that all hosts are time synced, we could 
 just migrate the epoch value.
 Now: this is not the case all the time. Just migrating the epoch could result 
 in time jumping forth and back.
 
 This thing is now: QEMU cannot calculate a correction reliably, because it 
 cannot rely on the value of the TOD (by using stck) since the kernel might do 
 tricks with the host TOD value as soon as we enable time synching between z 
 boxes.
 (Thats why normal userspace should not use stck either, it should use 
 gettimeofday because the kernel might have offsets etc due to NTP or time 
 synching between boxes). So we finally cam up with just migrating the guest 
 visible TOD, which seems to work fine.
 
 As a recap we have now:
 #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
 #define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
 
 and we would
 add
 #define KVM_REG_S390_TOD   (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3) 
 #define KVM_REG_S390_TOD_INDEX (KVM_REG_S390 | KVM_REG_SIZE_U8 | 0x4)

obviously not 0x3 and 0x4 but something higher..



[GIT PULL 0/4] KVM: s390: Fixes for kvm/next (3.19) and stable

2014-11-07 Thread Christian Borntraeger
Paolo,

here are some fixes for KVM on s390 for kvm/next. The first two patches
have a stable tag and could also go via kvm/master - if you like.

The following changes since commit 0df1f2487d2f0d04703f142813d53615d62a1da4:

  Linux 3.18-rc3 (2014-11-02 15:01:51 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git  
tags/kvm-s390-next-20141107

for you to fetch changes up to 365dc1633521a32d55d839f56b41bb9a531d957a:


KVM: s390: Fixes for kvm/next (3.19) and stable

1. We should flush TLBs for load control instruction emulation (stable)
2. A workaround for a compiler bug that renders ACCESS_ONCE broken (stable)
3. Fix program check handling for load control
4. Documentation Fix


Christian Borntraeger (2):
  KVM: s390: Fix ipte locking
  KVM: s390: flush CPU on load control

Dominik Dingel (1):
  KVM: fix vm device attribute documentation

Heiko Carstens (1):
  KVM: s390: fix handling of lctl[g]/stctl[g]

 Documentation/virtual/kvm/devices/vm.txt | 10 ++---
 arch/s390/kvm/gaccess.c  | 20 ++---
 arch/s390/kvm/priv.c | 72 +++-
 3 files changed, 53 insertions(+), 49 deletions(-)



[GIT PULL 2/4] KVM: s390: flush CPU on load control

2014-11-07 Thread Christian Borntraeger
Some control register changes will flush some aspects of the CPU, e.g. the
POP explicitly mentions that for CR9-CR11 TLBs may be cleared.
Instead of trying to be clever and only flush on specific CRs, let's
play it safe and flush on all lctl(g) as future machines might define
new bits in CRs. Load control intercepts should not happen that often.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Acked-by: Cornelia Huck cornelia.h...@de.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Cc: sta...@vger.kernel.org
---
 arch/s390/kvm/priv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 72bb2dd..9c565b6 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -791,7 +791,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
break;
reg = (reg + 1) % 16;
} while (1);
-
+   kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
return 0;
 }
 
@@ -863,7 +863,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
break;
reg = (reg + 1) % 16;
} while (1);
-
+   kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
return 0;
 }
 
-- 
1.9.3



[GIT PULL 1/4] KVM: s390: Fix ipte locking

2014-11-07 Thread Christian Borntraeger
ipte_unlock_siif uses cmpxchg to replace the in-memory data of the ipte
lock together with ACCESS_ONCE for the initial read.

union ipte_control {
unsigned long val;
struct {
unsigned long k  : 1;
unsigned long kh : 31;
unsigned long kg : 32;
};
};
[...]
static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
{
	union ipte_control old, new, *ic;

	ic = &vcpu->kvm->arch.sca->ipte_control;
	do {
		new = old = ACCESS_ONCE(*ic);
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	if (!new.kh)
		wake_up(&vcpu->kvm->arch.ipte_wq);
}

The new value is loaded twice from memory with gcc 4.7.2 of
fedora 18, despite the ACCESS_ONCE:

---

l       %r4,0(%r3)        <--- load first 32 bit of lock (k and kh) in r4
alfi    %r4,2147483647    <--- add -1 to r4
llgtr   %r4,%r4           <--- zero out the sign bit of r4
lg      %r1,0(%r3)        <--- load all 64 bit of lock into new
lgr     %r2,%r1           <--- load the same into old
risbg   %r1,%r4,1,31,32   <--- shift and insert r4 into the bits 1-31 of new
llihf   %r4,2147483647
ngrk    %r4,%r1,%r4
jne     aa0 <ipte_unlock+0xf8>
nihh    %r1,32767
lgr     %r4,%r2
csg     %r4,%r1,0(%r3)
cgr     %r2,%r4
jne     a70 <ipte_unlock+0xc8>

If the memory value changes between the first load (l) and the second
load (lg) we are broken. If that happens VCPU threads will hang
(unkillable) in handle_ipte_interlock.

Andreas Krebbel analyzed this and tracked it down to a compiler bug in
that version:
while it is not that obvious the C99 standard basically forbids
duplicating the memory access also in that case. For an argumentation of
a similar case please see:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=22278#c43

For the implementation-defined cases regarding volatile there are some
GCC-specific clarifications which can be found here:
https://gcc.gnu.org/onlinedocs/gcc/Volatiles.html#Volatiles

I've tracked down the problem with a reduced testcase. The problem was
that during a tree level optimization (SRA - scalar replacement of
aggregates) the volatile marker is lost. And an RTL level optimizer (CSE
- common subexpression elimination) then propagated the memory read into
  its second use introducing another access to the memory location. So
indeed Christian's suspicion that the union access has something to do
with it is correct (since it triggered the SRA optimization).

This issue has been reported and fixed in the GCC 4.8 development cycle:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145;

This patch replaces the ACCESS_ONCE scheme with a barrier() based scheme
that should work for all supported compilers.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Cc: sta...@vger.kernel.org # v3.16+
---
 arch/s390/kvm/gaccess.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0f961a1..6dc0ad9 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -229,10 +229,12 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu)
goto out;
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
-   old = ACCESS_ONCE(*ic);
+   old = *ic;
+   barrier();
while (old.k) {
cond_resched();
-   old = ACCESS_ONCE(*ic);
+   old = *ic;
+   barrier();
}
new = old;
new.k = 1;
@@ -251,7 +253,9 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
goto out;
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
-   new = old = ACCESS_ONCE(*ic);
+   old = *ic;
+   barrier();
+   new = old;
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
wake_up(&vcpu->kvm->arch.ipte_wq);
@@ -265,10 +269,12 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
 
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
-   old = ACCESS_ONCE(*ic);
+   old = *ic;
+   barrier();
while (old.kg) {
cond_resched();
-   old = ACCESS_ONCE(*ic);
+   old = *ic;
+   barrier();
}
new = old;
new.k = 1;
@@ -282,7 +288,9 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
 
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
-   new = old = ACCESS_ONCE(*ic);
+   old = *ic;
+   barrier();
+   new = old;
new.kh--;
if (!new.kh)
new.k = 0;
-- 
1.9.3


[GIT PULL 4/4] KVM: fix vm device attribute documentation

2014-11-07 Thread Christian Borntraeger
From: Dominik Dingel din...@linux.vnet.ibm.com

Documentation uses incorrect attribute names for some vm device
attributes: fix this.

Signed-off-by: Dominik Dingel din...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 Documentation/virtual/kvm/devices/vm.txt | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/vm.txt 
b/Documentation/virtual/kvm/devices/vm.txt
index 0d16f96..d426fc8 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -12,14 +12,14 @@ specific.
 1. GROUP: KVM_S390_VM_MEM_CTRL
 Architectures: s390
 
-1.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
 Parameters: none
-Returns: -EBUSY if already a vcpus is defined, otherwise 0
+Returns: -EBUSY if a vcpu is already defined, otherwise 0
 
-Enables CMMA for the virtual machine
+Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
 
-1.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA
-Parameteres: none
+1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
+Parameters: none
 Returns: 0
 
 Clear the CMMA status for all guest pages, so any pages the guest marked
-- 
1.9.3



[GIT PULL 3/4] KVM: s390: fix handling of lctl[g]/stctl[g]

2014-11-07 Thread Christian Borntraeger
From: Heiko Carstens heiko.carst...@de.ibm.com

According to the architecture all instructions are suppressing if memory
access is prohibited due to DAT protection, unless stated otherwise for
an instruction.
The lctl[g]/stctl[g] implementations handled this incorrectly since
control register handling was done piecemeal, which means they had
terminating instead of suppressing semantics.
This patch fixes this.

Signed-off-by: Heiko Carstens heiko.carst...@de.ibm.com
Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/priv.c | 68 +---
 1 file changed, 32 insertions(+), 36 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 9c565b6..9bde32f 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -762,8 +762,8 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 {
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-   u32 val = 0;
-   int reg, rc;
+   int reg, rc, nr_regs;
+   u32 ctl_array[16];
u64 ga;
 
vcpu-stat.instruction_lctl++;
@@ -779,14 +779,15 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
+   nr_regs = ((reg3 - reg1) & 0xf) + 1;
+   rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+   if (rc)
+   return kvm_s390_inject_prog_cond(vcpu, rc);
reg = reg1;
+   nr_regs = 0;
do {
-   rc = read_guest(vcpu, ga, &val, sizeof(val));
-   if (rc)
-   return kvm_s390_inject_prog_cond(vcpu, rc);
vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
-   vcpu->arch.sie_block->gcr[reg] |= val;
-   ga += 4;
+   vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
if (reg == reg3)
break;
reg = (reg + 1) % 16;
@@ -799,9 +800,9 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 {
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+   int reg, rc, nr_regs;
+   u32 ctl_array[16];
u64 ga;
-   u32 val;
-   int reg, rc;
 
vcpu-stat.instruction_stctl++;
 
@@ -817,26 +818,24 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
 
reg = reg1;
+   nr_regs = 0;
do {
-   val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful;
-   rc = write_guest(vcpu, ga, &val, sizeof(val));
-   if (rc)
-   return kvm_s390_inject_prog_cond(vcpu, rc);
-   ga += 4;
+   ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
if (reg == reg3)
break;
reg = (reg + 1) % 16;
} while (1);
-
-   return 0;
+   rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+   return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static int handle_lctlg(struct kvm_vcpu *vcpu)
 {
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-   u64 ga, val;
-   int reg, rc;
+   int reg, rc, nr_regs;
+   u64 ctl_array[16];
+   u64 ga;
 
vcpu-stat.instruction_lctlg++;
 
@@ -848,17 +847,17 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
if (ga & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-   reg = reg1;
-
VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
+   nr_regs = ((reg3 - reg1) & 0xf) + 1;
+   rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+   if (rc)
+   return kvm_s390_inject_prog_cond(vcpu, rc);
+   reg = reg1;
+   nr_regs = 0;
do {
-   rc = read_guest(vcpu, ga, &val, sizeof(val));
-   if (rc)
-   return kvm_s390_inject_prog_cond(vcpu, rc);
-   vcpu->arch.sie_block->gcr[reg] = val;
-   ga += 8;
+   vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
if (reg == reg3)
break;
reg = (reg + 1) % 16;
@@ -871,8 +870,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 {
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-   u64 ga, val;
-   int reg, rc;
+   int reg, rc, nr_regs;
+   u64 ctl_array[16];
+   u64 ga;
 
vcpu-stat.instruction_stctg++;
 
@@ -884,23 +884,19 @@ static int handle_stctg(struct kvm_vcpu *vcpu

Re: [PATCH v2] kvm-s390: Provide guest TOD Clock Get/Set Controls

2014-11-07 Thread Christian Borntraeger
Am 07.11.2014 um 16:06 schrieb Paolo Bonzini:
 
 
 On 07/11/2014 15:55, Jason J. Herne wrote:
 From: Jason J. Herne jjhe...@linux.vnet.ibm.com

 Provide KVM_REG_S390_TOD and KVM_REG_S390_TOD_HIGH registers on s390 for
 managing guest Time Of Day clock value.

 KVM_REG_S390_TOD_HIGH is presently always set to 0. In the future it will
 contain a high order expansion of the tod clock value after it overflows
 the 64-bits of KVM_REG_S390_TOD.

 Signed-off-by: Jason J. Herne jjhe...@linux.vnet.ibm.com
 ---
  arch/s390/include/uapi/asm/kvm.h |  2 ++
  arch/s390/kvm/kvm-s390.c | 63 
 
  2 files changed, 65 insertions(+)

 diff --git a/arch/s390/include/uapi/asm/kvm.h 
 b/arch/s390/include/uapi/asm/kvm.h
 index 48eda3a..5578832 100644
 --- a/arch/s390/include/uapi/asm/kvm.h
 +++ b/arch/s390/include/uapi/asm/kvm.h
 @@ -138,4 +138,6 @@ struct kvm_sync_regs {
  #define KVM_REG_S390_PFSELECT   (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
  #define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8)
  #define KVM_REG_S390_GBEA   (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9)
 +#define KVM_REG_S390_TOD(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0xA)
 +#define KVM_REG_S390_TOD_HIGH   (KVM_REG_S390 | KVM_REG_SIZE_U8 | 0xB)
  #endif
 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
 index 55aade4..17e3d61 100644
 --- a/arch/s390/kvm/kvm-s390.c
 +++ b/arch/s390/kvm/kvm-s390.c
 @@ -31,6 +31,7 @@
  #include <asm/switch_to.h>
  #include <asm/facility.h>
  #include <asm/sclp.h>
 +#include<asm/timex.h>

Will apply with a small whitespace fix
[..]

 Acked-by: Paolo Bonzini pbonz...@redhat.com



compiler bug gcc4.6/4.7 with ACCESS_ONCE and workarounds

2014-11-10 Thread Christian Borntraeger
Linus,

Last week I sent the patch below to Paolo for kvm/next.
Heiko Carstens pointed out that the kernel often does not 
work around compiler bugs; instead we blacklist the 
compiler, add a BUILD_BUG_ON or similar when we can't be 
sure to catch all broken cases, e.g. Heiko mentioned 
the x86 specific test cases for stack protector.

Now: I can reproduce the miscompile below on gcc 4.6 and gcc 4.7;
gcc 4.5 seems ok, gcc 4.8 is fixed.  This makes blacklisting
a bit hard, especially since it is not limited to s390, but
covers all architectures.
In essence ACCESS_ONCE will not work reliably on aggregate 
types with gcc 4.6 and gcc 4.7.
In Linux we seem to use ACCESS_ONCE mostly on scalar types;
the code below is an example where we don't - and break.

Linus, what is your take on workarounds of compiler bugs?
The barrier solution below will fix that particular case, but
are there others? Maybe we can come up with a clever trick of
creating a build-bug for ACCESS_ONCE on non-scalar types?
A testcase is not trivial, since we have to rely on other
optimization steps to actually do the wrong thing, and the 
gcc testcase would dump the internal tree and check
that - something that does not seem to be ok for the kernel.

Christian


 Forwarded Message 
Subject: [GIT PULL 1/4] KVM: s390: Fix ipte locking
Date: Fri,  7 Nov 2014 12:45:13 +0100
From: Christian Borntraeger borntrae...@de.ibm.com
To: Paolo Bonzini pbonz...@redhat.com
CC: KVM kvm@vger.kernel.org, Gleb Natapov g...@kernel.org, Alexander Graf 
ag...@suse.de, Cornelia Huck cornelia.h...@de.ibm.com, Jens Freimann 
jf...@linux.vnet.ibm.com, linux-s390 linux-s...@vger.kernel.org, Christian 
Borntraeger borntrae...@de.ibm.com, sta...@vger.kernel.org

ipte_unlock_siif uses cmpxchg to replace the in-memory data of the ipte
lock together with ACCESS_ONCE for the initial read.

union ipte_control {
unsigned long val;
struct {
unsigned long k  : 1;
unsigned long kh : 31;
unsigned long kg : 32;
};
};
[...]
static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
{
	union ipte_control old, new, *ic;

	ic = &vcpu->kvm->arch.sca->ipte_control;
	do {
		new = old = ACCESS_ONCE(*ic);
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	if (!new.kh)
		wake_up(&vcpu->kvm->arch.ipte_wq);
}

The new value is loaded twice from memory with gcc 4.7.2 of
fedora 18, despite the ACCESS_ONCE:

---

l       %r4,0(%r3)        <--- load first 32 bit of lock (k and kh) in r4
alfi    %r4,2147483647    <--- add -1 to r4
llgtr   %r4,%r4           <--- zero out the sign bit of r4
lg      %r1,0(%r3)        <--- load all 64 bit of lock into new
lgr     %r2,%r1           <--- load the same into old
risbg   %r1,%r4,1,31,32   <--- shift and insert r4 into the bits 1-31 of new
llihf   %r4,2147483647
ngrk    %r4,%r1,%r4
jne     aa0 <ipte_unlock+0xf8>
nihh    %r1,32767
lgr     %r4,%r2
csg     %r4,%r1,0(%r3)
cgr     %r2,%r4
jne     a70 <ipte_unlock+0xc8>

If the memory value changes between the first load (l) and the second
load (lg) we are broken. If that happens VCPU threads will hang
(unkillable) in handle_ipte_interlock.

Andreas Krebbel analyzed this and tracked it down to a compiler bug in
that version:
while it is not that obvious the C99 standard basically forbids
duplicating the memory access also in that case. For an argumentation of
a similar case please see:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=22278#c43

For the implementation-defined cases regarding volatile there are some
GCC-specific clarifications which can be found here:
https://gcc.gnu.org/onlinedocs/gcc/Volatiles.html#Volatiles

I've tracked down the problem with a reduced testcase. The problem was
that during a tree level optimization (SRA - scalar replacement of
aggregates) the volatile marker is lost. And an RTL level optimizer (CSE
- common subexpression elimination) then propagated the memory read into
  its second use introducing another access to the memory location. So
indeed Christian's suspicion that the union access has something to do
with it is correct (since it triggered the SRA optimization).

This issue has been reported and fixed in the GCC 4.8 development cycle:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145;

This patch replaces the ACCESS_ONCE scheme with a barrier() based scheme
that should work for all supported compilers.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Cc: sta...@vger.kernel.org # v3.16+
---
 arch/s390/kvm/gaccess.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0f961a1..6dc0ad9 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -229,10 +229,12 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu)
goto out;
ic = &vcpu->kvm->arch.sca

Re: compiler bug gcc4.6/4.7 with ACCESS_ONCE and workarounds

2014-11-11 Thread Christian Borntraeger
Am 10.11.2014 um 22:07 schrieb Linus Torvalds:
 On Mon, Nov 10, 2014 at 12:18 PM, Christian Borntraeger
 borntrae...@de.ibm.com wrote:

 Now: I can reproduces belows miscompile on gcc46 and gcc 47
 gcc 45 seems ok, gcc 48 is fixed.  This makes blacklisting
 a bit hard, especially since it is not limited to s390, but
 covers all architectures.
 In essence ACCESS_ONCE will not work reliably on aggregate
 types with gcc 4.6 and gcc 4.7.
 In Linux we seem to use ACCESS_ONCE mostly on scalar types,
 below code is an example were we dont - and break.
 
 Hmm. I think we should see how painful it would be to make it a rule
 that ACCESS_ONCE() only works on scalar types.
 
 Even in the actual code you show as an example, the fix is really to
 use the unsigned long val member of the union for the ACCESS_ONCE().
 And that seems to be true in many other cases too.

Yes, using the val like in 
-   new = old = ACCESS_ONCE(*ic);
+   new.val = old.val = ACCESS_ONCE(ic->val);

does solve the problem as well. In fact, gcc does create the same binary
code on my 4.7.2.
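
Spelled out, the unlock path with the .val scheme would then look roughly like
this (sketch based on the code quoted earlier in this thread, not the patch
that was committed):

static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
{
	union ipte_control old, new, *ic;

	ic = &vcpu->kvm->arch.sca->ipte_control;
	do {
		/* one scalar load instead of ACCESS_ONCE on the whole union */
		new.val = old.val = ACCESS_ONCE(ic->val);
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	if (!new.kh)
		wake_up(&vcpu->kvm->arch.ipte_wq);
}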

Are you ok with the patch as is in kvm/next for the time being or shall
we revert that and replace it with the .val scheme?

We can also do the cleanup later on if we manage to get your initial patch
into a shape that works out.

Christian



Re: compiler bug gcc4.6/4.7 with ACCESS_ONCE and workarounds

2014-11-12 Thread Christian Borntraeger
Am 12.11.2014 um 01:36 schrieb Linus Torvalds:
 On Tue, Nov 11, 2014 at 4:33 PM, Linus Torvalds
 torva...@linux-foundation.org wrote:

 I guess as a workaround it is fine, as long as we don't lose sight of
 trying to eventually do a better job.
 
 Oh, and when it comes to the actual gcc bug - do you have any reason
 to believe that it's somehow triggered more easily by something
 particular in the arch/s390/kvm/gaccess.c code?

Yes, there are reasons. First of all, the bug is that SRA removes the volatile tag, 
but that does not mean that this breaks things. As long as the operation is 
simple enough things will be mostly ok. If things are not simple, like in 
gaccess, things get more complicated. Let's look at the ipte lock. The lock 
itself consists of 3 parts: k (1 bit: locked), kh (31-bit counter for the host) 
and kg (32-bit counter for the millicode when doing specific guest 
instructions). There are 3 valid states:
1. k=0, kh=0; kg=0
2. k=1, kh!=0, kg=0
3. k=1, kh=0, kg!=0

So the host code must check if the guest counter is zero and can then set the k 
bit to one and increase the counter (for unlock it has to decrement kh and, if 
that becomes zero, also zero the k bit).
That means that we have multiple accesses to subcomponents. As the host counter 
is bit 1-31 (IBM speak; Linux speak: bit 32-62) gcc wants to use the 
load-31-bit instruction. 
So far so good. The ticket lock is also not a trivial set/clear bit.

Now: in gcc the memory costs for s390 are modeled to have the same costs as 
register accesses (TARGET_MEMORY_MOVE_COST == 1, TARGET_REGISTER_MOVE_COST == 1).
So for gcc, re-loading a part of the lock from memory costs the same as 
loading it from a register. That probably triggered that bug.

Christian




 
 IOW, why does this problem not hit the x86 spinlocks that also use
 volatile pointers to aggregate types? Or does it?

I think we would have noticed if that hit. But there are certainly cases where 
this bug triggers also on x86, see
the initial bug report of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 
That bug is certainly different (instead of transforming one load into 
multiple loads, it combines multiple writes into one), but it shows that a 
volatile marker is removed.




Re: compiler bug gcc4.6/4.7 with ACCESS_ONCE and workarounds

2014-11-20 Thread Christian Borntraeger
Am 10.11.2014 um 22:07 schrieb Linus Torvalds:
[...]
 So before blacklisting any compilers, let's first see if
 
  (a) we can actually make it a real rule that we only use ACCESS_ONCE on 
 scalars
  (b) we can somehow enforce this with a compiler warning/error for mis-uses
 
 For example, the attached patch works for some cases, but shows how we
 use ACCESS_ONCE() on pointers to pte_t's etc, so it doesn't come even
 close to compiling the whole kernel. But I wonder how painful that
 would be to change.. The places where it complains are actually
 somewhat debatable to begin with, like:
 
  - handle_pte_fault(.. pte_t *pte ..):
 
 entry = ACCESS_ONCE(*pte);
 
 and the thing is, pte is actually possibly an 8-byte entity on
 x86-32, and that ACCESS_ONCE() fundamentally will be two 32-byte
 reads.
 
 So there is a very valid argument for saying well, you shouldn't do
 that, then, and that we might be better off cleaning up our
 ACCESS_ONCE() uses, than to just blindly blacklist compilers.
 
 NOTE! I'm not at all advocating the attached patch. I'm sending it out
 white-space damaged on purpose, it's more of a hey, something like
 this might be the direction we want to go in, with the spinlock.h
 part of the patch also acting as an example of the kind of changes the
 ACCESS_ONCE() only works on scalars rule would require.

So I tried to see if I can come up with some results on how often this problem 
happens...

[...]


 diff --git a/include/linux/compiler.h b/include/linux/compiler.h
 index d5ad7b1118fc..63e82f1dfc1a 100644
 --- a/include/linux/compiler.h
 +++ b/include/linux/compiler.h
 @@ -378,7 +378,11 @@ void ftrace_likely_update(struct
 ftrace_branch_data *f, int val, int expect);
   * use is to mediate communication between process-level code and irq/NMI
   * handlers, all running on the same CPU.
   */
 -#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
 +#define get_scalar_volatile_pointer(x) ({ \
 +   typeof(x) *__p = &(x); \
 +   volatile typeof(x) *__vp = __p; \
 +   (void)(long)*__p; __vp; })
 +#define ACCESS_ONCE(x) (*get_scalar_volatile_pointer(x))

..and just took this patch. On s390 it is pretty much clean with allyesconfig.
In fact, with the siif lock changed, only the pte/pmd cases you mentioned trigger 
a compile error:

mm/memory.c: In function 'handle_pte_fault':
mm/memory.c:3203:2: error: aggregate value used where an integer was expected
  entry = ACCESS_ONCE(*pte);

mm/rmap.c: In function 'mm_find_pmd':
mm/rmap.c:584:2: error: aggregate value used where an integer was expected
  pmde = ACCESS_ONCE(*pmd);


Here a barrier() might be a good solution as well, I guess.
On x86 allyesconfig it's almost the same.
- we need your spinlock changes (well, something different to make it compile)
- we need to fix pmd and pte
- we have gup_get_pte in arch/x86/mm/gup.c getting a ptep

So it looks like we could make a change to ACCESS_ONCE. Would something like
CONFIG_ARCH_SCALAR_ACCESS_ONCE be a good start?

This would boil down to:
Patch 1: Provide a stricter ACCESS_ONCE if CONFIG_ARCH_SCALAR_ACCESS_ONCE is set + 
docu update + comments (a rough sketch follows below)
Patch 2: Change mm/* to barriers
Patch 3: Change x86 locks
Patch 4: Change x86 gup
Patch 5: Enable CONFIG_ARCH_SCALAR_ACCESS_ONCE for s390x and x86
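
A rough sketch of what patch 1 could look like, modeled on the
get_scalar_volatile_pointer() patch quoted above (the names and the config
option are placeholders, not an actual implementation):

/*
 * Stricter ACCESS_ONCE when the architecture opts in via
 * CONFIG_ARCH_SCALAR_ACCESS_ONCE.  The (void)(long) cast turns aggregate
 * arguments into a build error; scalar users keep working unchanged.
 */
#ifdef CONFIG_ARCH_SCALAR_ACCESS_ONCE
#define __scalar_volatile_ptr(x) ({				\
	typeof(x) *___p = &(x);					\
	volatile typeof(x) *___vp = ___p;			\
	(void)(long)*___p;	/* build error on aggregates */	\
	___vp; })
#define ACCESS_ONCE(x) (*__scalar_volatile_ptr(x))
#else
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
#endif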

Makes sense?

Christian



Re: [PATCH RFC 1/2] KVM: don't check for PF_VCPU when yielding

2014-11-25 Thread Christian Borntraeger
Am 25.11.2014 um 17:04 schrieb David Hildenbrand:
 As some architectures (e.g. s390) can't disable preemption while
 entering/leaving the guest, they won't receive the yield in all situations.
 
 kvm_enter_guest() has to be called with preemption_disabled and will set
 PF_VCPU. After that point e.g. s390 reenables preemption and starts to 
 execute the
 guest. The thread might therefore be scheduled out between kvm_enter_guest() 
 and
 kvm_exit_guest(), resulting in PF_VCPU being set but not being run.
 
 Please note that preemption has to stay enabled in order to correctly process
 page faults on s390.
 
 Current code takes PF_VCPU as a hint that the VCPU thread is running and
 therefore needs no yield. yield_to() checks whether the target thread is 
 running,
 so let's use the inbuilt functionality to make it independent of PF_VCPU and
 preemption.

This change is a trade-off.
PRO: This patch would improve the case of preemption on s390. This is probably 
a corner case as most distros have preemption off anyway.
CON: The downside is that kvm_vcpu_yield_to is called also from 
kvm_vcpu_on_spin. Here we want to avoid the scheduler overhead for a wrong 
decision. 

So I think this patch is probably not what we want in most cases.

 
 Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
 ---
  virt/kvm/kvm_main.c | 4 
  1 file changed, 4 deletions(-)
 
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index 5b45330..184f52e 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -1782,10 +1782,6 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target)
   rcu_read_unlock();
   if (!task)
   return ret;
  - if (task->flags & PF_VCPU) {
 - put_task_struct(task);
 - return ret;
 - }
   ret = yield_to(task, 1);
   put_task_struct(task);
 



Re: [PATCH RFC 2/2] KVM: thread creating a vcpu is the owner of that vcpu

2014-11-25 Thread Christian Borntraeger
Am 25.11.2014 um 17:04 schrieb David Hildenbrand:
 Currently, we allow changing the PID of a VCPU. This PID is used to
 identify the thread to yield to if we want to yield to this specific
 VCPU.
 
 In practice (e.g. QEMU), the thread creating and executing the VCPU remains
 always the same. Temporarily exchanging the PID (e.g. because an ioctl is
 triggered from a wrong thread) doesn't really make sense.
 
 The PID is exchanged and a synchronize_rcu() is called. When the executing
 thread tries to run the VCPU again, another synchronize_rcu() happens.
 
 If a yield to that VCPU is triggered while the PID of the wrong thread is 
 active,
 the wrong thread might receive a yield, but this will most likely not
 help the executing thread at all. The executing thread won't have a higher
 priority after the wrong thread has finished with the ioctl. The wrong thread
 will even receive yields afterwards that were targeted to the executing vcpu,
 until the executing VCPU has replaced the PID on the next ioctl - doesn't feel
 correct to me.
 
 This patch makes the creating thread the owning thread, and therefore the only
 valid yield candidate (especially because VCPU ioctls are - in theory - only
 valid when triggered from the owning thread - old user space versions may not
 stick to this rule). This should also speed up the initial start of all VCPUs,
 when the PID is assigned for the first time.
 
 Should be backwards compatible - if there is any old user space version out
 there that doesn't stick to the creating == executing thread rule, yields will
 not work as intended.
 
 Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com

This change actually makes perfect sense to me:
- The runtime change logic was problematic (e.g. see commit 7103f60de8 "KVM: 
avoid unnecessary synchronize_rcu" and the qemu fixes for s390 to bring all vCPU 
ioctls into the right thread).
- It makes vcpu_load cheaper
- It emphasizes what api.txt says: "Only run vcpu ioctls from the same thread 
that was used to create the vcpu."


Acked-by: Christian Borntraeger borntrae...@de.ibm.com

 ---
  include/linux/kvm_host.h |  1 +
  virt/kvm/kvm_main.c  | 18 ++
  2 files changed, 3 insertions(+), 16 deletions(-)
 
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index aa56894..f1fe655 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -245,6 +245,7 @@ struct kvm_vcpu {
   int fpu_active;
   int guest_fpu_loaded, guest_xcr0_loaded;
   wait_queue_head_t wq;
 + /* the pid owning this vcpu - target for vcpu yields */
   struct pid *pid;
   int sigset_active;
   sigset_t sigset;
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index 184f52e..4ba7810 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -124,15 +124,6 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 
   if (mutex_lock_killable(&vcpu->mutex))
   return -EINTR;
  - if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
  - /* The thread running this VCPU changed. */
  - struct pid *oldpid = vcpu->pid;
  - struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
  - rcu_assign_pointer(vcpu->pid, newpid);
  - if (oldpid)
  - synchronize_rcu();
  - put_pid(oldpid);
  - }
    cpu = get_cpu();
    preempt_notifier_register(&vcpu->preempt_notifier);
    kvm_arch_vcpu_load(vcpu, cpu);
 @@ -220,7 +211,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, 
 unsigned id)
    vcpu->cpu = -1;
    vcpu->kvm = kvm;
    vcpu->vcpu_id = id;
  - vcpu->pid = NULL;
  + vcpu->pid = get_task_pid(current, PIDTYPE_PID);
    init_waitqueue_head(&vcpu->wq);
   kvm_async_pf_vcpu_init(vcpu);
 
 @@ -1771,15 +1762,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 
  int kvm_vcpu_yield_to(struct kvm_vcpu *target)
  {
 - struct pid *pid;
   struct task_struct *task = NULL;
   int ret = 0;
 
 - rcu_read_lock();
  - pid = rcu_dereference(target->pid);
 - if (pid)
 - task = get_pid_task(pid, PIDTYPE_PID);
 - rcu_read_unlock();
  + task = get_pid_task(target->pid, PIDTYPE_PID);
   if (!task)
   return ret;
   ret = yield_to(task, 1);
 



Re: [PATCH RFC 1/2] KVM: don't check for PF_VCPU when yielding

2014-11-26 Thread Christian Borntraeger
Am 26.11.2014 um 10:23 schrieb David Hildenbrand:
 This change is a trade-off.
 PRO: This patch would improve the case of preemption on s390. This is 
 probably a corner case as most distros have preemption off anyway.
 CON: The downside is that kvm_vcpu_yield_to is called also from 
 kvm_vcpu_on_spin. Here we want to avoid the scheduler overhead for a wrong 
 decision.   
 
 Won't most of that part be covered by:
   if (!ACCESS_ONCE(vcpu->preempted))

Hmm, right. Checking vcpu->preempted and PF_VCPU might boil down to the same.
Would be good to have a performance regression test, though. 

 
 vcpu->preempted is only set when scheduled out involuntarily. It is cleared
 when scheduled in. s390 sets it manually, to speed up waking up a vcpu.
 
 So when our task is scheduled in (an PF_VCPU is set), this check will already
 avoid scheduler overhead in kvm_vcpu_on_spin() or am I missing something?
 

CC Raghavendra KT. Could you rerun your kernbench/sysbench/ebizzy setup on x86 
to see if the patch in this thread causes any regression? I think your commit 
7bc7ae25b143 "kvm: Iterate over only vcpus that are preempted" might have really 
made the PF_VCPU check unnecessary.
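
For reference, the check David refers to sits in the candidate loop of
kvm_vcpu_on_spin(); abridged and slightly paraphrased (the pass/last_boosted
bookkeeping and the return handling are left out), it looks roughly like this:

kvm_for_each_vcpu(i, vcpu, kvm) {
	if (!ACCESS_ONCE(vcpu->preempted))	/* added by 7bc7ae25b143 */
		continue;
	if (vcpu == me)
		continue;
	if (waitqueue_active(&vcpu->wq))	/* halted, not spinning */
		continue;
	if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
		continue;
	yielded = kvm_vcpu_yield_to(vcpu);	/* PF_VCPU check under discussion is in here */
	if (yielded > 0)
		break;
}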

CC Michael Mueller, do we still have our yield performance setup handy to check 
if this patch causes any regression?


Christian



Re: [PATCH RFC 1/2] KVM: don't check for PF_VCPU when yielding

2014-11-28 Thread Christian Borntraeger
Am 28.11.2014 um 11:08 schrieb Raghavendra KT:
 Was able to test the patch, here is the result: I have not tested with
 bigger VMs though. Results make it difficult to talk about any side
 effect of
 patch if any.

Thanks a lot.

If our assumption is correct, then this patch should have no side effect on 
x86. Do you have a confident guess whether the numbers below mean no change vs. 
regression vs. improvement?

Christian


 
 System 16 core 32cpu (+ht) sandybridge
 with 4 guests of 16vcpu each
 
 +---+---+---++---+
  kernbench (time taken lower is better)
 +---+---+---++---+
  base   %stdev  patched  %stdev%improvement
 +---+---+---++---+
 1x   53.1421 2.308654.6671 2.9673  -2.86966
 2x   89.6858 6.454094.0626 6.8317  -4.88015
 +---+---+---++---+
 
 +---+---+---++---+
  ebizzy  (records/sec higher is better)
 +---+---+---++---+
  base%stdev  patched  %stdev%improvement
 +---+---+---++---+
 1x 14523.2500 8.438814928.8750 3.0478   2.79294
 2x  3338.8750 1.4592 3270.8750 2.3980  -2.03661
 +---+---+---++---+
 +---+---+---++---+
  dbench  (Throughput higher is better)
 +---+---+---++---+
  base   %stdev   patched  %stdev%improvement
 +---+---+---++---+
 1x  6386.4737 1.04876703.9113 1.2298   4.97047
 2x  2571.4712 1.37332571.8175 1.6919   0.01347
 +---+---+---++---+
 
 Raghu
 
 On Wed, Nov 26, 2014 at 3:01 PM, Christian Borntraeger
 borntrae...@de.ibm.com wrote:
 Am 26.11.2014 um 10:23 schrieb David Hildenbrand:
 This change is a trade-off.
 PRO: This patch would improve the case of preemption on s390. This is 
 probably a corner case as most distros have preemption off anyway.
 CON: The downside is that kvm_vcpu_yield_to is called also from 
 kvm_vcpu_on_spin. Here we want to avoid the scheduler overhead for a wrong 
 decision.

 Won't most of that part be covered by:
   if (!ACCESS_ONCE(vcpu-preempted))

 Hmm, right. Checking vcpu-preempted and PF_VCPU might boil down to the same.
 Would be good if to have to performance regression test, though.


 vcpu-preempted is only set when scheduled out involuntarily. It is cleared
 when scheduled in. s390 sets it manually, to speed up waking up a vcpu.

 So when our task is scheduled in (an PF_VCPU is set), this check will 
 already
 avoid scheduler overhead in kvm_vcpu_on_spin() or am I missing something?


 CC Raghavendra KT. Could be rerun your kernbench/sysbench/ebizzy setup on 
 x86 to see if the patch in this thread causes any regression? If think your 
 commit 7bc7ae25b143kvm: Iterate over only vcpus that are preempted might 
 have really made the PF_VCPU check unnecessary

 CC Michael Mueller, do we still have our yield performance setup handy to 
 check if this patch causes any regression?


 Christian

 



[GIT PULL 05/11] KVM: s390: refactor interrupt injection code

2014-11-28 Thread Christian Borntraeger
From: Jens Freimann jf...@linux.vnet.ibm.com

In preparation for the rework of the local interrupt injection code,
factor out injection routines from kvm_s390_inject_vcpu().

Signed-off-by: Jens Freimann jf...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/interrupt.c | 221 +++---
 1 file changed, 167 insertions(+), 54 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index ead52bf..8f50f8c 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -719,6 +719,16 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
return rc;
 }
 
+static int __inject_prog_irq(struct kvm_vcpu *vcpu,
+struct kvm_s390_interrupt_info *inti)
+{
+   struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+   list_add(&inti->list, &li->list);
+   atomic_set(&li->active, 1);
+   return 0;
+}
+
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 {
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -746,6 +756,7 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
 {
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_interrupt_info *inti;
+   int rc;
 
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
@@ -759,10 +770,133 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
inti->type = KVM_S390_PROGRAM_INT;
memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
spin_lock(&li->lock);
-   list_add(&inti->list, &li->list);
-   atomic_set(&li->active, 1);
+   rc = __inject_prog_irq(vcpu, inti);
BUG_ON(waitqueue_active(li->wq));
spin_unlock(&li->lock);
+   return rc;
+}
+
+static int __inject_pfault_init(struct kvm_vcpu *vcpu,
+   struct kvm_s390_interrupt *s390int,
+   struct kvm_s390_interrupt_info *inti)
+{
+   struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+   inti->ext.ext_params2 = s390int->parm64;
+   list_add_tail(&inti->list, &li->list);
+   atomic_set(&li->active, 1);
+   atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+   return 0;
+}
+
+static int __inject_extcall(struct kvm_vcpu *vcpu,
+   struct kvm_s390_interrupt *s390int,
+   struct kvm_s390_interrupt_info *inti)
+{
+   struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+   VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+  s390int->parm);
+   if (s390int->parm & 0xffff0000)
+   return -EINVAL;
+   inti->extcall.code = s390int->parm;
+   list_add_tail(&inti->list, &li->list);
+   atomic_set(&li->active, 1);
+   atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+   return 0;
+}
+
+static int __inject_set_prefix(struct kvm_vcpu *vcpu,
+  struct kvm_s390_interrupt *s390int,
+  struct kvm_s390_interrupt_info *inti)
+{
+   struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+   VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+  s390int->parm);
+   inti->prefix.address = s390int->parm;
+   list_add_tail(&inti->list, &li->list);
+   atomic_set(&li->active, 1);
+   return 0;
+}
+
+static int __inject_sigp_stop(struct kvm_vcpu *vcpu,
+ struct kvm_s390_interrupt *s390int,
+ struct kvm_s390_interrupt_info *inti)
+{
+   struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+   list_add_tail(&inti->list, &li->list);
+   atomic_set(&li->active, 1);
+   li->action_bits |= ACTION_STOP_ON_STOP;
+   return 0;
+}
+
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
+struct kvm_s390_interrupt *s390int,
+struct kvm_s390_interrupt_info *inti)
+{
+   struct kvm_s390_local_interrupt *li = vcpu-arch.local_int;
+
+   VCPU_EVENT(vcpu, 3, inject: type %x, s390int-type);
+   list_add_tail(inti-list, li-list);
+   atomic_set(li-active, 1);
+   return 0;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+  struct kvm_s390_interrupt *s390int,
+  struct kvm_s390_interrupt_info *inti)
+{
+   struct kvm_s390_local_interrupt *li = vcpu-arch.local_int;
+
+   VCPU_EVENT(vcpu, 3, inject: emergency %u\n, s390int-parm);
+   if (s390int-parm  0x)
+   return -EINVAL;
+   inti-emerg.code = s390int-parm;
+   list_add_tail(inti-list, li-list);
+   atomic_set(li-active, 1);
+   atomic_set_mask(CPUSTAT_EXT_INT, li-cpuflags);
+   return 0;
+}
+
+static int __inject_mchk(struct kvm_vcpu *vcpu,
+struct

[GIT PULL 01/11] KVM: s390: Small fixes for the PFMF handler

2014-11-28 Thread Christian Borntraeger
From: Thomas Huth th...@linux.vnet.ibm.com

This patch includes two small fixes for the PFMF handler: First, the
start address for PFMF has to be masked according to the current
addressing mode, which is now done with kvm_s390_logical_to_effective().
Second, the protection exceptions have a lower priority than the
specification exceptions, so the check for low-address protection
has to be moved after the last spot where we inject a specification
exception.

Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/priv.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 9bde32f..04f70fd 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -646,10 +646,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
start = vcpu-run-s.regs.gprs[reg2]  PAGE_MASK;
-   if (vcpu-run-s.regs.gprs[reg1]  PFMF_CF) {
-   if (kvm_s390_check_low_addr_protection(vcpu, start))
-   return kvm_s390_inject_prog_irq(vcpu, vcpu-arch.pgm);
-   }
+   start = kvm_s390_logical_to_effective(vcpu, start);
 
switch (vcpu-run-s.regs.gprs[reg1]  PFMF_FSC) {
case 0x:
@@ -665,6 +662,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
default:
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
}
+
+   if (vcpu-run-s.regs.gprs[reg1]  PFMF_CF) {
+   if (kvm_s390_check_low_addr_protection(vcpu, start))
+   return kvm_s390_inject_prog_irq(vcpu, vcpu-arch.pgm);
+   }
+
while (start  end) {
unsigned long useraddr, abs_addr;
 
-- 
1.9.3



[GIT PULL 07/11] KVM: s390: add defines for virtio and pfault interrupt code

2014-11-28 Thread Christian Borntraeger
From: Jens Freimann jf...@linux.vnet.ibm.com

Get rid of the open-coded values for virtio and pfault completion interrupts.

Signed-off-by: Jens Freimann jf...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/interrupt.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index bccda76..481f136 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -27,6 +27,8 @@
 #define IOINT_CSSID_MASK 0x03fc
 #define IOINT_AI_MASK 0x0400
 #define PFAULT_INIT 0x0600
+#define PFAULT_DONE 0x0680
+#define VIRTIO_PARAM 0x0d00
 
 static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
 
@@ -391,7 +393,7 @@ static int __must_check __do_deliver_interrupt(struct 
kvm_vcpu *vcpu,
trace_kvm_s390_deliver_interrupt(vcpu-vcpu_id, inti-type, 0,
 inti-ext.ext_params2);
rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-   rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
+   rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
 vcpu-arch.sie_block-gpsw,
 sizeof(psw_t));
@@ -408,7 +410,7 @@ static int __must_check __do_deliver_interrupt(struct 
kvm_vcpu *vcpu,
 inti-ext.ext_params,
 inti-ext.ext_params2);
rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-   rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
+   rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 
*)__LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
 vcpu-arch.sie_block-gpsw,
 sizeof(psw_t));
-- 
1.9.3



[GIT PULL 11/11] KVM: s390: allow injecting all kinds of machine checks

2014-11-28 Thread Christian Borntraeger
From: Jens Freimann jf...@linux.vnet.ibm.com

Allow specifying CR14, the logout area, the external damage code
and the failing storage address.

Since more than one machine check can be indicated to the guest at
a time, we need to combine all indication bits with already pending
requests.

Signed-off-by: Jens Freimann jf...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/interrupt.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 1aa7f28..b3d4409 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1063,11 +1063,19 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct 
kvm_s390_irq *irq)
   mchk-mcic, 2);
 
/*
-* Combine mcic with previously injected machine checks and
-* indicate them all together as described in the Principles
-* of Operation, Chapter 11, Interruption action
+* Because repressible machine checks can be indicated along with
+* exigent machine checks (PoP, Chapter 11, Interruption action)
+* we need to combine cr14, mcic and external damage code.
+* Failing storage address and the logout area should not be or'ed
+* together, we just indicate the last occurrence of the corresponding
+* machine check
 */
+   mchk-cr14 |= irq-u.mchk.cr14;
mchk-mcic |= irq-u.mchk.mcic;
+   mchk-ext_damage_code |= irq-u.mchk.ext_damage_code;
+   mchk-failing_storage_address = irq-u.mchk.failing_storage_address;
+   memcpy(mchk-fixed_logout, irq-u.mchk.fixed_logout,
+  sizeof(mchk-fixed_logout));
if (mchk-mcic  MCHK_EX_MASK)
set_bit(IRQ_PEND_MCHK_EX, li-pending_irqs);
else if (mchk-mcic  MCHK_REP_MASK)
-- 
1.9.3



[GIT PULL 10/11] KVM: s390: handle pending local interrupts via bitmap

2014-11-28 Thread Christian Borntraeger
From: Jens Freimann jf...@linux.vnet.ibm.com

This patch adapts handling of local interrupts to be more compliant with
the z/Architecture Principles of Operation and introduces a data structure
which allows more efficient handling of interrupts.

* Get rid of the li->active flag, use a bitmap instead
* Keep interrupts in a bitmap instead of a list
* Deliver interrupts in the order of their priority as defined in the
  PoP
* Use a second bitmap for sigp emergency requests, as a CPU can have
  one request pending from every other CPU in the system
  (a simplified model of the bitmap handling follows below).
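
As an aside, a minimal, self-contained C model of the idea (illustrative names
only, not the kernel structures): pending interrupts live in a bitmap whose bit
positions double as delivery priorities, and a second bitmap records which
source CPUs have a SIGP emergency pending, so picking the next interrupt to
deliver becomes a find-first-set-bit operation instead of a list walk.

#include <stdio.h>

enum irq_prio {                     /* lower bit number == higher priority */
        PEND_MCHK_EX = 0,
        PEND_PROG,
        PEND_EXT_EMERGENCY,
        PEND_EXT_CLOCK_COMP,
        PEND_COUNT
};

struct local_int {
        unsigned long pending_irqs;       /* one bit per interrupt type */
        unsigned long sigp_emerg_pending; /* one bit per requesting cpu */
};

static void deliver_all(struct local_int *li)
{
        while (li->pending_irqs) {
                int irq = __builtin_ctzl(li->pending_irqs); /* highest priority */
                printf("deliver irq type %d\n", irq);
                li->pending_irqs &= ~(1UL << irq);
        }
}

int main(void)
{
        struct local_int li = { 0, 0 };

        li.pending_irqs |= 1UL << PEND_EXT_CLOCK_COMP;
        li.pending_irqs |= 1UL << PEND_MCHK_EX;    /* delivered first */
        li.sigp_emerg_pending |= 1UL << 3;         /* emergency from cpu 3 */
        printf("emergency senders: %#lx\n", li.sigp_emerg_pending);
        deliver_all(&li);
        return 0;
}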

Signed-off-by: Jens Freimann jf...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/kvm_host.h |   2 -
 arch/s390/kvm/intercept.c|   4 +-
 arch/s390/kvm/interrupt.c| 601 +++
 arch/s390/kvm/kvm-s390.c |  14 +-
 arch/s390/kvm/kvm-s390.h |   5 +-
 arch/s390/kvm/sigp.c |  36 +--
 6 files changed, 380 insertions(+), 282 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 624a821..9cba74d5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -398,8 +398,6 @@ struct kvm_s390_irq_payload {
 
 struct kvm_s390_local_interrupt {
spinlock_t lock;
-   struct list_head list;
-   atomic_t active;
struct kvm_s390_float_interrupt *float_int;
wait_queue_head_t *wq;
atomic_t *cpuflags;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 1d244df..81c77ab 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -257,7 +257,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu 
*vcpu)
 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 {
u16 eic = vcpu-arch.sie_block-eic;
-   struct kvm_s390_interrupt irq;
+   struct kvm_s390_irq irq;
psw_t newpsw;
int rc;
 
@@ -282,7 +282,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
if (kvm_s390_si_ext_call_pending(vcpu))
return 0;
irq.type = KVM_S390_INT_EXTERNAL_CALL;
-   irq.parm = vcpu-arch.sie_block-extcpuaddr;
+   irq.u.extcall.code = vcpu-arch.sie_block-extcpuaddr;
break;
default:
return -EOPNOTSUPP;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0d7f0a7..1aa7f28 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -16,6 +16,7 @@
 #include linux/mmu_context.h
 #include linux/signal.h
 #include linux/slab.h
+#include linux/bitmap.h
 #include asm/asm-offsets.h
 #include asm/uaccess.h
 #include kvm-s390.h
@@ -136,6 +137,31 @@ static int __must_check __interrupt_is_deliverable(struct 
kvm_vcpu *vcpu,
return 0;
 }
 
+static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
+{
+   return vcpu-arch.local_int.pending_irqs;
+}
+
+static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+{
+   unsigned long active_mask = pending_local_irqs(vcpu);
+
+   if (psw_extint_disabled(vcpu))
+   active_mask = ~IRQ_PEND_EXT_MASK;
+   if (!(vcpu-arch.sie_block-gcr[0]  0x2000ul))
+   __clear_bit(IRQ_PEND_EXT_EXTERNAL, active_mask);
+   if (!(vcpu-arch.sie_block-gcr[0]  0x4000ul))
+   __clear_bit(IRQ_PEND_EXT_EMERGENCY, active_mask);
+   if (!(vcpu-arch.sie_block-gcr[0]  0x800ul))
+   __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, active_mask);
+   if (!(vcpu-arch.sie_block-gcr[0]  0x400ul))
+   __clear_bit(IRQ_PEND_EXT_CPU_TIMER, active_mask);
+   if (psw_mchk_disabled(vcpu))
+   active_mask = ~IRQ_PEND_MCHK_MASK;
+
+   return active_mask;
+}
+
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
atomic_set_mask(CPUSTAT_WAIT, vcpu-arch.sie_block-cpuflags);
@@ -170,26 +196,45 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
atomic_set_mask(flag, vcpu-arch.sie_block-cpuflags);
 }
 
+static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
+{
+   if (!(pending_local_irqs(vcpu)  IRQ_PEND_EXT_MASK))
+   return;
+   if (psw_extint_disabled(vcpu))
+   __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+   else
+   vcpu-arch.sie_block-lctl |= LCTL_CR0;
+}
+
+static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
+{
+   if (!(pending_local_irqs(vcpu)  IRQ_PEND_MCHK_MASK))
+   return;
+   if (psw_mchk_disabled(vcpu))
+   vcpu-arch.sie_block-ictl |= ICTL_LPSW;
+   else
+   vcpu-arch.sie_block-lctl |= LCTL_CR14;
+}
+
+/* Set interception request for non-deliverable local interrupts */
+static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+{
+   set_intercept_indicators_ext(vcpu

[GIT PULL 08/11] KVM: s390: refactor interrupt delivery code

2014-11-28 Thread Christian Borntraeger
From: Jens Freimann jf...@linux.vnet.ibm.com

Move the delivery code for cpu-local interrupts from the huge do_deliver_interrupt()
to smaller functions which each handle one type of interrupt.

Signed-off-by: Jens Freimann jf...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/interrupt.c | 459 --
 1 file changed, 282 insertions(+), 177 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 481f136..0d7f0a7 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -30,8 +30,6 @@
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
 
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
-
 static int is_ioint(u64 type)
 {
return ((type  0xfffeu) != 0xfffeu);
@@ -228,12 +226,183 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
}
 }
 
-static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
- struct kvm_s390_pgm_info *pgm_info)
+static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
+{
+   int rc;
+
+   trace_kvm_s390_deliver_interrupt(vcpu-vcpu_id, KVM_S390_INT_CPU_TIMER,
+0, 0);
+
+   rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+  (u16 *)__LC_EXT_INT_CODE);
+   rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+vcpu-arch.sie_block-gpsw, sizeof(psw_t));
+   rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+   vcpu-arch.sie_block-gpsw, sizeof(psw_t));
+   return rc;
+}
+
+static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
+{
+   int rc;
+
+   trace_kvm_s390_deliver_interrupt(vcpu-vcpu_id, KVM_S390_INT_CLOCK_COMP,
+0, 0);
+
+   rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+  (u16 __user *)__LC_EXT_INT_CODE);
+   rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+vcpu-arch.sie_block-gpsw, sizeof(psw_t));
+   rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+   vcpu-arch.sie_block-gpsw, sizeof(psw_t));
+   return rc;
+}
+
+static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu,
+  struct kvm_s390_interrupt_info *inti)
+{
+   struct kvm_s390_ext_info *ext = inti-ext;
+   int rc;
+
+   VCPU_EVENT(vcpu, 4, interrupt: pfault init parm:%x,parm64:%llx,
+  0, ext-ext_params2);
+   trace_kvm_s390_deliver_interrupt(vcpu-vcpu_id,
+KVM_S390_INT_PFAULT_INIT,
+0, ext-ext_params2);
+
+   rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE);
+   rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
+   rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+vcpu-arch.sie_block-gpsw, sizeof(psw_t));
+   rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+   vcpu-arch.sie_block-gpsw, sizeof(psw_t));
+   rc |= put_guest_lc(vcpu, ext-ext_params2, (u64 *) __LC_EXT_PARAMS2);
+   return rc;
+}
+
+static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu,
+  struct kvm_s390_interrupt_info *inti)
+{
+   struct kvm_s390_mchk_info *mchk = inti-mchk;
+   int rc;
+
+   VCPU_EVENT(vcpu, 4, interrupt: machine check mcic=%llx,
+  mchk-mcic);
+   trace_kvm_s390_deliver_interrupt(vcpu-vcpu_id, KVM_S390_MCHK,
+mchk-cr14, mchk-mcic);
+
+   rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+   rc |= put_guest_lc(vcpu, mchk-mcic,
+  (u64 __user *) __LC_MCCK_CODE);
+   rc |= put_guest_lc(vcpu, mchk-failing_storage_address,
+  (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+   rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+mchk-fixed_logout, sizeof(mchk-fixed_logout));
+   rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+vcpu-arch.sie_block-gpsw, sizeof(psw_t));
+   rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+   vcpu-arch.sie_block-gpsw, sizeof(psw_t));
+   return rc;
+}
+
+static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
+{
+   int rc;
+
+   VCPU_EVENT(vcpu, 4, %s, interrupt: cpu restart);
+   vcpu-stat.deliver_restart_signal++;
+   trace_kvm_s390_deliver_interrupt(vcpu-vcpu_id, KVM_S390_RESTART, 0, 0);
+
+   rc  = write_guest_lc(vcpu,
+offsetof(struct _lowcore, restart_old_psw),
+vcpu-arch.sie_block-gpsw, sizeof(psw_t));
+   rc

[GIT PULL 02/11] KVM: s390: Fix rewinding of the PSW pointing to an EXECUTE instruction

2014-11-28 Thread Christian Borntraeger
From: Thomas Huth th...@linux.vnet.ibm.com

A couple of our interception handlers rewind the PSW to the beginning
of the instruction to run the intercepted instruction again during the
next SIE entry. This normally works fine, but there is also the
possibility that the instruction did not get run directly but via an
EXECUTE instruction.
In this case, the PSW does not point to the instruction that caused the
interception, but to the EXECUTE instruction! So we've got to rewind the
PSW to the beginning of the EXECUTE instruction instead.
This is now accomplished with a new helper function kvm_s390_rewind_psw().

Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/kvm_host.h |  2 +-
 arch/s390/kvm/intercept.c| 16 ++--
 arch/s390/kvm/kvm-s390.h |  6 --
 arch/s390/kvm/priv.c | 12 
 4 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 7e02d77..ac7b074 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -123,7 +123,7 @@ struct kvm_s390_sie_block {
 #define ICPT_PARTEXEC  0x38
 #define ICPT_IOINST0x40
__u8icptcode;   /* 0x0050 */
-   __u8reserved51; /* 0x0051 */
+   __u8icptstatus; /* 0x0051 */
__u16   ihcpu;  /* 0x0052 */
__u8reserved54[2];  /* 0x0054 */
__u16   ipa;/* 0x0056 */
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index eaf4629..1d244df 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -38,6 +38,19 @@ static const intercept_handler_t instruction_handlers[256] = 
{
[0xeb] = kvm_s390_handle_eb,
 };
 
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
+{
+   struct kvm_s390_sie_block *sie_block = vcpu-arch.sie_block;
+
+   /* Use the length of the EXECUTE instruction if necessary */
+   if (sie_block-icptstatus  1) {
+   ilc = (sie_block-icptstatus  4)  0x6;
+   if (!ilc)
+   ilc = 4;
+   }
+   sie_block-gpsw.addr = __rewind_psw(sie_block-gpsw, ilc);
+}
+
 static int handle_noop(struct kvm_vcpu *vcpu)
 {
switch (vcpu-arch.sie_block-icptcode) {
@@ -288,7 +301,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
  */
 static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 {
-   psw_t *psw = vcpu-arch.sie_block-gpsw;
unsigned long srcaddr, dstaddr;
int reg1, reg2, rc;
 
@@ -310,7 +322,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
if (rc != 0)
return rc;
 
-   psw-addr = __rewind_psw(*psw, 4);
+   kvm_s390_rewind_psw(vcpu, 4);
 
return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 244d023..ff8d977 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -24,8 +24,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 /* declare vfacilities extern */
 extern unsigned long *vfacilities;
 
-int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
-
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)((vcpu-arch.sie_block-ecb  0x10))
 #define TDB_FORMAT11
@@ -152,6 +150,10 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
  struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
+/* implemented in intercept.c */
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
 /* implemented in priv.c */
 int is_valid_psw(psw_t *psw);
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 04f70fd..b37db1a 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -176,21 +176,18 @@ static int handle_skey(struct kvm_vcpu *vcpu)
if (vcpu-arch.sie_block-gpsw.mask  PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-   vcpu-arch.sie_block-gpsw.addr =
-   __rewind_psw(vcpu-arch.sie_block-gpsw, 4);
+   kvm_s390_rewind_psw(vcpu, 4);
VCPU_EVENT(vcpu, 4, %s, retrying storage key operation);
return 0;
 }
 
 static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
 {
-   psw_t *psw = vcpu-arch.sie_block-gpsw;
-
vcpu-stat.instruction_ipte_interlock++;
-   if (psw_bits(*psw).p)
+   if (psw_bits(vcpu-arch.sie_block-gpsw).p)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
wait_event(vcpu-kvm-arch.ipte_wq, !ipte_lock_held(vcpu));
-   psw-addr = __rewind_psw(*psw, 4

[GIT PULL 00/11] KVM: s390: Several changes for 3.19 (kvm/next)

2014-11-28 Thread Christian Borntraeger
Paolo,

here is a bunch of changes dealing mostly with architectural compliance.
I have deferred the TOD clock interface (as well as a storage key
read/write) as we found some issues in our final internal review.
Depending on Linus' schedule these might have to wait for the next merge
window or might still come next week, as I want to give others a chance
to comment on interface changes.

The following changes since commit b65d6e17fe2239c9b2051727903955d922083fbf:

  kvm: x86: mask out XSAVES (2014-11-23 18:33:37 +0100)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git  
tags/kvm-s390-next-20141128

for you to fetch changes up to fc2020cfe9f8102d17dad79ed96dc68a9d84b19e:

  KVM: s390: allow injecting all kinds of machine checks (2014-11-28 13:59:05 
+0100)


KVM: s390: Several fixes,cleanups and reworks

Here is a bunch of fixes that deal mostly with architectural compliance:
- interrupt priorities
- interrupt handling
- instruction exit handling

We also provide a helper function for getting the guest visible storage key.


Christian Borntraeger (1):
  KVM: s390: trigger the right CPU exit for floating interrupts

David Hildenbrand (1):
  KVM: s390: external param not valid for cpu timer and ckc

Jason J. Herne (1):
  KVM: S390: Create helper function get_guest_storage_key

Jens Freimann (6):
  KVM: s390: refactor interrupt injection code
  KVM: s390: add defines for virtio and pfault interrupt code
  KVM: s390: refactor interrupt delivery code
  KVM: s390: add bitmap for handling cpu-local interrupts
  KVM: s390: handle pending local interrupts via bitmap
  KVM: s390: allow injecting all kinds of machine checks

Thomas Huth (2):
  KVM: s390: Small fixes for the PFMF handler
  KVM: s390: Fix rewinding of the PSW pointing to an EXECUTE instruction

 arch/s390/include/asm/kvm_host.h |   90 +++-
 arch/s390/include/asm/pgalloc.h  |1 +
 arch/s390/kvm/intercept.c|   20 +-
 arch/s390/kvm/interrupt.c| 1037 +-
 arch/s390/kvm/kvm-s390.c |   14 +-
 arch/s390/kvm/kvm-s390.h |   11 +-
 arch/s390/kvm/priv.c |   23 +-
 arch/s390/kvm/sigp.c |   36 +-
 arch/s390/mm/pgtable.c   |   39 ++
 9 files changed, 872 insertions(+), 399 deletions(-)



[GIT PULL 03/11] KVM: s390: trigger the right CPU exit for floating interrupts

2014-11-28 Thread Christian Borntraeger
When injecting a floating interrupt and no CPU is idle we
kick one CPU to do an external exit. In case of I/O we
should trigger an I/O exit instead. This does not matter
for Linux guests as external and I/O interrupts are
enabled/disabled at the same time, but play safe anyway.

The same holds true for machine checks. Since there is no
special exit, just reuse the generic stop exit. The injection
code inside the VCPU loop will recheck anyway and rearm the
proper exits (e.g. control registers) if necessary.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
---
 arch/s390/kvm/interrupt.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 4fc3fed..ead52bf 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -851,7 +851,17 @@ static int __inject_vm(struct kvm *kvm, struct 
kvm_s390_interrupt_info *inti)
dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
li = dst_vcpu-arch.local_int;
spin_lock(li-lock);
-   atomic_set_mask(CPUSTAT_EXT_INT, li-cpuflags);
+   switch (inti-type) {
+   case KVM_S390_MCHK:
+   atomic_set_mask(CPUSTAT_STOP_INT, li-cpuflags);
+   break;
+   case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+   atomic_set_mask(CPUSTAT_IO_INT, li-cpuflags);
+   break;
+   default:
+   atomic_set_mask(CPUSTAT_EXT_INT, li-cpuflags);
+   break;
+   }
spin_unlock(li-lock);
kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
 unlock_fi:
-- 
1.9.3



[GIT PULL 04/11] KVM: S390: Create helper function get_guest_storage_key

2014-11-28 Thread Christian Borntraeger
From: Jason J. Herne jjhe...@linux.vnet.ibm.com

Define get_guest_storage_key which can be used to get the value of a guest
storage key. This complements the functionality provided by the helper function
set_guest_storage_key. Both functions are needed for live migration of s390
guests that use storage keys.

Signed-off-by: Jason J. Herne jjhe...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/pgalloc.h |  1 +
 arch/s390/mm/pgtable.c  | 39 +++
 2 files changed, 40 insertions(+)

diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index d39a31c..ede2eab 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -26,6 +26,7 @@ void page_table_reset_pgste(struct mm_struct *, unsigned 
long, unsigned long,
bool init_skey);
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
  unsigned long key, bool nq);
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index cfecc24..0b18585 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -980,6 +980,45 @@ retry:
 }
 EXPORT_SYMBOL(set_guest_storage_key);
 
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+{
+   spinlock_t *ptl;
+   pgste_t pgste;
+   pte_t *ptep;
+   uint64_t physaddr;
+   unsigned long key = 0;
+
+   down_read(mm-mmap_sem);
+   ptep = get_locked_pte(mm, addr, ptl);
+   if (unlikely(!ptep)) {
+   up_read(mm-mmap_sem);
+   return -EFAULT;
+   }
+   pgste = pgste_get_lock(ptep);
+
+   if (pte_val(*ptep)  _PAGE_INVALID) {
+   key |= (pgste_val(pgste)  PGSTE_ACC_BITS)  56;
+   key |= (pgste_val(pgste)  PGSTE_FP_BIT)  56;
+   key |= (pgste_val(pgste)  PGSTE_GR_BIT)  48;
+   key |= (pgste_val(pgste)  PGSTE_GC_BIT)  48;
+   } else {
+   physaddr = pte_val(*ptep)  PAGE_MASK;
+   key = page_get_storage_key(physaddr);
+
+   /* Reflect guest's logical view, not physical */
+   if (pgste_val(pgste)  PGSTE_GR_BIT)
+   key |= _PAGE_REFERENCED;
+   if (pgste_val(pgste)  PGSTE_GC_BIT)
+   key |= _PAGE_CHANGED;
+   }
+
+   pgste_set_unlock(ptep, pgste);
+   pte_unmap_unlock(ptep, ptl);
+   up_read(mm-mmap_sem);
+   return key;
+}
+EXPORT_SYMBOL(get_guest_storage_key);
+
 #else /* CONFIG_PGSTE */
 
 static inline int page_table_with_pgste(struct page *page)
-- 
1.9.3



[GIT PULL 09/11] KVM: s390: add bitmap for handling cpu-local interrupts

2014-11-28 Thread Christian Borntraeger
From: Jens Freimann jf...@linux.vnet.ibm.com

Add a bitmap to the vcpu structure which is used to keep track
of local pending interrupts. Also add an enum with all interrupt
types sorted in order of priority (highest to lowest).

Signed-off-by: Jens Freimann jf...@linux.vnet.ibm.com
Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/kvm_host.h | 86 
 1 file changed, 86 insertions(+)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index ac7b074..624a821 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -295,6 +295,79 @@ struct kvm_vcpu_stat {
 #define PGM_PER0x80
 #define PGM_CRYPTO_OPERATION   0x119
 
+/* irq types in order of priority */
+enum irq_types {
+   IRQ_PEND_MCHK_EX = 0,
+   IRQ_PEND_SVC,
+   IRQ_PEND_PROG,
+   IRQ_PEND_MCHK_REP,
+   IRQ_PEND_EXT_IRQ_KEY,
+   IRQ_PEND_EXT_MALFUNC,
+   IRQ_PEND_EXT_EMERGENCY,
+   IRQ_PEND_EXT_EXTERNAL,
+   IRQ_PEND_EXT_CLOCK_COMP,
+   IRQ_PEND_EXT_CPU_TIMER,
+   IRQ_PEND_EXT_TIMING,
+   IRQ_PEND_EXT_SERVICE,
+   IRQ_PEND_EXT_HOST,
+   IRQ_PEND_PFAULT_INIT,
+   IRQ_PEND_PFAULT_DONE,
+   IRQ_PEND_VIRTIO,
+   IRQ_PEND_IO_ISC_0,
+   IRQ_PEND_IO_ISC_1,
+   IRQ_PEND_IO_ISC_2,
+   IRQ_PEND_IO_ISC_3,
+   IRQ_PEND_IO_ISC_4,
+   IRQ_PEND_IO_ISC_5,
+   IRQ_PEND_IO_ISC_6,
+   IRQ_PEND_IO_ISC_7,
+   IRQ_PEND_SIGP_STOP,
+   IRQ_PEND_RESTART,
+   IRQ_PEND_SET_PREFIX,
+   IRQ_PEND_COUNT
+};
+
+/*
+ * Repressible (non-floating) machine check interrupts
+ * subclass bits in MCIC
+ */
+#define MCHK_EXTD_BIT 58
+#define MCHK_DEGR_BIT 56
+#define MCHK_WARN_BIT 55
+#define MCHK_REP_MASK ((1UL  MCHK_DEGR_BIT) | \
+  (1UL  MCHK_EXTD_BIT) | \
+  (1UL  MCHK_WARN_BIT))
+
+/* Exigent machine check interrupts subclass bits in MCIC */
+#define MCHK_SD_BIT 63
+#define MCHK_PD_BIT 62
+#define MCHK_EX_MASK ((1UL  MCHK_SD_BIT) | (1UL  MCHK_PD_BIT))
+
+#define IRQ_PEND_EXT_MASK ((1UL  IRQ_PEND_EXT_IRQ_KEY)| \
+  (1UL  IRQ_PEND_EXT_CLOCK_COMP) | \
+  (1UL  IRQ_PEND_EXT_CPU_TIMER)  | \
+  (1UL  IRQ_PEND_EXT_MALFUNC)| \
+  (1UL  IRQ_PEND_EXT_EMERGENCY)  | \
+  (1UL  IRQ_PEND_EXT_EXTERNAL)   | \
+  (1UL  IRQ_PEND_EXT_TIMING) | \
+  (1UL  IRQ_PEND_EXT_HOST)   | \
+  (1UL  IRQ_PEND_EXT_SERVICE)| \
+  (1UL  IRQ_PEND_VIRTIO) | \
+  (1UL  IRQ_PEND_PFAULT_INIT)| \
+  (1UL  IRQ_PEND_PFAULT_DONE))
+
+#define IRQ_PEND_IO_MASK ((1UL  IRQ_PEND_IO_ISC_0) | \
+ (1UL  IRQ_PEND_IO_ISC_1) | \
+ (1UL  IRQ_PEND_IO_ISC_2) | \
+ (1UL  IRQ_PEND_IO_ISC_3) | \
+ (1UL  IRQ_PEND_IO_ISC_4) | \
+ (1UL  IRQ_PEND_IO_ISC_5) | \
+ (1UL  IRQ_PEND_IO_ISC_6) | \
+ (1UL  IRQ_PEND_IO_ISC_7))
+
+#define IRQ_PEND_MCHK_MASK ((1UL  IRQ_PEND_MCHK_REP) | \
+   (1UL  IRQ_PEND_MCHK_EX))
+
 struct kvm_s390_interrupt_info {
struct list_head list;
u64 type;
@@ -313,6 +386,16 @@ struct kvm_s390_interrupt_info {
 #define ACTION_STORE_ON_STOP   (10)
 #define ACTION_STOP_ON_STOP(11)
 
+struct kvm_s390_irq_payload {
+   struct kvm_s390_io_info io;
+   struct kvm_s390_ext_info ext;
+   struct kvm_s390_pgm_info pgm;
+   struct kvm_s390_emerg_info emerg;
+   struct kvm_s390_extcall_info extcall;
+   struct kvm_s390_prefix_info prefix;
+   struct kvm_s390_mchk_info mchk;
+};
+
 struct kvm_s390_local_interrupt {
spinlock_t lock;
struct list_head list;
@@ -321,6 +404,9 @@ struct kvm_s390_local_interrupt {
wait_queue_head_t *wq;
atomic_t *cpuflags;
unsigned int action_bits;
+   DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
+   struct kvm_s390_irq_payload irq;
+   unsigned long pending_irqs;
 };
 
 struct kvm_s390_float_interrupt {
-- 
1.9.3



[GIT PULL 06/11] KVM: s390: external param not valid for cpu timer and ckc

2014-11-28 Thread Christian Borntraeger
From: David Hildenbrand d...@linux.vnet.ibm.com

The 32bit external interrupt parameter is only valid for timing-alert and
service-signal interrupts.

Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/interrupt.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 8f50f8c..bccda76 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -345,12 +345,12 @@ static int __must_check __do_deliver_interrupt(struct 
kvm_vcpu *vcpu,
break;
case KVM_S390_INT_CLOCK_COMP:
trace_kvm_s390_deliver_interrupt(vcpu-vcpu_id, inti-type,
-inti-ext.ext_params, 0);
+0, 0);
rc = deliver_ckc_interrupt(vcpu);
break;
case KVM_S390_INT_CPU_TIMER:
trace_kvm_s390_deliver_interrupt(vcpu-vcpu_id, inti-type,
-inti-ext.ext_params, 0);
+0, 0);
rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
   (u16 *)__LC_EXT_INT_CODE);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
@@ -358,8 +358,6 @@ static int __must_check __do_deliver_interrupt(struct 
kvm_vcpu *vcpu,
 sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
vcpu-arch.sie_block-gpsw, sizeof(psw_t));
-   rc |= put_guest_lc(vcpu, inti-ext.ext_params,
-  (u32 *)__LC_EXT_PARAMS);
break;
case KVM_S390_INT_SERVICE:
VCPU_EVENT(vcpu, 4, interrupt: sclp parm:%x,
-- 
1.9.3



Re: [GIT PULL 09/11] KVM: s390: add bitmap for handling cpu-local interrupts

2014-11-29 Thread Christian Borntraeger
Am 28.11.2014 um 18:18 schrieb Paolo Bonzini:
 
 
 On 28/11/2014 14:25, Christian Borntraeger wrote:
  
 +struct kvm_s390_irq_payload {
 +struct kvm_s390_io_info io;
 +struct kvm_s390_ext_info ext;
 +struct kvm_s390_pgm_info pgm;
 +struct kvm_s390_emerg_info emerg;
 +struct kvm_s390_extcall_info extcall;
 +struct kvm_s390_prefix_info prefix;
 +struct kvm_s390_mchk_info mchk;
 +};
 +
 
 struct or union?

struct. This is used for keeping the payload of the interrupts. Multiple 
different interrupts can be pending and most of them have payload - we want to 
keep everything.

Now, looking at that code again, as I/O is floating and emergency is also 
handled via a separate bitmap we could get rid of these two in a follow-up 
patch. Jens, can you have a look and prepare a followup-cleanup if appropriate?
Christian
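
To make the struct-vs-union point concrete, here is a small stand-alone sketch
with simplified stand-in types (not the kernel definitions): two different
interrupt types are pending at the same time and each keeps its own payload
slot, which a union would not allow.

#include <stdio.h>

struct pgm_info     { unsigned short code; };
struct extcall_info { unsigned short src_cpu; };

/* one payload slot per interrupt type; a later injection of one type
 * must not overwrite the payload of a still-pending other type */
struct irq_payload {
        struct pgm_info pgm;
        struct extcall_info extcall;
};

enum { PEND_PROG, PEND_EXT_EXTERNAL };

int main(void)
{
        unsigned long pending = 0;
        struct irq_payload p = { { 0 }, { 0 } };

        p.pgm.code = 0x10;          /* inject a program interrupt      */
        pending |= 1UL << PEND_PROG;
        p.extcall.src_cpu = 2;      /* inject an external call as well */
        pending |= 1UL << PEND_EXT_EXTERNAL;

        /* both payloads survive until their pending bits are delivered */
        printf("pending=%#lx pgm=%#x extcall from cpu %u\n",
               pending, p.pgm.code, p.extcall.src_cpu);
        return 0;
}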



Re: [GIT PULL 05/11] KVM: s390: refactor interrupt injection code

2014-12-01 Thread Christian Borntraeger
Am 28.11.2014 um 18:16 schrieb Paolo Bonzini:
 
 
 On 28/11/2014 14:25, Christian Borntraeger wrote:
  
 +static int __inject_prog_irq(struct kvm_vcpu *vcpu,
 + struct kvm_s390_interrupt_info *inti)
 +{
 
 Why the __s? :)

In this case because it's called with a lock already held (from
kvm_s390_inject_vcpu).
I usually extend the usage of the __ prefix to mean "hey, pay attention: you are
calling this function, but maybe you are supposed to do something else".

Christian
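
For illustration, a tiny userspace sketch of that convention with made-up names
(build with -lpthread): the plain function takes the lock itself, while the __
variant documents that the caller must already hold it.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t li_lock = PTHREAD_MUTEX_INITIALIZER;
static int pending_code;

/* __ prefix: caller must already hold li_lock */
static int __inject(int code)
{
        pending_code = code;
        return 0;
}

/* public entry point: takes the lock and calls the __ helper */
static int inject(int code)
{
        int rc;

        pthread_mutex_lock(&li_lock);
        rc = __inject(code);
        pthread_mutex_unlock(&li_lock);
        return rc;
}

int main(void)
{
        inject(42);
        printf("pending_code=%d\n", pending_code);
        return 0;
}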



Re: [PATCH RFC 0/2] assign each vcpu an owning thread and improve yielding

2014-12-03 Thread Christian Borntraeger
Am 03.12.2014 um 13:54 schrieb Paolo Bonzini:
 
 
 On 03/12/2014 13:12, David Hildenbrand wrote:
 This series improves yielding on architectures that cannot disable 
 preemption
 while entering the guest and makes the creating thread of a VCPU the owning
 thread and therefore the yield target when yielding to that VCPU.

 We should focus on the case creating thread == executing thread and 
 therefore
 remove the complicated handling of PIDs involving synchronize_rcus.

 This way we can speed up the creation of VCPUs and directly yield to the
 executing vcpu threads.

 Please note that - in theory - all VCPU ioctls should be triggered from the 
 same
 VCPU thread, so changing threads is not a scenario we should optimize.


 David Hildenbrand (2):
   KVM: don't check for PF_VCPU when yielding
   KVM: thread creating a vcpu is the owner of that vcpu

  include/linux/kvm_host.h |  1 +
  virt/kvm/kvm_main.c  | 22 ++
  2 files changed, 3 insertions(+), 20 deletions(-)


 Hi Paolo,

 would be good if you could have a look at these patches.
 
 Sure.
 
 I think patch 1 is fine and I am applying it.  For patch 2, what about
 moving the -pid assignment in the KVM_RUN case of kvm_vcpu_ioctl?

That was my initial patch for the RCU-specific latencies (do you remember?). But
IMHO patch 2 is actually the proper thing to do, no?

Christian
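
For readers following along, a rough userspace model of the alternative being
weighed here, i.e. recording the owning/executing thread lazily on the first
run call instead of on every ioctl; the names and structure are stand-ins and
not kvm_main.c.

#include <pthread.h>
#include <stdio.h>

struct vcpu {
        pthread_t owner;        /* thread considered to be running this vcpu */
        int owner_valid;
};

static void vcpu_run(struct vcpu *v)
{
        pthread_t self = pthread_self();

        /* record (or update) the owner only when the run path is entered */
        if (!v->owner_valid || !pthread_equal(v->owner, self)) {
                v->owner = self;
                v->owner_valid = 1;
        }
        /* ... enter the guest ... */
}

int main(void)
{
        struct vcpu v = { 0 };

        vcpu_run(&v);
        printf("owner recorded: %d\n", v.owner_valid);
        return 0;
}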



[GIT PULL 0/3] KVM: s390: Fixups for kvm/next (3.19)

2014-12-04 Thread Christian Borntraeger
Paolo,

some last-minute changes for 3.19 as a follow-up to the latest interrupt
rework, plus a small architectural compliance fix.

Christian

The following changes since commit be06b6bece19be5b167d863fd6c5271e4ec8f1fa:

  Merge tag 'kvm-s390-next-20141128' of 
git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD 
(2014-12-03 15:20:11 +0100)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git  
tags/kvm-s390-next-20141204

for you to fetch changes up to 99e20009aeee47049900ac152d7a88d4f68697d3:

  KVM: s390: clean up return code handling in irq delivery code (2014-12-04 
16:39:00 +0100)


KVM: s390: Fixups for kvm/next (3.19)

Here we have two fixups of the latest interrupt rework and
one architectural fixup.


David Hildenbrand (1):
  KVM: s390: some ext irqs have to clear the ext cpu addr

Jens Freimann (2):
  KVM: s390: use atomic bitops to access pending_irqs bitmap
  KVM: s390: clean up return code handling in irq delivery code

 arch/s390/kvm/interrupt.c | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)



[GIT PULL 1/3] KVM: s390: some ext irqs have to clear the ext cpu addr

2014-12-04 Thread Christian Borntraeger
From: David Hildenbrand d...@linux.vnet.ibm.com

The cpu address of a source cpu (responsible for an external irq) is only to
be stored if bit 6 of the ext irq code is set.

If bit 6 is not set, it is to be zeroed out.

The special external irq code used for virtio and pfault uses the cpu addr as a
parameter field. As bit 6 is set, this implementation is correct.
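
A tiny illustration of that rule, assuming the usual s390 bit numbering for a
16-bit field (bit 0 is the 0x8000 bit, so bit 6 corresponds to the 0x0200
mask); the helper name and the example codes are for demonstration only.

#include <stdio.h>

/* bit 6 of the 16-bit external interrupt code decides whether the
 * source cpu address is stored (otherwise it must be zeroed) */
static int stores_cpu_addr(unsigned short ext_irq_code)
{
        return (ext_irq_code & 0x0200) != 0;
}

int main(void)
{
        printf("0x2603 (pfault/virtio): %d\n", stores_cpu_addr(0x2603)); /* 1 */
        printf("0x2401 (service sig) : %d\n", stores_cpu_addr(0x2401));  /* 0 */
        printf("0x1004 (clock comp)  : %d\n", stores_cpu_addr(0x1004));  /* 0 */
        return 0;
}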

Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com
Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/interrupt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index b3d4409..6c0d14b 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -281,6 +281,7 @@ static int __must_check __deliver_cpu_timer(struct kvm_vcpu 
*vcpu)
 
rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
   (u16 *)__LC_EXT_INT_CODE);
+   rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
 vcpu-arch.sie_block-gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
@@ -299,6 +300,7 @@ static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
 
rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
   (u16 __user *)__LC_EXT_INT_CODE);
+   rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
 vcpu-arch.sie_block-gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
@@ -585,6 +587,7 @@ static int __must_check __deliver_service(struct kvm_vcpu 
*vcpu,
 inti-ext.ext_params, 0);
 
rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+   rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
 vcpu-arch.sie_block-gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-- 
1.9.3



[GIT PULL 3/3] KVM: s390: clean up return code handling in irq delivery code

2014-12-04 Thread Christian Borntraeger
From: Jens Freimann jf...@linux.vnet.ibm.com

Instead of returning a possibly random or'ed together value, let's
always return -EFAULT if rc is set.

Signed-off-by: Jens Freimann jf...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Acked-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/interrupt.c | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 86bc89a..f00f31e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -287,7 +287,7 @@ static int __must_check __deliver_cpu_timer(struct kvm_vcpu 
*vcpu)
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
vcpu-arch.sie_block-gpsw, sizeof(psw_t));
clear_bit(IRQ_PEND_EXT_CPU_TIMER, li-pending_irqs);
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
@@ -306,7 +306,7 @@ static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
vcpu-arch.sie_block-gpsw, sizeof(psw_t));
clear_bit(IRQ_PEND_EXT_CLOCK_COMP, li-pending_irqs);
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
@@ -334,7 +334,7 @@ static int __must_check __deliver_pfault_init(struct 
kvm_vcpu *vcpu)
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
vcpu-arch.sie_block-gpsw, sizeof(psw_t));
rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2);
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
@@ -371,7 +371,7 @@ static int __must_check __deliver_machine_check(struct 
kvm_vcpu *vcpu)
 vcpu-arch.sie_block-gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
vcpu-arch.sie_block-gpsw, sizeof(psw_t));
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
@@ -389,7 +389,7 @@ static int __must_check __deliver_restart(struct kvm_vcpu 
*vcpu)
rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
vcpu-arch.sie_block-gpsw, sizeof(psw_t));
clear_bit(IRQ_PEND_RESTART, li-pending_irqs);
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_stop(struct kvm_vcpu *vcpu)
@@ -450,7 +450,7 @@ static int __must_check __deliver_emergency_signal(struct 
kvm_vcpu *vcpu)
 vcpu-arch.sie_block-gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
vcpu-arch.sie_block-gpsw, sizeof(psw_t));
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
@@ -478,7 +478,7 @@ static int __must_check __deliver_external_call(struct 
kvm_vcpu *vcpu)
 vcpu-arch.sie_block-gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, vcpu-arch.sie_block-gpsw,
sizeof(psw_t));
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
@@ -572,7 +572,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu 
*vcpu)
 vcpu-arch.sie_block-gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
vcpu-arch.sie_block-gpsw, sizeof(psw_t));
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
@@ -594,7 +594,7 @@ static int __must_check __deliver_service(struct kvm_vcpu 
*vcpu,
vcpu-arch.sie_block-gpsw, sizeof(psw_t));
rc |= put_guest_lc(vcpu, inti-ext.ext_params,
   (u32 *)__LC_EXT_PARAMS);
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
@@ -614,7 +614,7 @@ static int __must_check __deliver_pfault_done(struct 
kvm_vcpu *vcpu,
vcpu-arch.sie_block-gpsw, sizeof(psw_t));
rc |= put_guest_lc(vcpu, inti-ext.ext_params2,
   (u64 *)__LC_EXT_PARAMS2);
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
@@ -639,7 +639,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu 
*vcpu,
   (u32 *)__LC_EXT_PARAMS);
rc |= put_guest_lc(vcpu, inti-ext.ext_params2,
   (u64 *)__LC_EXT_PARAMS2);
-   return rc;
+   return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_io

[GIT PULL 2/3] KVM: s390: use atomic bitops to access pending_irqs bitmap

2014-12-04 Thread Christian Borntraeger
From: Jens Freimann jf...@linux.vnet.ibm.com

Currently we use a mixture of atomic/non-atomic bitops
and the local_int spin lock to protect the pending_irqs bitmap
and interrupt payload data.

We need to use atomic bitops for the pending_irqs bitmap everywhere
and in addition acquire the local_int lock where interrupt data needs
to be protected.
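
A simplified illustration of the difference, using GCC builtins as stand-ins
for the kernel's set_bit()/__set_bit() (this is a model, not the real
primitives): the non-atomic form is only safe while the protecting lock is
held, the atomic form is safe against concurrent updaters of the same word.

#include <stdio.h>

static unsigned long pending_irqs;

/* like __set_bit(): plain read-modify-write, needs external locking */
static void nonatomic_set_bit(int nr, unsigned long *addr)
{
        *addr |= 1UL << nr;
}

/* like set_bit(): atomic read-modify-write */
static void atomic_set_bit(int nr, unsigned long *addr)
{
        __atomic_fetch_or(addr, 1UL << nr, __ATOMIC_SEQ_CST);
}

int main(void)
{
        nonatomic_set_bit(0, &pending_irqs);
        atomic_set_bit(3, &pending_irqs);
        printf("pending_irqs=%#lx\n", pending_irqs);
        return 0;
}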

Signed-off-by: Jens Freimann jf...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/interrupt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 6c0d14b..86bc89a 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -929,7 +929,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct 
kvm_s390_irq *irq)
struct kvm_s390_local_interrupt *li = vcpu-arch.local_int;
 
li-irq.pgm = irq-u.pgm;
-   __set_bit(IRQ_PEND_PROG, li-pending_irqs);
+   set_bit(IRQ_PEND_PROG, li-pending_irqs);
return 0;
 }
 
@@ -995,7 +995,7 @@ int __inject_extcall(struct kvm_vcpu *vcpu, struct 
kvm_s390_irq *irq)
   irq-u.extcall.code, 0, 2);
 
*extcall = irq-u.extcall;
-   __set_bit(IRQ_PEND_EXT_EXTERNAL, li-pending_irqs);
+   set_bit(IRQ_PEND_EXT_EXTERNAL, li-pending_irqs);
atomic_set_mask(CPUSTAT_EXT_INT, li-cpuflags);
return 0;
 }
-- 
1.9.3



[PATCH/RFC] s390/kernel: use stnsm instead of stosm

2014-12-16 Thread Christian Borntraeger
At least on z196 stnsm is faster than stosm.
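
A plain-C model of the two idioms (a sketch of the semantics, not the actual
irqflags.h code): both instructions first store the current 8-bit PSW system
mask and then modify it, stosm by OR-ing in an immediate and stnsm by AND-ing
one, so stosm with 0x00 and stnsm with 0xff are both read-only accessors and
the switch is purely about which one the hardware executes faster.

#include <stdio.h>

static unsigned char psw_sysmask;   /* stand-in for the 8-bit PSW system mask */

static unsigned char stosm_model(unsigned char or_bits)
{
        unsigned char old = psw_sysmask;   /* "store" the current mask */
        psw_sysmask |= or_bits;            /* then OR the immediate in */
        return old;
}

static unsigned char stnsm_model(unsigned char and_bits)
{
        unsigned char old = psw_sysmask;   /* "store" the current mask  */
        psw_sysmask &= and_bits;           /* then AND the immediate in */
        return old;
}

int main(void)
{
        psw_sysmask = 0x04;
        /* both forms read the mask without changing it: */
        printf("%#x %#x mask=%#x\n",
               stosm_model(0x00), stnsm_model(0xff), psw_sysmask);
        return 0;
}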

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/irqflags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
index 37b9091..16aa0c7 100644
--- a/arch/s390/include/asm/irqflags.h
+++ b/arch/s390/include/asm/irqflags.h
@@ -36,7 +36,7 @@ static inline notrace void __arch_local_irq_ssm(unsigned long 
flags)
 
 static inline notrace unsigned long arch_local_save_flags(void)
 {
-   return __arch_local_irq_stosm(0x00);
+   return __arch_local_irq_stnsm(0xff);
 }
 
 static inline notrace unsigned long arch_local_irq_save(void)
-- 
1.9.3



Re: [PATCH/RFC] s390/kernel: use stnsm instead of stosm

2014-12-16 Thread Christian Borntraeger
Paolo,


sorry, this should have only gone to Martin and Heiko.
Nothing to worry about from your side. :-)


Am 16.12.2014 um 10:30 schrieb Christian Borntraeger:
 At least on z196 stnsm is faster than stosm.
 
 Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
 ---
  arch/s390/include/asm/irqflags.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/arch/s390/include/asm/irqflags.h 
 b/arch/s390/include/asm/irqflags.h
 index 37b9091..16aa0c7 100644
 --- a/arch/s390/include/asm/irqflags.h
 +++ b/arch/s390/include/asm/irqflags.h
 @@ -36,7 +36,7 @@ static inline notrace void __arch_local_irq_ssm(unsigned 
 long flags)
 
  static inline notrace unsigned long arch_local_save_flags(void)
  {
 - return __arch_local_irq_stosm(0x00);
 + return __arch_local_irq_stnsm(0xff);
  }
 
  static inline notrace unsigned long arch_local_irq_save(void)
 



Re: [PATCH RFC 1/1] KVM: s390: Add MEMOP ioctl for reading/writing guest memory

2015-02-04 Thread Christian Borntraeger
Am 03.02.2015 um 16:22 schrieb Paolo Bonzini:
 
 
 On 03/02/2015 16:16, Thomas Huth wrote:
 Actually, I'd prefer to keep the virtual in the defines for the type
 of operation below: When it comes to s390 storage keys, we likely might
 need some calls for reading and writing to physical memory, too. Then
 we could simply extend this ioctl instead of inventing a new one.

Rereading that. Shall we replace "virtual" with "logical"? That is what is
used architecturally when we mean "do whatever is appropriate right now".
That can boil down to virtual via DAT, virtual via access register mode, or
real if DAT is off... and in fact your kernel implementation does that.


 
 Can you explain why it is necessary to read/write physical addresses
 from user space?  In the case of QEMU, I'm worried that you would have
 to invent your own memory read/write APIs that are different from
 everything else.
 
 On real s390 zPCI, does bus-master DMA update storage keys?

the classic channel I/O does set the storage key change/reference bits and
also triggers errors when the storage key protection value mismatches.

The PCI IOTA structure does contain a storage key value for accesses,
so I assume it's the same here, but I don't know for sure.

Conny:
I am asking myself if we should explicitly add a comment in the
virtio-ccw spec that all accesses are assumed to be done with key 0 and
thus never cause key protection. The change/reference bit is set
by the underlying I/O or memory copy anyway.
We can then add a ccw later on to set a different key if we ever need
that.


 
 Not really true, as you don't check it.  So It is not used by KVM with
 the currently defined set of flags is a better explanation.

 ok ... and maybe add "should be set to zero"?
 
 If you don't check it, it is misleading to document this.
 
 Paolo


[PATCH 3/7] s390/kernel: Update /proc/sysinfo file with Extended Name and UUID

2015-02-04 Thread Christian Borntraeger
From: Ekaterina Tumanova tuman...@linux.vnet.ibm.com

Read the additional data fields (Extended Name and UUID) from the
1KB block returned by the STSI command and reflect this information in
the /proc/sysinfo file accordingly.

Signed-off-by: Ekaterina Tumanova tuman...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Acked-by: Heiko Carstens heiko.carst...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/sysinfo.h | 10 +++---
 arch/s390/kernel/sysinfo.c  | 29 +
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index f92428e..9f8f2b5 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -15,6 +15,7 @@
 #define __ASM_S390_SYSINFO_H
 
 #include asm/bitsperlong.h
+#include linux/uuid.h
 
 struct sysinfo_1_1_1 {
unsigned char p:1;
@@ -112,10 +113,13 @@ struct sysinfo_3_2_2 {
char name[8];
unsigned int caf;
char cpi[16];
-   char reserved_1[24];
-
+   char reserved_1[3];
+   char ext_name_encoding;
+   unsigned int reserved_2;
+   uuid_be uuid;
} vm[8];
-   char reserved_544[3552];
+   char reserved_3[1504];
+   char ext_names[8][256];
 };
 
 extern int topology_max_mnest;
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 811f542..cebab77 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -196,6 +196,33 @@ static void stsi_2_2_2(struct seq_file *m, struct 
sysinfo_2_2_2 *info)
seq_printf(m, LPAR CPUs Shared: %d\n, info-cpus_shared);
 }
 
+static void print_ext_name(struct seq_file *m, int lvl,
+  struct sysinfo_3_2_2 *info)
+{
+   if (info-vm[lvl].ext_name_encoding == 0)
+   return;
+   if (info-ext_names[lvl][0] == 0)
+   return;
+   switch (info-vm[lvl].ext_name_encoding) {
+   case 1: /* EBCDIC */
+   EBCASC(info-ext_names[lvl], sizeof(info-ext_names[lvl]));
+   break;
+   case 2: /* UTF-8 */
+   break;
+   default:
+   return;
+   }
+   seq_printf(m, VM%02d Extended Name:   %-.256s\n, lvl,
+  info-ext_names[lvl]);
+}
+
+static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
+{
+   if (!memcmp(info-vm[i].uuid, NULL_UUID_BE, sizeof(uuid_be)))
+   return;
+   seq_printf(m, VM%02d UUID:%pUb\n, i, info-vm[i].uuid);
+}
+
 static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
 {
int i;
@@ -213,6 +240,8 @@ static void stsi_3_2_2(struct seq_file *m, struct 
sysinfo_3_2_2 *info)
seq_printf(m, VM%02d CPUs Configured: %d\n, i, 
info-vm[i].cpus_configured);
seq_printf(m, VM%02d CPUs Standby:%d\n, i, 
info-vm[i].cpus_standby);
seq_printf(m, VM%02d CPUs Reserved:   %d\n, i, 
info-vm[i].cpus_reserved);
+   print_ext_name(m, i, info);
+   print_uuid(m, i, info);
}
 }
 
-- 
1.9.3



[PATCH 1/7] KVM: s390: floating irqs: fix user triggerable endless loop

2015-02-04 Thread Christian Borntraeger
From: David Hildenbrand d...@linux.vnet.ibm.com

If a vm with no VCPUs is created, the injection of a floating irq
leads to an endless loop in the kernel.

Let's skip the search for a destination VCPU for a floating irq if no
VCPUs were created.
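
A toy reproduction of the failure mode (illustrative only, not the kernel's
actual search loop): with zero online VCPUs the round-robin search for a
delivery target can never succeed, so the fix bails out before searching.

#include <stdio.h>

#define MAX_VCPUS 4

static int pick_target(int online_vcpus, int last)
{
        int sigcpu = last;

        if (online_vcpus == 0)      /* the fix: nothing to deliver to */
                return -1;
        do {
                sigcpu = (sigcpu + 1) % MAX_VCPUS;
        } while (sigcpu >= online_vcpus); /* would spin forever if online == 0 */
        return sigcpu;
}

int main(void)
{
        printf("target=%d\n", pick_target(2, 0)); /* finds vcpu 1 */
        printf("target=%d\n", pick_target(0, 0)); /* -1 instead of an endless loop */
        return 0;
}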

Reviewed-by: Dominik Dingel din...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Signed-off-by: David Hildenbrand d...@linux.vnet.ibm.com
Cc: sta...@vger.kernel.org # v3.15+
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/interrupt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c34e1d9..073b5f3 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1244,6 +1244,8 @@ static int __inject_vm(struct kvm *kvm, struct 
kvm_s390_interrupt_info *inti)
list_add_tail(inti-list, iter-list);
}
atomic_set(fi-active, 1);
+   if (atomic_read(kvm-online_vcpus) == 0)
+   goto unlock_fi;
sigcpu = find_first_bit(fi-idle_mask, KVM_MAX_VCPUS);
if (sigcpu == KVM_MAX_VCPUS) {
do {
-- 
1.9.3



[PATCH 0/7] KVM: s390: fixes and features for kvm/next (3.20)

2015-02-04 Thread Christian Borntraeger
Paolo,

here is my remaining bunch of patches for 3.20 for review. All patches
have been in linux-next via
git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git next
(well a tiny fix went in today, so the commit id for the last 3 patches
has changed)

I think it makes sense to give some of these patches some more days
for external review. Is it ok to send a pull request early next week
or do you want it before 3.19?

There is one non-KVM s390 patch, but it has an ACK from Heiko Carstens

Christian Borntraeger (1):
  KVM: s390: reenable LPP facility

David Hildenbrand (1):
  KVM: s390: floating irqs: fix user triggerable endless loop

Ekaterina Tumanova (1):
  s390/kernel: Update /proc/sysinfo file with Extended Name and UUID

Jason J. Herne (1):
  KVM: s390: Create ioctl for Getting/Setting guest storage keys

Michael Mueller (2):
  KVM: s390: use facilities and cpu_id per KVM
  KVM: s390: add cpu model support

Tony Krowiak (1):
  KVM: s390/CPACF: Choose crypto control block format

 Documentation/virtual/kvm/api.txt|  58 +
 Documentation/virtual/kvm/devices/vm.txt |  45 
 arch/s390/include/asm/kvm_host.h |  27 ++-
 arch/s390/include/asm/sysinfo.h  |  10 +-
 arch/s390/include/uapi/asm/kvm.h |  21 ++
 arch/s390/kernel/sysinfo.c   |  29 +++
 arch/s390/kvm/gaccess.c  |   4 +-
 arch/s390/kvm/interrupt.c|   2 +
 arch/s390/kvm/kvm-s390.c | 394 ---
 arch/s390/kvm/kvm-s390.h |  13 +-
 arch/s390/kvm/priv.c |  13 +-
 include/uapi/linux/kvm.h |  14 ++
 12 files changed, 580 insertions(+), 50 deletions(-)

-- 
1.9.3



[PATCH 4/7] KVM: s390/CPACF: Choose crypto control block format

2015-02-04 Thread Christian Borntraeger
From: Tony Krowiak akrow...@linux.vnet.ibm.com

We need to specify a different format for the crypto control block
depending on whether the APXA facility is installed or not. Let's test
for it by executing the PQAP(QCI) function and use either a format-1 or
a format-2 crypto control block accordingly.

Signed-off-by: Tony Krowiak akrow...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/kvm_host.h |  2 ++
 arch/s390/kvm/kvm-s390.c | 49 ++--
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d1ecc7f..09b6c1f 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -163,6 +163,7 @@ struct kvm_s390_sie_block {
__u64   tecmc;  /* 0x00e8 */
	__u8	reservedf0[12];		/* 0x00f0 */
 #define CRYCB_FORMAT1 0x0001
+#define CRYCB_FORMAT2 0x0003
__u32   crycbd; /* 0x00fc */
__u64   gcr[16];/* 0x0100 */
__u64   gbea;   /* 0x0180 */
@@ -515,6 +516,7 @@ struct kvm_s390_crypto_cb {
 	__u8	reserved00[72];			/* 0x0000 */
 	__u8	dea_wrapping_key_mask[24];	/* 0x0048 */
 	__u8	aes_wrapping_key_mask[32];	/* 0x0060 */
+	__u8	reserved80[128];		/* 0x0080 */
 };
 
 struct kvm_arch{
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 57f5538..57ba533 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -653,6 +653,52 @@ long kvm_arch_vm_ioctl(struct file *filp,
return r;
 }
 
+static int kvm_s390_query_ap_config(u8 *config)
+{
+	u32 fcn_code = 0x04000000UL;
+	u32 cc;
+
+	asm volatile(
+		"lgr 0,%1\n"
+		"lgr 2,%2\n"
+		".long 0xb2af0000\n"		/* PQAP(QCI) */
+		"ipm %0\n"
+		"srl %0,28\n"
+		: "=r" (cc)
+		: "r" (fcn_code), "r" (config)
+		: "cc", "0", "2", "memory"
+	);
+
+	return cc;
+}
+
+static int kvm_s390_apxa_installed(void)
+{
+	u8 config[128];
+	int cc;
+
+	if (test_facility(2) && test_facility(12)) {
+		cc = kvm_s390_query_ap_config(config);
+
+		if (cc)
+			pr_err("PQAP(QCI) failed with cc=%d", cc);
+		else
+			return config[0] & 0x40;
+	}
+
+	return 0;
+}
+
+static void kvm_s390_set_crycb_format(struct kvm *kvm)
+{
+	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
+
+	if (kvm_s390_apxa_installed())
+		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
+	else
+		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
+}
+
 static int kvm_s390_crypto_init(struct kvm *kvm)
 {
if (!test_vfacility(76))
@@ -663,8 +709,7 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
 	if (!kvm->arch.crypto.crycb)
 		return -ENOMEM;
 
-	kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb |
-				  CRYCB_FORMAT1;
+	kvm_s390_set_crycb_format(kvm);
 
/* Disable AES/DEA protected key functions by default */
 	kvm->arch.crypto.aes_kw = 0;
-- 
1.9.3



[PATCH 7/7] KVM: s390: Create ioctl for Getting/Setting guest storage keys

2015-02-04 Thread Christian Borntraeger
From: Jason J. Herne jjhe...@linux.vnet.ibm.com

Provide the KVM_S390_GET_SKEYS and KVM_S390_SET_SKEYS ioctl which can be used
to get/set guest storage keys. This functionality is needed for live migration
of s390 guests that use storage keys.

Signed-off-by: Jason J. Herne jjhe...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 Documentation/virtual/kvm/api.txt |  58 ++
 arch/s390/kvm/kvm-s390.c  | 121 ++
 include/uapi/linux/kvm.h  |  14 +
 3 files changed, 193 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index b112efc..20c0dac 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2716,6 +2716,64 @@ The fields in each entry are defined as follows:
eax, ebx, ecx, edx: the values returned by the cpuid instruction for
  this function/index combination
 
+4.89 KVM_S390_GET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, KVM_S390_GET_SKEYS_NONE if guest is not using storage
+ keys, negative value on error
+
+This ioctl is used to get guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+
+struct kvm_s390_skeys {
+   __u64 start_gfn;
+   __u64 count;
+   __u64 skeydata_addr;
+   __u32 flags;
+   __u32 reserved[9];
+};
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to get.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer large enough to hold count
+bytes. This buffer will be filled with storage key data by the ioctl.
+
+4.90 KVM_S390_SET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, negative value on error
+
+This ioctl is used to set guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+See section on KVM_S390_GET_SKEYS for struct definition.
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to set.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to set. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer containing count bytes of
+storage keys. Each byte in the buffer will be set as the storage key for a
+single frame starting at start_gfn for count frames.
+
+Note: If any architecturally invalid key value is found in the given data then
+the ioctl will return -EINVAL.
+
 5. The kvm_run structure
 
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9da937a..97c9795 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -25,6 +25,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
 #include <asm/pgtable.h>
@@ -708,6 +709,106 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
return ret;
 }
 
+static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;
+	uint64_t hva;
+	unsigned long curkey;
+	int i, r = 0;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Is this guest using storage keys? */
+	if (!mm_use_skey(current->mm))
+		return KVM_S390_GET_SKEYS_NONE;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
+	if (!keys)
+		keys = vmalloc(sizeof(uint8_t) * args->count);
+	if (!keys)
+		return -ENOMEM;
+
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		curkey = get_guest_storage_key(current->mm, hva);
+		if (IS_ERR_VALUE(curkey)) {
+			r = curkey;
+			goto out;
+		}
+		keys[i] = curkey;
+	}
+
+	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys

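For illustration, a minimal userspace sketch of driving the KVM_S390_GET_SKEYS
ioctl documented above (the VM file descriptor is assumed to come from
KVM_CREATE_VM elsewhere; error handling is trimmed):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Read the storage keys of the first 1024 guest frames from vm_fd. */
static int dump_first_skeys(int vm_fd)
{
	struct kvm_s390_skeys args = { 0 };
	uint8_t *buf;
	int r;

	args.start_gfn = 0;
	args.count = 1024;
	buf = malloc(args.count);
	if (!buf)
		return -1;
	args.skeydata_addr = (uint64_t)(unsigned long)buf;

	r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
	if (r < 0)
		perror("KVM_S390_GET_SKEYS");
	else if (r > 0)			/* KVM_S390_GET_SKEYS_NONE */
		printf("guest does not use storage keys\n");
	else
		printf("key of frame 0: 0x%02x\n", buf[0]);
	free(buf);
	return r;
}
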
[PATCH 2/7] KVM: s390: reenable LPP facility

2015-02-04 Thread Christian Borntraeger
commit 7be81a46695d (KVM: s390/facilities: allow TOD-CLOCK steering
facility bit) accidentally disabled the load program parameter
facility bit during rebase for upstream submission (my fault).

Re-add that bit.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Acked-by: Cornelia Huck cornelia.h...@de.ibm.com
Fixes: 7be81a46695d (KVM: s390/facilities: allow TOD-CLOCK steering)
---
 arch/s390/kvm/kvm-s390.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b2371c0..57f5538 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2073,7 +2073,7 @@ static int __init kvm_s390_init(void)
return -ENOMEM;
}
 	memcpy(vfacilities, &S390_lowcore.stfle_fac_list, 16);
-	vfacilities[0] &= 0xff82fffbf47c2000UL;
+	vfacilities[0] &= 0xff82fffbf4fc2000UL;
 	vfacilities[1] &= 0x005c000000000000UL;
return 0;
 }
-- 
1.9.3

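Purely as an illustration of what the one-character change above re-enables:
the two masks differ in exactly one bit, and with the STFLE convention of
numbering facility bits from the most significant bit of doubleword 0, that
bit is facility 40, the load-program-parameter facility:

#include <stdio.h>

int main(void)
{
	unsigned long old_mask = 0xff82fffbf47c2000UL;
	unsigned long new_mask = 0xff82fffbf4fc2000UL;

	/* facility bit n of doubleword 0 corresponds to 1UL << (63 - n) */
	printf("re-enabled facility bit: %d\n",
	       63 - __builtin_ctzl(old_mask ^ new_mask));	/* prints 40 */
	return 0;
}
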


[PATCH 6/7] KVM: s390: add cpu model support

2015-02-04 Thread Christian Borntraeger
From: Michael Mueller m...@linux.vnet.ibm.com

This patch enables cpu model support in kvm/s390 via the vm attribute
interface.

During KVM initialization, the host properties cpuid, IBC value and the
facility list are stored in the architecture specific cpu model structure.

During vcpu setup, these properties are taken to initialize the related SIE
state. This mechanism allows the properties to be adjusted from user space and
thus different selectable cpu models to be implemented.

This patch uses the IBC functionality to block instructions that have not
been implemented at the requested CPU type and GA level compared to the
full host capability.

Userspace has to initialize the cpu model before vcpu creation. A cpu model
change of running vcpus is not possible.

Signed-off-by: Michael Mueller m...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 Documentation/virtual/kvm/devices/vm.txt |  45 +++
 arch/s390/include/asm/kvm_host.h |   4 +-
 arch/s390/include/uapi/asm/kvm.h |  21 +
 arch/s390/kvm/kvm-s390.c | 132 +++
 4 files changed, 201 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index c3b17c6..5542c46 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -38,3 +38,48 @@ Allows userspace to query the actual limit and set a new limit for
 the maximum guest memory size. The limit will be rounded up to
 2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by
 the number of page table levels.
+
+2. GROUP: KVM_S390_VM_CPU_MODEL
+Architectures: s390
+
+2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
+
+Allows user space to retrieve machine and kvm specific cpu related information:
+
+struct kvm_s390_vm_cpu_machine {
+   __u64 cpuid;   # CPUID of host
+   __u32 ibc; # IBC level range offered by host
+   __u8  pad[4];
+   __u64 fac_mask[256];   # set of cpu facilities enabled by KVM
+   __u64 fac_list[256];   # set of cpu facilities offered by host
+}
+
+Parameters: address of buffer to store the machine related cpu data
+of type struct kvm_s390_vm_cpu_machine*
+Returns:-EFAULT if the given address is not accessible from kernel space
+   -ENOMEM if not enough memory is available to process the ioctl
+   0 in case of success
+
+2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
+
+Allows user space to retrieve or request to change cpu related information for a vcpu:
+
+struct kvm_s390_vm_cpu_processor {
+   __u64 cpuid;   # CPUID currently (to be) used by this vcpu
+   __u16 ibc; # IBC level currently (to be) used by this vcpu
+   __u8  pad[6];
+   __u64 fac_list[256];   # set of cpu facilities currently (to be) used
+  # by this vcpu
+}
+
+KVM does not enforce or limit the cpu model data in any form. Take the
+information retrieved by means of KVM_S390_VM_CPU_MACHINE as a hint for
+reasonable configuration setups. Instruction interceptions triggered by
+additionally set facility bits that are not handled by KVM need to be
+implemented in the VM driver code.
+
+Parameters: address of buffer to store/set the processor related cpu
+   data of type struct kvm_s390_vm_cpu_processor*.
+Returns:-EBUSY in case 1 or more vcpus are already activated (only in write case)
+   -EFAULT if the given address is not accessible from kernel space
+   -ENOMEM if not enough memory is available to process the ioctl
+   0 in case of success
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 995a598..fcfff71 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -89,7 +89,8 @@ struct kvm_s390_sie_block {
 	atomic_t cpuflags;		/* 0x0000 */
 	__u32 : 1;			/* 0x0004 */
 	__u32 prefix : 18;
-	__u32 : 13;
+	__u32 : 1;
+	__u32 ibc : 12;
 	__u8	reserved08[4];		/* 0x0008 */
 #define PROG_IN_SIE (1<<0)
__u32   prog0c; /* 0x000c */
@@ -523,6 +524,7 @@ struct s390_model_fac {
 struct kvm_s390_cpu_model {
struct s390_model_fac *fac;
struct cpuid cpu_id;
+   unsigned short ibc;
 };
 
 struct kvm_s390_crypto {
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 546fc3a..9c77e60 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -59,6 +59,7 @@ struct kvm_s390_io_adapter_req {
 #define KVM_S390_VM_MEM_CTRL   0
 #define KVM_S390_VM_TOD1
 #define KVM_S390_VM_CRYPTO 2
+#define KVM_S390_VM_CPU_MODEL  3
 
 /* kvm attributes

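To make the attribute interface above more concrete, a minimal userspace
sketch of reading the machine data via KVM_GET_DEVICE_ATTR on the VM file
descriptor (vm_fd is assumed to come from KVM_CREATE_VM; error handling is
trimmed):

#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Query the host cpu model data exposed by the KVM_S390_VM_CPU_MODEL group. */
static int query_cpu_machine(int vm_fd)
{
	struct kvm_s390_vm_cpu_machine machine;
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_CPU_MODEL,
		.attr  = KVM_S390_VM_CPU_MACHINE,
		.addr  = (uint64_t)(unsigned long)&machine,
	};

	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) < 0) {
		perror("KVM_GET_DEVICE_ATTR");
		return -1;
	}
	printf("host cpuid 0x%016llx, ibc range 0x%08x\n",
	       (unsigned long long)machine.cpuid, machine.ibc);
	return 0;
}
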
[PATCH 5/7] KVM: s390: use facilities and cpu_id per KVM

2015-02-04 Thread Christian Borntraeger
From: Michael Mueller m...@linux.vnet.ibm.com

The patch introduces facilities and cpu_ids per virtual machine.
Different virtual machines may want to expose different facilities and
cpu ids to the guest, so let's make them per-vm instead of global.

Signed-off-by: Michael Mueller m...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/include/asm/kvm_host.h | 21 +
 arch/s390/kvm/gaccess.c  |  4 +-
 arch/s390/kvm/kvm-s390.c | 92 +---
 arch/s390/kvm/kvm-s390.h | 13 --
 arch/s390/kvm/priv.c | 13 --
 5 files changed, 99 insertions(+), 44 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 09b6c1f..995a598 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -505,6 +505,26 @@ struct s390_io_adapter {
 #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
 #define MAX_S390_ADAPTER_MAPS 256
 
+/* maximum size of facilities and facility mask is 2k bytes */
+#define S390_ARCH_FAC_LIST_SIZE_BYTE (1<<11)
+#define S390_ARCH_FAC_LIST_SIZE_U64 \
+   (S390_ARCH_FAC_LIST_SIZE_BYTE / sizeof(u64))
+#define S390_ARCH_FAC_MASK_SIZE_BYTE S390_ARCH_FAC_LIST_SIZE_BYTE
+#define S390_ARCH_FAC_MASK_SIZE_U64 \
+   (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
+
+struct s390_model_fac {
+   /* facilities used in SIE context */
+   __u64 sie[S390_ARCH_FAC_LIST_SIZE_U64];
+   /* subset enabled by kvm */
+   __u64 kvm[S390_ARCH_FAC_LIST_SIZE_U64];
+};
+
+struct kvm_s390_cpu_model {
+   struct s390_model_fac *fac;
+   struct cpuid cpu_id;
+};
+
 struct kvm_s390_crypto {
struct kvm_s390_crypto_cb *crycb;
__u32 crycbd;
@@ -535,6 +555,7 @@ struct kvm_arch{
int ipte_lock_count;
struct mutex ipte_mutex;
spinlock_t start_stop_lock;
+   struct kvm_s390_cpu_model model;
struct kvm_s390_crypto crypto;
u64 epoch;
 };
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 8a1be90..267523c 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -357,8 +357,8 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 	union asce asce;
 
 	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
-	edat1 = ctlreg0.edat && test_vfacility(8);
-	edat2 = edat1 && test_vfacility(78);
+	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
+	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
asce.val = get_vcpu_asce(vcpu);
if (asce.r)
goto real_address;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 57ba533..ab8b016 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -30,7 +30,6 @@
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
-#include <asm/facility.h>
 #include <asm/sclp.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
@@ -99,15 +98,20 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
 };
 
-unsigned long *vfacilities;
-static struct gmap_notifier gmap_notifier;
+/* upper facilities limit for kvm */
+unsigned long kvm_s390_fac_list_mask[] = {
+	0xff82fffbf4fc2000UL,
+	0x005c000000000000UL,
+};
 
-/* test availability of vfacility */
-int test_vfacility(unsigned long nr)
+unsigned long kvm_s390_fac_list_mask_size(void)
 {
-	return __test_facility(nr, (void *) vfacilities);
+	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
+	return ARRAY_SIZE(kvm_s390_fac_list_mask);
 }
 
+static struct gmap_notifier gmap_notifier;
+
 /* Section: not file related */
 int kvm_arch_hardware_enable(void)
 {
@@ -350,7 +354,7 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_vcpu *vcpu;
int i;
 
-   if (!test_vfacility(76))
+   if (!test_kvm_facility(kvm, 76))
return -EINVAL;
 
 	mutex_lock(&kvm->lock);
@@ -699,9 +703,15 @@ static void kvm_s390_set_crycb_format(struct kvm *kvm)
 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
 }
 
+static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
+{
+   get_cpu_id(cpu_id);
+	cpu_id->version = 0xff;
+}
+
 static int kvm_s390_crypto_init(struct kvm *kvm)
 {
-   if (!test_vfacility(76))
+   if (!test_kvm_facility(kvm, 76))
return 0;
 
 	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
@@ -720,7 +730,7 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-   int rc;
+   int i, rc;
char debug_name[16];
static unsigned long sca_offset;
 
@@ -755,6 +765,34 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (!kvm->arch.dbf)
goto out_nodbf

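The kvm-s390.h hunk that introduces test_kvm_facility() is not part of the
excerpt above; as a rough sketch of the idea only (member names taken from
the kvm_host.h hunk, the real implementation may differ), the per-VM check
replaces the old global vfacilities lookup along these lines:

/* Sketch: test a facility bit against the per-VM subset enabled by KVM
 * (kvm->arch.model.fac->kvm) instead of the former global vfacilities. */
static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
{
	return __test_facility(nr, (void *) kvm->arch.model.fac->kvm);
}
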
Re: [PATCH 3/7] s390/kernel: Update /proc/sysinfo file with Extended Name and UUID

2015-02-04 Thread Christian Borntraeger
Am 04.02.2015 um 14:03 schrieb Paolo Bonzini:
 
 
 On 04/02/2015 14:01, Christian Borntraeger wrote:
 Am 04.02.2015 um 13:57 schrieb Paolo Bonzini:


 On 04/02/2015 10:44, Christian Borntraeger wrote:
 From: Ekaterina Tumanova tuman...@linux.vnet.ibm.com

 Read the additional data fields (Extended Name and UUID) from the
 1KB block returned by the STSI command and reflect this information in
 the /proc/sysinfo file accordingly

 Signed-off-by: Ekaterina Tumanova tuman...@linux.vnet.ibm.com
 Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
 Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
 Acked-by: Heiko Carstens heiko.carst...@de.ibm.com
 Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com

 Why should this go in via KVM? :)

 Because we are being the first that will implement the architecture (
 the other side so to speak). 
 Heiko gave his ack so this should be fine. If you prefer I can certainly push
 that via Martin/Heiko.
 
 No problem.  Perhaps edit the commit message?

Something like

A new architecture extends STSI 3.2.2 with UUID and long names. KVM will
provide the first implementation. This patch adds the additional data
fields (Extended Name and UUID) from the 4KB block returned by the STSI
3.2.2 command and reflects this information in the /proc/sysinfo file
accordingly.
This is non-KVM code, but developed by the KVM team. The patch is
acked by Heiko Carstens to go over the KVM tree.

Signed-off-by: Ekaterina Tumanova tuman...@linux.vnet.ibm.com
Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com
Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com
Acked-by: Heiko Carstens heiko.carst...@de.ibm.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com

?



Re: [PATCH RFC 0/1] KVM: ioctl for reading/writing guest memory

2015-02-03 Thread Christian Borntraeger
Am 03.02.2015 um 13:59 schrieb Paolo Bonzini:
 
 
 On 03/02/2015 13:11, Thomas Huth wrote:
 The userspace (QEMU) then can simply call this ioctl when it wants
 to read or write from/to virtual guest memory. Then kernel then takes
 the IPTE-lock, walks the MMU table of the guest to find out the
 physical address that corresponds to the virtual address, copies
 the requested amount of bytes from the userspace buffer to guest
 memory or the other way round, and finally frees the IPTE-lock again.

 Does that sound like a viable solution (IMHO it does ;-))? Or should
 I maybe try to pursue another approach?
 
 It looks feasible to me as well.

Yes, we discussed this internally a lot and things are really tricky. The
ipte lock could be exported to userspace, but we might also need to handle
storage keys (and key protection) in an atomic fashion, so this really
looks like the only safe way.
I guess we will give it some more testing, but to me it looks like a good
candidate for kvm/next after 3.20-rc1.


Christian

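As a rough sketch of the flow described above (all helper names below are
placeholders for illustration, not the actual gaccess interface):

/* Illustrative pseudo-implementation of the proposed memop path. */
static long memop_copy(struct kvm_vcpu *vcpu, unsigned long gva,
		       void __user *ubuf, unsigned long len, bool is_write)
{
	unsigned long gpa;
	long rc;

	ipte_lock(vcpu);		/* serialize with IPTE/key operations */
	rc = translate_guest_addr(vcpu, gva, &gpa, is_write);
	if (!rc)
		rc = is_write ? copy_into_guest(vcpu, gpa, ubuf, len)
			      : copy_from_guest_to_user(vcpu, gpa, ubuf, len);
	ipte_unlock(vcpu);		/* drop the IPTE lock again */
	return rc;
}
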


Re: [PATCH RFC 1/1] KVM: s390: Add MEMOP ioctl for reading/writing guest memory

2015-02-04 Thread Christian Borntraeger
Am 04.02.2015 um 11:39 schrieb Paolo Bonzini:
 Conny:
 I am asking myself, if we should explicitly add a comment in the 
 virtio-ccw spec, that all accesses are assumed to be with key 0 and 
 thus never cause key protection. The change/reference bit is set
 by the underlying I/O or memory copy anyway.
 
 Can you explain the last sentence? :)

Whenever vhost or qemu or a finished aio request wrote content into a
virtio buffer, the HW has set the storage key for that physical page,
which makes it automatically dirty/referenced in the guest visible
storage key. 


For completeness' sake:
Now, if the guest does not use the storage key, but instead the new
fault-based software dirty tracking, it won't notice the change bit. The
guest I/O itself, when finished, will mark the struct page as Dirty, just
like on x86.

Makes sense?

Christian


