This patch introduces an interface to access the guest visible
storage keys. It supports three operations that model the behavior
that SSKE/ISKE/RRBE instructions would have if they were issued by
the guest. These instructions are all documented in the z Architecture
Principles of Operation book.

Signed-off-by: Carsten Otte <co...@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt |   38 +++++++++++++
 arch/s390/include/asm/kvm_host.h  |    4 +
 arch/s390/include/asm/pgtable.h   |    1 
 arch/s390/kvm/kvm-s390.c          |  108 ++++++++++++++++++++++++++++++++++++--
 arch/s390/mm/pgtable.c            |   70 ++++++++++++++++++------
 include/linux/kvm.h               |    7 ++
 6 files changed, 207 insertions(+), 21 deletions(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1514,6 +1514,44 @@ table upfront. This is useful to handle
 controlled virtual machines to fault in the virtual cpu's lowcore pages
 prior to calling the KVM_RUN ioctl.
 
+4.67 KVM_S390_KEYOP
+
+Capability: KVM_CAP_UCONTROL
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_keyop (in+out)
+Returns: 0 in case of success
+
+The parameter looks like this:
+       struct kvm_s390_keyop {
+               __u64 user_addr;
+               __u8  key;
+               __u8  operation;
+       };
+
+user_addr      contains the userspace address of a memory page
+key            contains the guest visible storage key as defined by the
+               z Architecture Principles of Operation book, including key
+               value for key controlled storage protection, the fetch
+               protection bit, and the reference and change indicator bits
+operation      indicates the key operation that should be performed
+
+The following operations are supported:
+KVM_S390_KEYOP_SSKE:
+       This operation behaves just like the set storage key extended (SSKE)
+       instruction would, if it were issued by the guest. The storage key
+       provided in "key" is placed in the guest visible storage key.
+KVM_S390_KEYOP_ISKE:
+       This operation behaves just like the insert storage key extended (ISKE)
+       instruction would, if it were issued by the guest. After this call,
+       the guest visible storage key is presented in the "key" field.
+KVM_S390_KEYOP_RRBE:
+       This operation behaves just like the reset referenced bit extended
+       (RRBE) instruction would, if it were issued by the guest. The guest
+       visible reference bit is cleared, and the value presented in the "key"
+       field after this call has the reference bit set to 1 in case the
+       guest view of the reference bit was 1 prior to this call.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -24,6 +24,10 @@
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 
+#define KVM_S390_KEYOP_SSKE 0x01
+#define KVM_S390_KEYOP_ISKE 0x02
+#define KVM_S390_KEYOP_RRBE 0x03
+
 struct sca_entry {
        atomic_t scn;
        __u32   reserved;
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1254,6 +1254,7 @@ static inline pte_t mk_swap_pte(unsigned
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern pte_t *ptep_for_addr(unsigned long addr);
 
 /*
  * No page table caches to initialise
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -112,13 +112,115 @@ void kvm_arch_exit(void)
 {
 }
 
+static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
+{
+       unsigned long addr = kop->user_addr;
+       pte_t *ptep;
+       pgste_t pgste;
+       int r;
+       unsigned long skey;
+       unsigned long bits;
+
+       /* make sure this process is a hypervisor */
+       r = -EINVAL;
+       if (!mm_has_pgste(current->mm))
+               goto out;
+
+       r = -EFAULT;
+       if (addr >= PGDIR_SIZE)
+               goto out;
+
+       spin_lock(&current->mm->page_table_lock);
+       ptep = ptep_for_addr(addr);
+       if (IS_ERR(ptep)) {
+               r = PTR_ERR(ptep);
+               goto out_unlock;
+       }
+
+       pgste = pgste_get_lock(ptep);
+
+       switch (kop->operation) {
+       case KVM_S390_KEYOP_SSKE:
+               pgste = pgste_update_all(ptep, pgste);
+               /* set the real key back w/o rc bits */
+               skey = kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+               if (pte_present(*ptep)) {
+                       page_set_storage_key(pte_val(*ptep), skey, 1);
+                       /* avoid race clobbering changed bit */
+                       pte_val(*ptep) |= _PAGE_SWC;
+               }
+               /* put acc+f plus guest referenced and changed into the pgste */
+               pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT | RCP_GR_BIT
+                                    | RCP_GC_BIT);
+               bits = (kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT));
+               pgste_val(pgste) |= bits << 56;
+               bits = (kop->key & (_PAGE_CHANGED | _PAGE_REFERENCED));
+               pgste_val(pgste) |= bits << 48;
+               r = 0;
+               break;
+       case KVM_S390_KEYOP_ISKE:
+               if (pte_present(*ptep)) {
+                       skey = page_get_storage_key(pte_val(*ptep));
+                       kop->key = skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+               } else {
+                       skey = 0;
+                       kop->key = (pgste_val(pgste) >> 56) &
+                                  (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+               }
+               kop->key |= skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+               kop->key |= (pgste_val(pgste) >> 48) &
+                           (_PAGE_CHANGED | _PAGE_REFERENCED);
+               r = 0;
+               break;
+       case KVM_S390_KEYOP_RRBE:
+               pgste = pgste_update_all(ptep, pgste);
+               kop->key = 0;
+               if (pgste_val(pgste) & RCP_GR_BIT)
+                       kop->key |= _PAGE_REFERENCED;
+               pgste_val(pgste) &= ~RCP_GR_BIT;
+               r = 0;
+               break;
+       default:
+               r = -EINVAL;
+       }
+       pgste_set_unlock(ptep, pgste);
+
+out_unlock:
+       spin_unlock(&current->mm->page_table_lock);
+out:
+       return r;
+}
+
 /* Section: device related */
 long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
 {
-       if (ioctl == KVM_S390_ENABLE_SIE)
-               return s390_enable_sie();
-       return -EINVAL;
+       void __user *argp = (void __user *)arg;
+       int r;
+
+       switch (ioctl) {
+       case KVM_S390_ENABLE_SIE:
+               r = s390_enable_sie();
+               break;
+       case KVM_S390_KEYOP: {
+               struct kvm_s390_keyop kop;
+               r = -EFAULT;
+               if (copy_from_user(&kop, argp, sizeof(struct kvm_s390_keyop)))
+                       break;
+               r = kvm_s390_keyop(&kop);
+               if (r)
+                       break;
+               r = -EFAULT;
+               if (copy_to_user(argp, &kop, sizeof(struct kvm_s390_keyop)))
+                       break;
+               r = 0;
+               break;
+       }
+       default:
+               r = -ENOTTY;
+       }
+
+       return r;
 }
 
 int kvm_dev_ioctl_check_extension(long ext)
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -393,6 +393,33 @@ out_unmap:
 }
 EXPORT_SYMBOL_GPL(gmap_map_segment);
 
+static pmd_t *__pmdp_for_addr(struct mm_struct *mm, unsigned long addr)
+{
+       struct vm_area_struct *vma;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       vma = find_vma(mm, addr);
+       if (!vma || (vma->vm_start > addr))
+               return ERR_PTR(-EFAULT);
+
+       pgd = pgd_offset(mm, addr);
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return ERR_PTR(-ENOMEM);
+
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return ERR_PTR(-ENOMEM);
+
+       if (!pmd_present(*pmd) &&
+           __pte_alloc(mm, vma, pmd, addr))
+               return ERR_PTR(-ENOMEM);
+
+       return pmd;
+}
+
 /*
  * this function is assumed to be called with mmap_sem held
  */
@@ -402,10 +429,7 @@ unsigned long __gmap_fault(unsigned long
        struct mm_struct *mm;
        struct gmap_pgtable *mp;
        struct gmap_rmap *rmap;
-       struct vm_area_struct *vma;
        struct page *page;
-       pgd_t *pgd;
-       pud_t *pud;
        pmd_t *pmd;
 
        current->thread.gmap_addr = address;
@@ -433,21 +457,11 @@ unsigned long __gmap_fault(unsigned long
                return mp->vmaddr | (address & ~PMD_MASK);
        } else if (segment & _SEGMENT_ENTRY_RO) {
                vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
-               vma = find_vma(mm, vmaddr);
-               if (!vma || vma->vm_start > vmaddr)
-                       return -EFAULT;
-
-               /* Walk the parent mm page table */
-               pgd = pgd_offset(mm, vmaddr);
-               pud = pud_alloc(mm, pgd, vmaddr);
-               if (!pud)
-                       return -ENOMEM;
-               pmd = pmd_alloc(mm, pud, vmaddr);
-               if (!pmd)
-                       return -ENOMEM;
-               if (!pmd_present(*pmd) &&
-                   __pte_alloc(mm, vma, pmd, vmaddr))
-                       return -ENOMEM;
+
+               pmd = __pmdp_for_addr(mm, vmaddr);
+               if (IS_ERR(pmd))
+                       return PTR_ERR(pmd);
+
                /* pmd now points to a valid segment table entry. */
                rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
                if (!rmap)
@@ -806,6 +820,26 @@ int s390_enable_sie(void)
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
+pte_t *ptep_for_addr(unsigned long addr)
+{
+       pmd_t *pmd;
+       pte_t *pte;
+
+       down_read(&current->mm->mmap_sem);
+
+       pmd = __pmdp_for_addr(current->mm, addr);
+       if (IS_ERR(pmd)) {
+               pte = (pte_t *)pmd;
+               goto up_out;
+       }
+
+       pte = pte_offset(pmd, addr);
+up_out:
+       up_read(&current->mm->mmap_sem);
+       return pte;
+}
+EXPORT_SYMBOL_GPL(ptep_for_addr);
+
 #if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
 bool kernel_page_present(struct page *page)
 {
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -449,6 +449,13 @@ struct kvm_ppc_pvinfo {
 #define KVM_GET_MSR_INDEX_LIST    _IOWR(KVMIO, 0x02, struct kvm_msr_list)
 
 #define KVM_S390_ENABLE_SIE       _IO(KVMIO,   0x06)
+
+struct kvm_s390_keyop {
+       __u64 user_addr;
+       __u8  key;
+       __u8  operation;
+};
+#define KVM_S390_KEYOP            _IOWR(KVMIO,   0x09, struct kvm_s390_keyop)
 /*
  * Check if a kvm extension is available.  Argument is extension number,
  * return is 1 (yes) or 0 (no, sorry).

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to