Add changes to s390 memory management which are necessary to use the s390
hardware assisted virtualization facility. For this the upper half of each
page table needs to be reserved so the hardware can save extended page status
bits for the guest and the host.
Easy solution to this is to just change PTRS_PER_PTE and PTRS_PER_PMD
accordingly, so the upper halves of the pages that contain page tables are
unused and can be used by the hardware.
Unfortunately with these #ifdef changes we need twice as much memory for
processes, even for those which don't need to save extended status bits.

Maybe a better solution would be to make PTRS_PER_PTE and PTRS_PER_PMD
a per-process value and only double the size of the page tables if the
process wants to make use of the virtualization instruction.

---
 include/asm-s390/page.h    |    8 +
 include/asm-s390/pgalloc.h |    5 +
 include/asm-s390/pgtable.h |  197 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 209 insertions(+), 1 deletion(-)

Index: linux-2.6/include/asm-s390/pgtable.h
===================================================================
--- linux-2.6.orig/include/asm-s390/pgtable.h
+++ linux-2.6/include/asm-s390/pgtable.h
@@ -65,7 +65,11 @@ extern char empty_zero_page[PAGE_SIZE];
 # define PMD_SHIFT     22
 # define PGDIR_SHIFT   22
 #else /* __s390x__ */
+#ifdef CONFIG_S390_HOST
+# define PMD_SHIFT     20
+#else
 # define PMD_SHIFT     21
+#endif
 # define PGDIR_SHIFT   31
 #endif /* __s390x__ */
 
@@ -85,8 +89,13 @@ extern char empty_zero_page[PAGE_SIZE];
 # define PTRS_PER_PMD    1
 # define PTRS_PER_PGD    512
 #else /* __s390x__ */
+#ifdef CONFIG_S390_HOST
+# define PTRS_PER_PTE    256
+# define PTRS_PER_PMD    2048
+#else
 # define PTRS_PER_PTE    512
 # define PTRS_PER_PMD    1024
+#endif
 # define PTRS_PER_PGD    2048
 #endif /* __s390x__ */
 
@@ -217,6 +226,18 @@ extern unsigned long vmalloc_end;
 #define _PAGE_SWT      0x001           /* SW pte type bit t */
 #define _PAGE_SWX      0x002           /* SW pte type bit x */
 
+#ifdef CONFIG_S390_HOST
+#define _PAGE_SOFT_REFERENCED  0x4
+#define _PAGE_SOFT_CHANGED     0x8
+
+/* Page status extended */
+#define _PAGE_RCP_PCL  0x0080000000000000UL
+#define _PAGE_RCP_HR   0x0040000000000000UL
+#define _PAGE_RCP_HC   0x0020000000000000UL
+#define _PAGE_RCP_GR   0x0004000000000000UL
+#define _PAGE_RCP_GC   0x0002000000000000UL
+#endif
+
 /* Six different types of pages. */
 #define _PAGE_TYPE_EMPTY       0x400
 #define _PAGE_TYPE_NONE                0x401
@@ -514,6 +535,9 @@ static inline int pte_write(pte_t pte)
 
 static inline int pte_dirty(pte_t pte)
 {
+#ifdef CONFIG_S390_HOST
+       return (pte_val(pte) & _PAGE_SOFT_CHANGED) != 0;
+#endif
        /* A pte is neither clean nor dirty on s/390. The dirty bit
         * is in the storage key. See page_test_and_clear_dirty for
         * details.
@@ -523,6 +547,9 @@ static inline int pte_dirty(pte_t pte)
 
 static inline int pte_young(pte_t pte)
 {
+#ifdef CONFIG_S390_HOST
+       return (pte_val(pte) & _PAGE_SOFT_REFERENCED) != 0;
+#endif
        /* A pte is neither young nor old on s/390. The young bit
         * is in the storage key. See page_test_and_clear_young for
         * details.
@@ -582,7 +609,9 @@ static inline void pgd_clear(pgd_t * pgd
 static inline void pmd_clear_kernel(pmd_t * pmdp)
 {
        pmd_val(*pmdp) = _PMD_ENTRY_INV | _PMD_ENTRY;
+#ifndef CONFIG_S390_HOST
        pmd_val1(*pmdp) = _PMD_ENTRY_INV | _PMD_ENTRY;
+#endif
 }
 
 static inline void pmd_clear(pmd_t * pmdp)
@@ -632,6 +661,9 @@ static inline pte_t pte_mkwrite(pte_t pt
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
+#ifdef CONFIG_S390_HOST
+       pte_val(pte) &= ~_PAGE_SOFT_CHANGED;
+#endif
        /* The only user of pte_mkclean is the fork() code.
           We must *not* clear the *physical* page dirty bit
           just because fork() wants to clear the dirty bit in
@@ -641,6 +673,9 @@ static inline pte_t pte_mkclean(pte_t pt
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
+#ifdef CONFIG_S390_HOST
+       pte_val(pte) |= _PAGE_SOFT_CHANGED;
+#endif
        /* We do not explicitly set the dirty bit because the
         * sske instruction is slow. It is faster to let the
         * next instruction set the dirty bit.
@@ -650,6 +685,9 @@ static inline pte_t pte_mkdirty(pte_t pt
 
 static inline pte_t pte_mkold(pte_t pte)
 {
+#ifdef CONFIG_S390_HOST
+       pte_val(pte) &= ~_PAGE_SOFT_REFERENCED;
+#endif
        /* S/390 doesn't keep its dirty/referenced bit in the pte.
         * There is no point in clearing the real referenced bit.
         */
@@ -658,14 +696,111 @@ static inline pte_t pte_mkold(pte_t pte)
 
 static inline pte_t pte_mkyoung(pte_t pte)
 {
+#ifdef CONFIG_S390_HOST
+       pte_val(pte) |= _PAGE_SOFT_REFERENCED;
+#endif
        /* S/390 doesn't keep its dirty/referenced bit in the pte.
         * There is no point in setting the real referenced bit.
         */
        return pte;
 }
+#ifdef CONFIG_S390_HOST
+static inline void rcp_lock(pte_t *ptep)
+{
+       unsigned long val1, val2, old, new;
+       unsigned long *rcp;
+
+       preempt_disable();
+       val1 = _PAGE_RCP_PCL;
+       val2 = ~_PAGE_RCP_PCL;
+       rcp = (unsigned long *) (ptep + PTRS_PER_PTE);
+
+       asm volatile("   lg   %0,0(%5)\n"          \
+                    "0: ngr  %0,%3\n"             \
+                    "   lgr  %1,%0\n"             \
+                    "   ogr  %1,%2\n"             \
+                    "   csg  %0,%1,0(%5)\n"       \
+                    "   jl   0b\n"                \
+                    : "=&d" (old), "=&d" (new),   \
+                      "+&d" (val1), "+&d" (val2), \
+                      "=m" (*rcp)                 \
+                    : "a" (rcp), "m" (*rcp),      \
+                      "m" (val1), "m" (val2)      \
+                    : "cc", "memory");
+}
+
+static inline void rcp_unlock(pte_t *ptep)
+{
+       unsigned long val, old, new;
+       unsigned long *rcp;
+
+       val = ~_PAGE_RCP_PCL;
+       rcp = (unsigned long *) (ptep + PTRS_PER_PTE);
+
+       asm volatile("   lg   %0,0(%4)\n"          \
+                    "   lgr  %1,%0\n"             \
+                    "   ngr  %1,%2\n"             \
+                    "   csg  %0,%1,0(%4)\n"       \
+                    : "=&d" (old), "=&d" (new),   \
+                      "+&d" (val), "=m" (*rcp)    \
+                    : "a" (rcp), "m" (*rcp),      \
+                      "m" (val)                   \
+                    : "cc", "memory");
+       preempt_enable();
+}
+
+static inline void rcp_set_bits(pte_t *ptep, unsigned long val)
+{
+       *(unsigned long *) (ptep + PTRS_PER_PTE) |= val;
+}
+
+static inline int rcp_test_bits(pte_t *ptep, unsigned long val)
+{
+       return ((*(unsigned long *) (ptep + PTRS_PER_PTE)) & val) == val;
+}
+
+static inline int rcp_test_and_clear_bits(pte_t *ptep, unsigned long val)
+{
+       unsigned long ret;
+
+       ret = *(unsigned long *) (ptep + PTRS_PER_PTE);
+       *(unsigned long *) (ptep + PTRS_PER_PTE) &= ~val;
+       return (ret & val) == val;
+}
+
+static inline void ____global_flush_tlb(void)
+{
+       register unsigned long addr asm("4");
+       long dummy;
+
+       dummy = 0;
+       addr = ((unsigned long) &dummy) + 1;
+       __asm__ __volatile__ ("    slr  2,2\n"
+                             "    slr  3,3\n"
+                             "    csp  2,%0"
+                             : : "a" (addr), "m" (dummy) : "cc", "2", "3" );
+}
+
+#endif
 
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 
unsigned long addr, pte_t *ptep)
 {
+#ifdef CONFIG_S390_HOST
+       unsigned long physpage = pte_val(*ptep) & PAGE_MASK;
+       int ccode, rc;
+
+       rcp_lock(ptep);
+       asm volatile ("rrbe 0,%1\n\t"
+                     "ipm  %0\n\t"
+                     "srl  %0,28\n\t"
+                     : "=d" (ccode) : "a" (physpage) : "cc" );
+       rc = (ccode & 2) != 0;
+       if (rc)
+               rcp_set_bits(ptep, _PAGE_RCP_GR);
+       rc |= rcp_test_and_clear_bits(ptep, _PAGE_RCP_HR);
+       rcp_unlock(ptep);
+       return rc;
+#endif
        return 0;
 }
 
@@ -673,12 +808,36 @@ static inline int
 ptep_clear_flush_young(struct vm_area_struct *vma,
                        unsigned long address, pte_t *ptep)
 {
+#ifdef CONFIG_S390_HOST
+       int rc;
+
+       rc = ptep_test_and_clear_young(vma, address, ptep);
+       if (rc)
+               ____global_flush_tlb();
+       return rc;
+#endif
        /* No need to flush TLB; bits are in storage key */
        return ptep_test_and_clear_young(vma, address, ptep);
 }
 
 static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, 
unsigned long addr, pte_t *ptep)
 {
+#ifdef CONFIG_S390_HOST
+       unsigned long physpage = pte_val(*ptep) & PAGE_MASK;
+       int skey;
+       int rc;
+
+       rcp_lock(ptep);
+       skey = page_get_storage_key(physpage);
+       rc = (skey & _PAGE_CHANGED) != 0;
+       if (rc) {
+               rcp_set_bits(ptep, _PAGE_RCP_GC);
+               page_set_storage_key(physpage, skey & ~_PAGE_CHANGED);
+       }
+       rc |= rcp_test_and_clear_bits(ptep, _PAGE_RCP_HC);
+       rcp_unlock(ptep);
+       return rc;
+#endif
        return 0;
 }
 
@@ -686,6 +845,14 @@ static inline int
 ptep_clear_flush_dirty(struct vm_area_struct *vma,
                        unsigned long address, pte_t *ptep)
 {
+#ifdef CONFIG_S390_HOST
+       int rc;
+
+       rc = ptep_test_and_clear_dirty(vma, address, ptep);
+       if (rc)
+               ____global_flush_tlb();
+       return rc;
+#endif
        /* No need to flush TLB; bits are in storage key */
        return ptep_test_and_clear_dirty(vma, address, ptep);
 }
@@ -693,6 +860,21 @@ ptep_clear_flush_dirty(struct vm_area_st
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long 
addr, pte_t *ptep)
 {
        pte_t pte = *ptep;
+
+#ifdef CONFIG_S390_HOST
+       unsigned long physpage = pte_val(*ptep) & PAGE_MASK;
+       int skey;
+
+       if (!(pte_val(pte) & _PAGE_INVALID)) {
+               rcp_lock(ptep);
+               skey = page_get_storage_key(physpage);
+               if ((skey & _PAGE_CHANGED) || rcp_test_bits(ptep, _PAGE_RCP_HC))
+                       pte_val(pte) |= _PAGE_SOFT_CHANGED;
+               if ((skey & _PAGE_REFERENCED) || rcp_test_bits(ptep, 
_PAGE_RCP_HR))
+                       pte_val(pte) |= _PAGE_SOFT_REFERENCED;
+               rcp_unlock(ptep);
+       }
+#endif
        pte_clear(mm, addr, ptep);
        return pte;
 }
@@ -721,7 +903,20 @@ ptep_clear_flush(struct vm_area_struct *
 {
        pte_t pte = *ptep;
        pte_t *shadow_pte = get_shadow_pte(ptep);
-
+#ifdef CONFIG_S390_HOST
+       unsigned long physpage = pte_val(*ptep) & PAGE_MASK;
+       int skey;
+
+       if (!(pte_val(pte) & _PAGE_INVALID)) {
+               rcp_lock(ptep);
+               skey = page_get_storage_key(physpage);
+               if ((skey & _PAGE_CHANGED) || rcp_test_bits(ptep, _PAGE_RCP_HC))
+                       pte_val(pte) |= _PAGE_SOFT_CHANGED;
+               if ((skey & _PAGE_REFERENCED) || rcp_test_bits(ptep, 
_PAGE_RCP_HR))
+                       pte_val(pte) |= _PAGE_SOFT_REFERENCED;
+               rcp_unlock(ptep);
+       }
+#endif
        __ptep_ipte(address, ptep);
        if (shadow_pte)
                __ptep_ipte(address, shadow_pte);
Index: linux-2.6/include/asm-s390/page.h
===================================================================
--- linux-2.6.orig/include/asm-s390/page.h
+++ linux-2.6/include/asm-s390/page.h
@@ -93,13 +93,21 @@ typedef struct {
 #else /* __s390x__ */
 
 typedef struct { 
+#ifdef CONFIG_S390_HOST
+        unsigned long pmd;
+#else
         unsigned long pmd0;
         unsigned long pmd1; 
+#endif
         } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
 
+#ifdef CONFIG_S390_HOST
+#define pmd_val(x)      ((x).pmd)
+#else
 #define pmd_val(x)      ((x).pmd0)
 #define pmd_val1(x)     ((x).pmd1)
+#endif
 #define pgd_val(x)      ((x).pgd)
 
 #endif /* __s390x__ */
Index: linux-2.6/include/asm-s390/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-s390/pgalloc.h
+++ linux-2.6/include/asm-s390/pgalloc.h
@@ -154,7 +154,9 @@ pmd_populate_kernel(struct mm_struct *mm
        pmd_val(pmd[3]) = _PAGE_TABLE + __pa(pte+768);
 #else /* __s390x__ */
        pmd_val(*pmd) = _PMD_ENTRY + __pa(pte);
+#ifndef CONFIG_S390_HOST
        pmd_val1(*pmd) = _PMD_ENTRY + __pa(pte+256);
+#endif
 #endif /* __s390x__ */
 }
 
@@ -196,6 +198,9 @@ pte_alloc_one_kernel(struct mm_struct *m
                pte_clear(mm, vmaddr, pte + i);
                vmaddr += PAGE_SIZE;
        }
+#ifdef CONFIG_S390_HOST
+       memset(pte + PTRS_PER_PTE, 0, sizeof(pte_t) * PTRS_PER_PTE);
+#endif
        return pte;
 }
 



-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to