Add changes to s390 memory management which are necessary to use the s390
hardware-assisted virtualization facility. For this, the upper half of each
page table needs to be reserved so the hardware can save extended page status
bits for the guest and the host.
Easy solution to this is to just change PTRS_PER_PTE and PTRS_PER_PMD
accordingly, so the upper halves of the pages that contain page tables are
unused and can be used by the hardware.
Unfortunately, with these #ifdef changes we need twice as much memory for the
page tables of every process, even for processes which don't need to save
extended status bits.
Maybe a better solution would be to make PTRS_PER_PTE and PTRS_PER_PMD
per-process values and only double the size of the page tables if the
process wants to make use of the virtualization facility.
---
include/asm-s390/page.h | 8 +
include/asm-s390/pgalloc.h | 5 +
include/asm-s390/pgtable.h | 197 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 209 insertions(+), 1 deletion(-)
Index: linux-2.6/include/asm-s390/pgtable.h
===================================================================
--- linux-2.6.orig/include/asm-s390/pgtable.h
+++ linux-2.6/include/asm-s390/pgtable.h
@@ -65,7 +65,11 @@ extern char empty_zero_page[PAGE_SIZE];
# define PMD_SHIFT 22
# define PGDIR_SHIFT 22
#else /* __s390x__ */
+#ifdef CONFIG_S390_HOST
+# define PMD_SHIFT 20
+#else
# define PMD_SHIFT 21
+#endif
# define PGDIR_SHIFT 31
#endif /* __s390x__ */
@@ -85,8 +89,13 @@ extern char empty_zero_page[PAGE_SIZE];
# define PTRS_PER_PMD 1
# define PTRS_PER_PGD 512
#else /* __s390x__ */
+#ifdef CONFIG_S390_HOST
+# define PTRS_PER_PTE 256
+# define PTRS_PER_PMD 2048
+#else
# define PTRS_PER_PTE 512
# define PTRS_PER_PMD 1024
+#endif
# define PTRS_PER_PGD 2048
#endif /* __s390x__ */
@@ -217,6 +226,18 @@ extern unsigned long vmalloc_end;
#define _PAGE_SWT 0x001 /* SW pte type bit t */
#define _PAGE_SWX 0x002 /* SW pte type bit x */
+#ifdef CONFIG_S390_HOST
+#define _PAGE_SOFT_REFERENCED 0x4
+#define _PAGE_SOFT_CHANGED 0x8
+
+/* Page status extended */
+#define _PAGE_RCP_PCL 0x0080000000000000UL
+#define _PAGE_RCP_HR 0x0040000000000000UL
+#define _PAGE_RCP_HC 0x0020000000000000UL
+#define _PAGE_RCP_GR 0x0004000000000000UL
+#define _PAGE_RCP_GC 0x0002000000000000UL
+#endif
+
/* Six different types of pages. */
#define _PAGE_TYPE_EMPTY 0x400
#define _PAGE_TYPE_NONE 0x401
@@ -514,6 +535,9 @@ static inline int pte_write(pte_t pte)
static inline int pte_dirty(pte_t pte)
{
+#ifdef CONFIG_S390_HOST
+ return (pte_val(pte) & _PAGE_SOFT_CHANGED) != 0;
+#endif
/* A pte is neither clean nor dirty on s/390. The dirty bit
* is in the storage key. See page_test_and_clear_dirty for
* details.
@@ -523,6 +547,9 @@ static inline int pte_dirty(pte_t pte)
static inline int pte_young(pte_t pte)
{
+#ifdef CONFIG_S390_HOST
+ return (pte_val(pte) & _PAGE_SOFT_REFERENCED) != 0;
+#endif
/* A pte is neither young nor old on s/390. The young bit
* is in the storage key. See page_test_and_clear_young for
* details.
@@ -582,7 +609,9 @@ static inline void pgd_clear(pgd_t * pgd
static inline void pmd_clear_kernel(pmd_t * pmdp)
{
pmd_val(*pmdp) = _PMD_ENTRY_INV | _PMD_ENTRY;
+#ifndef CONFIG_S390_HOST
pmd_val1(*pmdp) = _PMD_ENTRY_INV | _PMD_ENTRY;
+#endif
}
static inline void pmd_clear(pmd_t * pmdp)
@@ -632,6 +661,9 @@ static inline pte_t pte_mkwrite(pte_t pt
static inline pte_t pte_mkclean(pte_t pte)
{
+#ifdef CONFIG_S390_HOST
+ pte_val(pte) &= ~_PAGE_SOFT_CHANGED;
+#endif
/* The only user of pte_mkclean is the fork() code.
We must *not* clear the *physical* page dirty bit
just because fork() wants to clear the dirty bit in
@@ -641,6 +673,9 @@ static inline pte_t pte_mkclean(pte_t pt
static inline pte_t pte_mkdirty(pte_t pte)
{
+#ifdef CONFIG_S390_HOST
+ pte_val(pte) |= _PAGE_SOFT_CHANGED;
+#endif
/* We do not explicitly set the dirty bit because the
* sske instruction is slow. It is faster to let the
* next instruction set the dirty bit.
@@ -650,6 +685,9 @@ static inline pte_t pte_mkdirty(pte_t pt
static inline pte_t pte_mkold(pte_t pte)
{
+#ifdef CONFIG_S390_HOST
+ pte_val(pte) &= ~_PAGE_SOFT_REFERENCED;
+#endif
/* S/390 doesn't keep its dirty/referenced bit in the pte.
* There is no point in clearing the real referenced bit.
*/
@@ -658,14 +696,111 @@ static inline pte_t pte_mkold(pte_t pte)
static inline pte_t pte_mkyoung(pte_t pte)
{
+#ifdef CONFIG_S390_HOST
+ pte_val(pte) |= _PAGE_SOFT_REFERENCED;
+#endif
/* S/390 doesn't keep its dirty/referenced bit in the pte.
* There is no point in setting the real referenced bit.
*/
return pte;
}
+#ifdef CONFIG_S390_HOST
+static inline void rcp_lock(pte_t *ptep)
+{
+ unsigned long val1, val2, old, new;
+ unsigned long *rcp;
+
+ preempt_disable();
+ val1 = _PAGE_RCP_PCL;
+ val2 = ~_PAGE_RCP_PCL;
+ rcp = (unsigned long *) (ptep + PTRS_PER_PTE);
+
+ asm volatile(" lg %0,0(%5)\n" \
+ "0: ngr %0,%3\n" \
+ " lgr %1,%0\n" \
+ " ogr %1,%2\n" \
+ " csg %0,%1,0(%5)\n" \
+ " jl 0b\n" \
+ : "=&d" (old), "=&d" (new), \
+ "+&d" (val1), "+&d" (val2), \
+ "=m" (*rcp) \
+ : "a" (rcp), "m" (*rcp), \
+ "m" (val1), "m" (val2) \
+ : "cc", "memory");
+}
+
+static inline void rcp_unlock(pte_t *ptep)
+{
+ unsigned long val, old, new;
+ unsigned long *rcp;
+
+ val = ~_PAGE_RCP_PCL;
+ rcp = (unsigned long *) (ptep + PTRS_PER_PTE);
+
+ asm volatile(" lg %0,0(%4)\n" \
+ " lgr %1,%0\n" \
+ " ngr %1,%2\n" \
+ " csg %0,%1,0(%4)\n" \
+ : "=&d" (old), "=&d" (new), \
+ "+&d" (val), "=m" (*rcp) \
+ : "a" (rcp), "m" (*rcp), \
+ "m" (val) \
+ : "cc", "memory");
+ preempt_enable();
+}
+
+static inline void rcp_set_bits(pte_t *ptep, unsigned long val)
+{
+ *(unsigned long *) (ptep + PTRS_PER_PTE) |= val;
+}
+
+static inline int rcp_test_bits(pte_t *ptep, unsigned long val)
+{
+ return ((*(unsigned long *) (ptep + PTRS_PER_PTE)) & val) == val;
+}
+
+static inline int rcp_test_and_clear_bits(pte_t *ptep, unsigned long val)
+{
+ unsigned long ret;
+
+ ret = *(unsigned long *) (ptep + PTRS_PER_PTE);
+ *(unsigned long *) (ptep + PTRS_PER_PTE) &= ~val;
+ return (ret & val) == val;
+}
+
+static inline void ____global_flush_tlb(void)
+{
+ register unsigned long addr asm("4");
+ long dummy;
+
+ dummy = 0;
+ addr = ((unsigned long) &dummy) + 1;
+ __asm__ __volatile__ (" slr 2,2\n"
+ " slr 3,3\n"
+ " csp 2,%0"
+ : : "a" (addr), "m" (dummy) : "cc", "2", "3" );
+}
+
+#endif
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+#ifdef CONFIG_S390_HOST
+ unsigned long physpage = pte_val(*ptep) & PAGE_MASK;
+ int ccode, rc;
+
+ rcp_lock(ptep);
+ asm volatile ("rrbe 0,%1\n\t"
+ "ipm %0\n\t"
+ "srl %0,28\n\t"
+ : "=d" (ccode) : "a" (physpage) : "cc" );
+ rc = (ccode & 2) != 0;
+ if (rc)
+ rcp_set_bits(ptep, _PAGE_RCP_GR);
+ rc |= rcp_test_and_clear_bits(ptep, _PAGE_RCP_HR);
+ rcp_unlock(ptep);
+ return rc;
+#endif
return 0;
}
@@ -673,12 +808,36 @@ static inline int
ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
+#ifdef CONFIG_S390_HOST
+ int rc;
+
+ rc = ptep_test_and_clear_young(vma, address, ptep);
+ if (rc)
+ ____global_flush_tlb();
+ return rc;
+#endif
/* No need to flush TLB; bits are in storage key */
return ptep_test_and_clear_young(vma, address, ptep);
}
static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+#ifdef CONFIG_S390_HOST
+ unsigned long physpage = pte_val(*ptep) & PAGE_MASK;
+ int skey;
+ int rc;
+
+ rcp_lock(ptep);
+ skey = page_get_storage_key(physpage);
+ rc = (skey & _PAGE_CHANGED) != 0;
+ if (rc) {
+ rcp_set_bits(ptep, _PAGE_RCP_GC);
+ page_set_storage_key(physpage, skey & ~_PAGE_CHANGED);
+ }
+ rc |= rcp_test_and_clear_bits(ptep, _PAGE_RCP_HC);
+ rcp_unlock(ptep);
+ return rc;
+#endif
return 0;
}
@@ -686,6 +845,14 @@ static inline int
ptep_clear_flush_dirty(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
+#ifdef CONFIG_S390_HOST
+ int rc;
+
+ rc = ptep_test_and_clear_dirty(vma, address, ptep);
+ if (rc)
+ ____global_flush_tlb();
+ return rc;
+#endif
/* No need to flush TLB; bits are in storage key */
return ptep_test_and_clear_dirty(vma, address, ptep);
}
@@ -693,6 +860,21 @@ ptep_clear_flush_dirty(struct vm_area_st
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long
addr, pte_t *ptep)
{
pte_t pte = *ptep;
+
+#ifdef CONFIG_S390_HOST
+ unsigned long physpage = pte_val(*ptep) & PAGE_MASK;
+ int skey;
+
+ if (!(pte_val(pte) & _PAGE_INVALID)) {
+ rcp_lock(ptep);
+ skey = page_get_storage_key(physpage);
+ if ((skey & _PAGE_CHANGED) || rcp_test_bits(ptep, _PAGE_RCP_HC))
+ pte_val(pte) |= _PAGE_SOFT_CHANGED;
+ if ((skey & _PAGE_REFERENCED) || rcp_test_bits(ptep,
_PAGE_RCP_HR))
+ pte_val(pte) |= _PAGE_SOFT_REFERENCED;
+ rcp_unlock(ptep);
+ }
+#endif
pte_clear(mm, addr, ptep);
return pte;
}
@@ -721,7 +903,20 @@ ptep_clear_flush(struct vm_area_struct *
{
pte_t pte = *ptep;
pte_t *shadow_pte = get_shadow_pte(ptep);
-
+#ifdef CONFIG_S390_HOST
+ unsigned long physpage = pte_val(*ptep) & PAGE_MASK;
+ int skey;
+
+ if (!(pte_val(pte) & _PAGE_INVALID)) {
+ rcp_lock(ptep);
+ skey = page_get_storage_key(physpage);
+ if ((skey & _PAGE_CHANGED) || rcp_test_bits(ptep, _PAGE_RCP_HC))
+ pte_val(pte) |= _PAGE_SOFT_CHANGED;
+ if ((skey & _PAGE_REFERENCED) || rcp_test_bits(ptep,
_PAGE_RCP_HR))
+ pte_val(pte) |= _PAGE_SOFT_REFERENCED;
+ rcp_unlock(ptep);
+ }
+#endif
__ptep_ipte(address, ptep);
if (shadow_pte)
__ptep_ipte(address, shadow_pte);
Index: linux-2.6/include/asm-s390/page.h
===================================================================
--- linux-2.6.orig/include/asm-s390/page.h
+++ linux-2.6/include/asm-s390/page.h
@@ -93,13 +93,21 @@ typedef struct {
#else /* __s390x__ */
typedef struct {
+#ifdef CONFIG_S390_HOST
+ unsigned long pmd;
+#else
unsigned long pmd0;
unsigned long pmd1;
+#endif
} pmd_t;
typedef struct { unsigned long pgd; } pgd_t;
+#ifdef CONFIG_S390_HOST
+#define pmd_val(x) ((x).pmd)
+#else
#define pmd_val(x) ((x).pmd0)
#define pmd_val1(x) ((x).pmd1)
+#endif
#define pgd_val(x) ((x).pgd)
#endif /* __s390x__ */
Index: linux-2.6/include/asm-s390/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-s390/pgalloc.h
+++ linux-2.6/include/asm-s390/pgalloc.h
@@ -154,7 +154,9 @@ pmd_populate_kernel(struct mm_struct *mm
pmd_val(pmd[3]) = _PAGE_TABLE + __pa(pte+768);
#else /* __s390x__ */
pmd_val(*pmd) = _PMD_ENTRY + __pa(pte);
+#ifndef CONFIG_S390_HOST
pmd_val1(*pmd) = _PMD_ENTRY + __pa(pte+256);
+#endif
#endif /* __s390x__ */
}
@@ -196,6 +198,9 @@ pte_alloc_one_kernel(struct mm_struct *m
pte_clear(mm, vmaddr, pte + i);
vmaddr += PAGE_SIZE;
}
+#ifdef CONFIG_S390_HOST
+ memset(pte + PTRS_PER_PTE, 0, sizeof(pte_t) * PTRS_PER_PTE);
+#endif
return pte;
}
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel