Re: [RFC/PATCH 01/15 v2] preparation: provide hook to enable pgstes in user pagetable

2008-03-24 Thread Andrew Morton
On Sat, 22 Mar 2008 18:02:37 +0100
Carsten Otte [EMAIL PROTECTED] wrote:

 From: Martin Schwidefsky [EMAIL PROTECTED]
 
 The SIE instruction on s390 uses the 2nd half of the page table page to
 virtualize the storage keys of a guest. This patch offers the s390_enable_sie
 function, which reorganizes the page tables of a single-threaded process to
 reserve space in the page table:
 s390_enable_sie makes sure that the process is single threaded and then uses
 dup_mm to create a new mm with reorganized page tables. The old mm is freed 
 and the process now has a page status extended field after every page table.
 
 Code that wants to exploit pgstes should SELECT CONFIG_PGSTE.
 
 This patch has a small common code hit, namely making dup_mm non-static.
 
 Edit (Carsten): I've modified Martin's patch, following Jeremy Fitzhardinge's
 review feedback. Now we do have the prototype for dup_mm in
 include/linux/sched.h.
 
 ...

 --- linux-host.orig/kernel/fork.c
 +++ linux-host/kernel/fork.c
 @@ -498,7 +498,7 @@ void mm_release(struct task_struct *tsk,
   * Allocate a new mm structure and copy contents from the
   * mm structure of the passed in task structure.
   */
 -static struct mm_struct *dup_mm(struct task_struct *tsk)
 +struct mm_struct *dup_mm(struct task_struct *tsk)
  {
   struct mm_struct *mm, *oldmm = current->mm;
   int err;

ack

 --- linux-host.orig/include/linux/sched.h
 +++ linux-host/include/linux/sched.h
 @@ -1758,6 +1758,8 @@ extern void mmput(struct mm_struct *);
  extern struct mm_struct *get_task_mm(struct task_struct *task);
  /* Remove the current tasks stale references to the old mm_struct */
  extern void mm_release(struct task_struct *, struct mm_struct *);
 +/* Allocate a new mm structure and copy contents from tsk->mm */
 +extern struct mm_struct *dup_mm(struct task_struct *tsk);
  
  extern int  copy_thread(int, unsigned long, unsigned long, unsigned long, 
 struct task_struct *, struct pt_regs *);
  extern void flush_thread(void);
 

hm, why did we put these in sched.h?

oh well - acked-by-me.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization


[RFC/PATCH 01/15 v2] preparation: provide hook to enable pgstes in user pagetable

2008-03-22 Thread Carsten Otte
From: Martin Schwidefsky [EMAIL PROTECTED]

The SIE instruction on s390 uses the 2nd half of the page table page to
virtualize the storage keys of a guest. This patch offers the s390_enable_sie
function, which reorganizes the page tables of a single-threaded process to
reserve space in the page table:
s390_enable_sie makes sure that the process is single threaded and then uses
dup_mm to create a new mm with reorganized page tables. The old mm is freed 
and the process now has a page status extended field after every page table.

Code that wants to exploit pgstes should SELECT CONFIG_PGSTE.

This patch has a small common code hit, namely making dup_mm non-static.

Edit (Carsten): I've modified Martin's patch, following Jeremy Fitzhardinge's
review feedback. Now we do have the prototype for dup_mm in
include/linux/sched.h.

Signed-off-by: Martin Schwidefsky [EMAIL PROTECTED]
Signed-off-by: Carsten Otte [EMAIL PROTECTED]
---

 arch/s390/Kconfig  |4 +++
 arch/s390/kernel/setup.c   |4 +++
 arch/s390/mm/pgtable.c |   53 ++---
 include/asm-s390/mmu.h |1 
 include/asm-s390/mmu_context.h |8 +-
 include/asm-s390/pgtable.h |1 
 include/linux/sched.h  |2 +
 kernel/fork.c  |2 -
 8 files changed, 70 insertions(+), 5 deletions(-)

Index: linux-host/arch/s390/Kconfig
===
--- linux-host.orig/arch/s390/Kconfig
+++ linux-host/arch/s390/Kconfig
@@ -55,6 +55,10 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
 
+config PGSTE
+   bool
+   default y if KVM
+
 mainmenu "Linux Kernel Configuration"
 
 config S390
Index: linux-host/arch/s390/kernel/setup.c
===
--- linux-host.orig/arch/s390/kernel/setup.c
+++ linux-host/arch/s390/kernel/setup.c
@@ -315,7 +315,11 @@ static int __init early_parse_ipldelay(c
 early_param("ipldelay", early_parse_ipldelay);
 
 #ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
 unsigned int switch_amode = 0;
+#endif
 EXPORT_SYMBOL_GPL(switch_amode);
 
 static void set_amode_and_uaccess(unsigned long user_amode,
Index: linux-host/arch/s390/mm/pgtable.c
===
--- linux-host.orig/arch/s390/mm/pgtable.c
+++ linux-host/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
 #define TABLES_PER_PAGE 4
 #define FRAG_MASK  15UL
 #define SECOND_HALVES  10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+   clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+   memset(table + 256, 0, PAGE_SIZE/4);
+   clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+   memset(table + 768, 0, PAGE_SIZE/4);
+}
+
 #else
 #define ALLOC_ORDER 2
 #define TABLES_PER_PAGE 2
 #define FRAG_MASK  3UL
 #define SECOND_HALVES  2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+   clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+   memset(table + 256, 0, PAGE_SIZE/2);
+}
+
 #endif
 
 unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct m
unsigned long *table;
unsigned long bits;
 
-   bits = mm->context.noexec ? 3UL : 1UL;
+   bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
spin_lock(&mm->page_table_lock);
page = NULL;
if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct m
pgtable_page_ctor(page);
page->flags &= ~FRAG_MASK;
table = (unsigned long *) page_to_phys(page);
-   clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+   if (mm->context.pgstes)
+   clear_table_pgstes(table);
+   else
+   clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
spin_lock(&mm->page_table_lock);
list_add(&page->lru, &mm->context.pgtable_list);
}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *m
struct page *page;
unsigned long bits;
 
-   bits = mm->context.noexec ? 3UL : 1UL;
+   bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock(&mm->page_table_lock);
@@ -228,3 +247,31 @@ void disable_noexec(struct mm_struct *mm
mm->context.noexec = 0;
update_mm(mm, tsk);
 }
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+   struct task_struct *tsk = current;
+   struct mm_struct *mm;
+
+   if (tsk->mm->context.pgstes)
+   return 0;
+   if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+   tsk->mm != tsk->active_mm ||