From: Fenghua Yu <fenghua...@intel.com>

PKS allows kernel users to define domains of page mappings which have
additional protections beyond the paging protections.

Add an API to allocate, use, and free a protection key which identifies
such a domain.

We export 2 new symbols pks_key_alloc() and pks_key_free() while
pks_update_protection() is exposed as an inline function via header
file.

pks_key_alloc() reserves pkey 0 for default kernel pages.  The other 15
keys are dynamically allocated to allow better use of the limited key
space.

This, and the fact that PKS may not be available on all arch's, means
callers of the allocator _must_ be prepared for it to fail and take
appropriate action to run without their allocated domain.  This is not
anticipated to be a problem as these protections only serve to harden
memory and users should be no worse off than before the introduction of
PKS.

PAGE_KERNEL_PKEY(key) and _PAGE_PKEY(pkey) aid in setting page table
entry bits by kernel users.  Note these defines will be used in follow
on patches but are included here for a complete interface.

pks_update_protection() is inlined for performance and allows kernel
users the ability to change the protections for the domain identified by
the pkey specified.  It is undefined behavior to call this on a pkey not
allocated by the allocator.  And will WARN_ON if called on architectures
which do not support PKS.  (Again callers are expected to check the
return of pks_key_alloc() before using this API further.)

It should also be noted that the underlying WRMSR(MSR_IA32_PKRS) is not
serializing but still maintains ordering properties similar to WRPKRU.
The current SDM section on PKRS needs updating but should be the same as
that of WRPKRU.  So to quote from the WRPKRU text:

        WRPKRU will never execute speculatively. Memory accesses
        affected by PKRU register will not execute (even speculatively)
        until all prior executions of WRPKRU have completed execution
        and updated the PKRU register.

Finally, pks_key_free() allows a user to return the key to the
allocator for use by others.

The interface maintains Access Disabled (AD=1) for all keys not
currently allocated.  Therefore, the user can depend on access being
disabled when pks_key_alloc() returns a key.

Co-developed-by: Ira Weiny <ira.we...@intel.com>
Signed-off-by: Ira Weiny <ira.we...@intel.com>
Signed-off-by: Fenghua Yu <fenghua...@intel.com>
---
 arch/x86/include/asm/pgtable_types.h  |  4 ++
 arch/x86/include/asm/pkeys.h          | 30 ++++++++++
 arch/x86/include/asm/pkeys_internal.h |  4 ++
 arch/x86/mm/pkeys.c                   | 79 +++++++++++++++++++++++++++
 include/linux/pkeys.h                 | 14 +++++
 5 files changed, 131 insertions(+)

diff --git a/arch/x86/include/asm/pgtable_types.h 
b/arch/x86/include/asm/pgtable_types.h
index 816b31c68550..2ab45ef89c7d 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -73,6 +73,8 @@
                         _PAGE_PKEY_BIT2 | \
                         _PAGE_PKEY_BIT3)
 
+#define _PAGE_PKEY(pkey)       (_AT(pteval_t, pkey) << _PAGE_BIT_PKEY_BIT0)
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_KNL_ERRATUM_MASK (_PAGE_DIRTY | _PAGE_ACCESSED)
 #else
@@ -229,6 +231,8 @@ enum page_cache_mode {
 #define PAGE_KERNEL_IO         __pgprot_mask(__PAGE_KERNEL_IO)
 #define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE)
 
+#define PAGE_KERNEL_PKEY(pkey) __pgprot_mask(__PAGE_KERNEL | _PAGE_PKEY(pkey))
+
 #endif /* __ASSEMBLY__ */
 
 /*         xwr */
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 34cef29fed20..e30ea907abb6 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -138,4 +138,34 @@ static inline int vma_pkey(struct vm_area_struct *vma)
 
 u32 get_new_pkr(u32 old_pkr, int pkey, unsigned long init_val);
 
+#ifdef CONFIG_ARCH_HAS_SUPERVISOR_PKEYS
+int pks_key_alloc(const char *const pkey_user);
+void pks_key_free(int pkey);
+u32 get_new_pkr(u32 old_pkr, int pkey, unsigned long init_val);
+
+/*
+ * pks_update_protection - Update the protection of the specified key
+ *
+ * @pkey: Key for the domain to change
+ * @protection: protection bits to be used
+ *
+ * Protection utilizes the same protection bits specified for User pkeys
+ *     PKEY_DISABLE_ACCESS
+ *     PKEY_DISABLE_WRITE
+ *
+ * This is not a global update.  It only affects the current running thread.
+ *
+ * It is undefined and a bug for users to call this without having allocated a
+ * pkey and using it as pkey here.
+ */
+static inline void pks_update_protection(int pkey, unsigned long protection)
+{
+       current->thread.saved_pkrs = get_new_pkr(current->thread.saved_pkrs,
+                                                pkey, protection);
+       preempt_disable();
+       write_pkrs(current->thread.saved_pkrs);
+       preempt_enable();
+}
+#endif /* CONFIG_ARCH_HAS_SUPERVISOR_PKEYS */
+
 #endif /*_ASM_X86_PKEYS_H */
diff --git a/arch/x86/include/asm/pkeys_internal.h 
b/arch/x86/include/asm/pkeys_internal.h
index 05257cdc7200..e34f380c66d1 100644
--- a/arch/x86/include/asm/pkeys_internal.h
+++ b/arch/x86/include/asm/pkeys_internal.h
@@ -22,6 +22,10 @@
                         PKR_AD_KEY(10) | PKR_AD_KEY(11) | PKR_AD_KEY(12) | \
                         PKR_AD_KEY(13) | PKR_AD_KEY(14) | PKR_AD_KEY(15))
 
+/*  PKS supports 16 keys. Key 0 is reserved for the kernel. */
+#define        PKS_KERN_DEFAULT_KEY    0
+#define        PKS_NUM_KEYS            16
+
 #ifdef CONFIG_ARCH_HAS_SUPERVISOR_PKEYS
 void write_pkrs(u32 pkrs_val);
 #else
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 0f86f2374bd7..16f735c12fcd 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -3,6 +3,9 @@
  * Intel Memory Protection Keys management
  * Copyright (c) 2015, Intel Corporation.
  */
+#undef pr_fmt
+#define pr_fmt(fmt) "x86/pkeys: " fmt
+
 #include <linux/debugfs.h>             /* debugfs_create_u32()         */
 #include <linux/mm_types.h>             /* mm_struct, vma, etc...       */
 #include <linux/pkeys.h>                /* PKEY_*                       */
@@ -249,3 +252,79 @@ void write_pkrs(u32 pkrs_val)
        this_cpu_write(pkrs_cache, pkrs_val);
        wrmsrl(MSR_IA32_PKRS, pkrs_val);
 }
+
+DEFINE_MUTEX(pks_lock);
+static const char pks_key_user0[] = "kernel";
+
+/* Store names of allocated keys for debug.  Key 0 is reserved for the kernel. 
 */
+static const char *pks_key_users[PKS_NUM_KEYS] = {
+       pks_key_user0
+};
+
+/*
+ * Each key is represented by a bit.  Bit 0 is set for key 0 and reserved for
+ * its use.  We use ulong for the bit operations but only 16 bits are used.
+ */
+static unsigned long pks_key_allocation_map = 1 << PKS_KERN_DEFAULT_KEY;
+
+/*
+ * pks_key_alloc - Allocate a PKS key
+ *
+ * @pkey_user: String stored for debugging of key exhaustion.  The caller is
+ * responsible to maintain this memory until pks_key_free().
+ */
+int pks_key_alloc(const char * const pkey_user)
+{
+       int nr, old_bit, pkey;
+
+       might_sleep();
+
+       if (!cpu_feature_enabled(X86_FEATURE_PKS))
+               return -EINVAL;
+
+       mutex_lock(&pks_lock);
+       /* Find a free bit (0) in the bit map. */
+       old_bit = 1;
+       while (old_bit) {
+               nr = ffz(pks_key_allocation_map);
+               old_bit = __test_and_set_bit(nr, &pks_key_allocation_map);
+       }
+
+       if (nr < PKS_NUM_KEYS) {
+               pkey = nr;
+               /* for debugging key exhaustion */
+               pks_key_users[pkey] = pkey_user;
+       } else {
+               pkey = -ENOSPC;
+               pr_info("Cannot allocate supervisor key for %s.\n",
+                       pkey_user);
+       }
+
+       mutex_unlock(&pks_lock);
+       return pkey;
+}
+EXPORT_SYMBOL_GPL(pks_key_alloc);
+
+/*
+ * pks_key_free - Free a previously allocate PKS key
+ *
+ * @pkey: Key to be free'ed
+ */
+void pks_key_free(int pkey)
+{
+       might_sleep();
+
+       if (!cpu_feature_enabled(X86_FEATURE_PKS))
+               return;
+
+       if (pkey >= PKS_NUM_KEYS || pkey <= PKS_KERN_DEFAULT_KEY)
+               return;
+
+       mutex_lock(&pks_lock);
+       __clear_bit(pkey, &pks_key_allocation_map);
+       pks_key_users[pkey] = NULL;
+       /* Restore to default AD=1 and WD=0. */
+       pks_update_protection(pkey, PKEY_DISABLE_ACCESS);
+       mutex_unlock(&pks_lock);
+}
+EXPORT_SYMBOL_GPL(pks_key_free);
diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h
index 2955ba976048..e4bff77d7b49 100644
--- a/include/linux/pkeys.h
+++ b/include/linux/pkeys.h
@@ -50,4 +50,18 @@ static inline void copy_init_pkru_to_fpregs(void)
 
 #endif /* ! CONFIG_ARCH_HAS_PKEYS */
 
+#ifndef CONFIG_ARCH_HAS_SUPERVISOR_PKEYS
+static inline int pks_key_alloc(const char * const pkey_user)
+{
+       return -EINVAL;
+}
+static inline void pks_key_free(int pkey)
+{
+}
+static inline void pks_update_protection(int pkey, unsigned long protection)
+{
+       WARN_ON_ONCE(1);
+}
+#endif /* ! CONFIG_ARCH_HAS_SUPERVISOR_PKEYS */
+
 #endif /* _LINUX_PKEYS_H */
-- 
2.28.0.rc0.12.gb6a658bd00c9

Reply via email to