kvmclock defines few static variables which are shared with the
hypervisor during the kvmclock initialization.

When SEV is active, memory is encrypted with a guest-specific key, and
if guest OS wants to share the memory region with hypervisor then it must
clear the C-bit before sharing it. Currently, we use
kernel_physical_mapping_init() to split large pages before clearing the
C-bit on shared pages. But it fails when called from the kvmclock
initialization (mainly because memblock allocator is not ready that early
during boot).

Add a __decrypted section attribute which can be used when defining
such shared variable. The so-defined variables will be placed in the
.data..decrypted section. This section is mapped with C=0 early
during boot, we also ensure that the initialized values are updated
to match with C=0 (i.e perform an in-place decryption). The
.data..decrypted section is PMD-aligned and sized so that we avoid
the need to split the large pages when mapping the section.

The sme_encrypt_kernel() was used to perform the in-place encryption
of the Linux kernel and initrd when SME is active. The routine has been
enhanced to decrypt the .data..decrypted section for both SME and SEV
cases.

Signed-off-by: Brijesh Singh <brijesh.si...@amd.com>
Reviewed-by: Tom Lendacky <thomas.lenda...@amd.com>
Cc: Tom Lendacky <thomas.lenda...@amd.com>
Cc: k...@vger.kernel.org
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Borislav Petkov <b...@suse.de>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: linux-kernel@vger.kernel.org
Cc: Paolo Bonzini <pbonz...@redhat.com>
Cc: Sean Christopherson <sean.j.christopher...@intel.com>
Cc: k...@vger.kernel.org
Cc: "Radim Krčmář" <rkrc...@redhat.com>
---
 arch/x86/include/asm/mem_encrypt.h |  6 +++
 arch/x86/kernel/head64.c           | 11 +++++
 arch/x86/kernel/vmlinux.lds.S      | 17 +++++++
 arch/x86/mm/mem_encrypt_identity.c | 94 ++++++++++++++++++++++++++++++++------
 4 files changed, 113 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index c064383..802b2eb 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -52,6 +52,8 @@ void __init mem_encrypt_init(void);
 bool sme_active(void);
 bool sev_active(void);
 
+#define __decrypted __attribute__((__section__(".data..decrypted")))
+
 #else  /* !CONFIG_AMD_MEM_ENCRYPT */
 
 #define sme_me_mask    0ULL
@@ -77,6 +79,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long 
size) { return 0;
 static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 
0; }
 
+#define __decrypted
+
 #endif /* CONFIG_AMD_MEM_ENCRYPT */
 
 /*
@@ -88,6 +92,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long 
size) { return 0;
 #define __sme_pa(x)            (__pa(x) | sme_me_mask)
 #define __sme_pa_nodebug(x)    (__pa_nodebug(x) | sme_me_mask)
 
+extern char __start_data_decrypted[], __end_data_decrypted[];
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379..af39d68 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -112,6 +112,7 @@ static bool __head check_la57_support(unsigned long 
physaddr)
 unsigned long __head __startup_64(unsigned long physaddr,
                                  struct boot_params *bp)
 {
+       unsigned long vaddr, vaddr_end;
        unsigned long load_delta, *p;
        unsigned long pgtable_flags;
        pgdval_t *pgd;
@@ -234,6 +235,16 @@ unsigned long __head __startup_64(unsigned long physaddr,
        /* Encrypt the kernel and related (if SME is active) */
        sme_encrypt_kernel(bp);
 
+       /* Clear the memory encryption mask from the .data..decrypted section. 
*/
+       if (mem_encrypt_active()) {
+               vaddr = (unsigned long)__start_data_decrypted;
+               vaddr_end = (unsigned long)__end_data_decrypted;
+               for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+                       i = pmd_index(vaddr);
+                       pmd[i] -= sme_get_me_mask();
+               }
+       }
+
        /*
         * Return the SME encryption mask (if SME is active) to be used as a
         * modifier for the initial pgdir entry programmed into CR3.
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a4..4cb1064 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,6 +65,21 @@ jiffies_64 = jiffies;
 #define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
 #define ALIGN_ENTRY_TEXT_END   . = ALIGN(PMD_SIZE);
 
+/*
+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis. Make this section PMD-aligned
+ * to avoid spliting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define DATA_DECRYPTED                                         \
+       . = ALIGN(PMD_SIZE);                                    \
+       __start_data_decrypted = .;                             \
+       *(.data..decrypted);                                    \
+       . = ALIGN(PMD_SIZE);                                    \
+       __end_data_decrypted = .;                               \
+
 #else
 
 #define X86_ALIGN_RODATA_BEGIN
@@ -74,6 +89,7 @@ jiffies_64 = jiffies;
 
 #define ALIGN_ENTRY_TEXT_BEGIN
 #define ALIGN_ENTRY_TEXT_END
+#define DATA_DECRYPTED
 
 #endif
 
@@ -171,6 +187,7 @@ SECTIONS
                /* rarely changed data like cpu maps */
                READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
 
+               DATA_DECRYPTED
                /* End of data section */
                _edata = .;
        } :data
diff --git a/arch/x86/mm/mem_encrypt_identity.c 
b/arch/x86/mm/mem_encrypt_identity.c
index 7659e65..08e70ba 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -51,6 +51,8 @@
                                 (_PAGE_PAT | _PAGE_PWT))
 
 #define PMD_FLAGS_ENC          (PMD_FLAGS_LARGE | _PAGE_ENC)
+#define PMD_FLAGS_ENC_WP       ((PMD_FLAGS_ENC & ~_PAGE_CACHE_MASK) | \
+                                (_PAGE_PAT | _PAGE_PWT))
 
 #define PTE_FLAGS              (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
 
@@ -59,6 +61,8 @@
                                 (_PAGE_PAT | _PAGE_PWT))
 
 #define PTE_FLAGS_ENC          (PTE_FLAGS | _PAGE_ENC)
+#define PTE_FLAGS_ENC_WP       ((PTE_FLAGS_ENC & ~_PAGE_CACHE_MASK) | \
+                                (_PAGE_PAT | _PAGE_PWT))
 
 struct sme_populate_pgd_data {
        void    *pgtable_area;
@@ -231,6 +235,11 @@ static void __init sme_map_range_encrypted(struct 
sme_populate_pgd_data *ppd)
        __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
 }
 
+static void __init sme_map_range_encrypted_wp(struct sme_populate_pgd_data 
*ppd)
+{
+       __sme_map_range(ppd, PMD_FLAGS_ENC_WP, PTE_FLAGS_ENC_WP);
+}
+
 static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
 {
        __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
@@ -378,7 +387,10 @@ static void __init build_workarea_map(struct boot_params 
*bp,
        ppd->paddr = workarea_start;
        ppd->vaddr = workarea_start;
        ppd->vaddr_end = workarea_end;
-       sme_map_range_decrypted(ppd);
+       if (sev_active())
+               sme_map_range_encrypted(ppd);
+       else
+               sme_map_range_decrypted(ppd);
 
        /* Flush the TLB - no globals so cr3 is enough */
        native_write_cr3(__native_read_cr3());
@@ -435,16 +447,27 @@ static void __init build_workarea_map(struct boot_params 
*bp,
                sme_map_range_decrypted_wp(ppd);
        }
 
-       /* Add decrypted workarea mappings to both kernel mappings */
+       /*
+        * When SEV is active, kernel is already encrypted hence mapping
+        * the initial workarea_start as encrypted. When SME is active,
+        * the kernel is not encrypted hence add decrypted workarea
+        * mappings to both kernel mappings.
+        */
        ppd->paddr = workarea_start;
        ppd->vaddr = workarea_start;
        ppd->vaddr_end = workarea_end;
-       sme_map_range_decrypted(ppd);
+       if (sev_active())
+               sme_map_range_encrypted(ppd);
+       else
+               sme_map_range_decrypted(ppd);
 
        ppd->paddr = workarea_start;
        ppd->vaddr = workarea_start + decrypted_base;
        ppd->vaddr_end = workarea_end + decrypted_base;
-       sme_map_range_decrypted(ppd);
+       if (sev_active())
+               sme_map_range_encrypted(ppd);
+       else
+               sme_map_range_decrypted(ppd);
 
        wa->kernel_start = kernel_start;
        wa->kernel_end = kernel_end;
@@ -487,28 +510,69 @@ static void __init teardown_workarea_map(struct 
sme_workarea_data *wa,
        native_write_cr3(__native_read_cr3());
 }
 
+static void __init decrypt_shared_data(struct sme_workarea_data *wa,
+                                      struct sme_populate_pgd_data *ppd)
+{
+       unsigned long decrypted_start, decrypted_end, decrypted_len;
+
+       /* Physical addresses of decrypted data section */
+       decrypted_start = __pa_symbol(__start_data_decrypted);
+       decrypted_end = ALIGN(__pa_symbol(__end_data_decrypted), PMD_PAGE_SIZE);
+       decrypted_len = decrypted_end - decrypted_start;
+
+       if (!decrypted_len)
+               return;
+
+       /* Add decrypted mapping for the section (identity) */
+       ppd->paddr = decrypted_start;
+       ppd->vaddr = decrypted_start;
+       ppd->vaddr_end = decrypted_end;
+       sme_map_range_decrypted(ppd);
+
+       /* Add encrypted-wp mapping for the section (non-identity) */
+       ppd->paddr = decrypted_start;
+       ppd->vaddr = decrypted_start + wa->decrypted_base;
+       ppd->vaddr_end = decrypted_end + wa->decrypted_base;
+       sme_map_range_encrypted_wp(ppd);
+
+       /* Perform in-place decryption */
+       sme_encrypt_execute(decrypted_start,
+                           decrypted_start + wa->decrypted_base,
+                           decrypted_len, wa->workarea_start,
+                           (unsigned long)ppd->pgd);
+
+       ppd->vaddr = decrypted_start + wa->decrypted_base;
+       ppd->vaddr_end = decrypted_end + wa->decrypted_base;
+       sme_clear_pgd(ppd);
+}
+
 void __init sme_encrypt_kernel(struct boot_params *bp)
 {
        struct sme_populate_pgd_data ppd;
        struct sme_workarea_data wa;
 
-       if (!sme_active())
+       if (!mem_encrypt_active())
                return;
 
        build_workarea_map(bp, &wa, &ppd);
 
-       /* When SEV is active, encrypt kernel and initrd */
-       sme_encrypt_execute(wa.kernel_start,
-                           wa.kernel_start + wa.decrypted_base,
-                           wa.kernel_len, wa.workarea_start,
-                           (unsigned long)ppd.pgd);
-
-       if (wa.initrd_len)
-               sme_encrypt_execute(wa.initrd_start,
-                                   wa.initrd_start + wa.decrypted_base,
-                                   wa.initrd_len, wa.workarea_start,
+       /* When SME is active, encrypt kernel and initrd */
+       if (sme_active()) {
+               sme_encrypt_execute(wa.kernel_start,
+                                   wa.kernel_start + wa.decrypted_base,
+                                   wa.kernel_len, wa.workarea_start,
                                    (unsigned long)ppd.pgd);
 
+               if (wa.initrd_len)
+                       sme_encrypt_execute(wa.initrd_start,
+                                           wa.initrd_start + wa.decrypted_base,
+                                           wa.initrd_len, wa.workarea_start,
+                                           (unsigned long)ppd.pgd);
+       }
+
+       /* Decrypt the contents of .data..decrypted section */
+       decrypt_shared_data(&wa, &ppd);
+
        teardown_workarea_map(&wa, &ppd);
 }
 
-- 
2.7.4

Reply via email to