On 06/14/18 at 04:47pm, Lianbo Jiang wrote:
> When SME is enabled on an AMD server, we also need to support kdump.
> Because the memory is encrypted in the first kernel, we remap the old
> memory to the second (crash) kernel as encrypted, and SME must also be
> enabled in the second kernel; otherwise the encrypted old memory cannot
> be decrypted. Simply changing the value of the C-bit on a page will not
> automatically encrypt its existing contents, and any data in the page
> prior to the C-bit modification will become unintelligible. A page of
> memory that is marked encrypted will be automatically decrypted when
> read from DRAM and automatically encrypted when written to DRAM.
>
> For kdump, it is necessary to distinguish whether the memory is
> encrypted, and to know which parts of the memory are encrypted or
> decrypted. We remap the memory appropriately for each case, in order to
> tell the CPU how to deal with the data (encrypted or unencrypted). For
> example, when SME is enabled and the old memory is encrypted, we remap
> the old memory as encrypted, so that it is automatically decrypted when
> we read it through the remapped address.
>
> -----------------------------------------------
> | first kernel | second kernel | kdump support |
> | (mem_encrypt=on|off)         |   (yes|no)    |
> |--------------+---------------+---------------|
> |      on      |      on       |      yes      |
> |      off     |      off      |      yes      |
> |      on      |      off      |      no       |
> |      off     |      on       |      no       |
> |______________|_______________|_______________|
>
> Signed-off-by: Lianbo Jiang <liji...@redhat.com>
> ---
> Some changes based on V1:
> 1. Remove the '#ifdef' stuff throughout this patch.
> 2. Put some logic into early_memremap_pgprot_adjust() and clean up the
>    previously unnecessary changes, for example:
>    arch/x86/include/asm/dmi.h, arch/x86/kernel/acpi/boot.c,
>    drivers/acpi/tables.c.
> 3. Rewrite two functions, copy_oldmem_page() and
>    copy_oldmem_page_encrypted().
> 4. Distinguish sme_active() and sev_active(); where no distinction is
>    needed, mem_encrypt_active() is used.

Lianbo, I think you modified this based on Tom's comment. But it would
be good to add this only once you have tested SEV and it works for you.

> 5. Clean up a compile warning in copy_device_table().
>
>  arch/x86/kernel/crash_dump_64.c | 42 +++++++++++++++++++++++++++++++----------
>  arch/x86/mm/ioremap.c           |  4 ++++
>  drivers/iommu/amd_iommu_init.c  | 14 +++++++++++++-

Assuming it does not break bisection, it would be better to split the
iommu changes out as a standalone patch and cc the iommu list.

>  fs/proc/vmcore.c                | 20 +++++++++++++++-----
>  include/linux/crash_dump.h      |  5 +++++
>  kernel/kexec_core.c             | 12 ++++++++++++

Splitting out another two patches, one for kexec_core and another for
vmcore.c, would also make the review easier.

>  6 files changed, 81 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
> index 4f2e077..a2c7b13 100644
> --- a/arch/x86/kernel/crash_dump_64.c
> +++ b/arch/x86/kernel/crash_dump_64.c
> @@ -11,6 +11,23 @@
>  #include <linux/uaccess.h>
>  #include <linux/io.h>
>
> +static ssize_t copy_to(void *to, void *vaddr, unsigned long offset,
> +                       size_t size, int userbuf)
> +{
> +        if (userbuf) {
> +                if (copy_to_user(to, vaddr + offset, size)) {
> +                        iounmap(vaddr);
> +                        return -ENOMEM;
> +                }
> +        } else
> +                memcpy(to, vaddr + offset, size);
> +
> +        set_iounmap_nonlazy();
> +        iounmap(vaddr);
> +
> +        return size;
> +}
> +
>  /**
>   * copy_oldmem_page - copy one page from "oldmem"
>   * @pfn: page frame number to be copied
> @@ -36,15 +53,20 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
>          if (!vaddr)
>                  return -ENOMEM;
>
> -        if (userbuf) {
> -                if (copy_to_user(buf, vaddr + offset, csize)) {
> -                        iounmap(vaddr);
> -                        return -EFAULT;
> -                }
> -        } else
> -                memcpy(buf, vaddr + offset, csize);
> +        return copy_to(buf, vaddr, offset, csize, userbuf);
> +}
>
> -        set_iounmap_nonlazy();
> -        iounmap(vaddr);
> -        return csize;
> +ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf,
> +                size_t csize, unsigned long offset, int userbuf)
> +{
> +        void *vaddr;
> +
> +        if (!csize)
> +                return 0;
> +
> +        vaddr = ioremap_encrypted(pfn << PAGE_SHIFT, PAGE_SIZE);
> +        if (!vaddr)
> +                return -ENOMEM;
> +
> +        return copy_to(buf, vaddr, offset, csize, userbuf);
>  }
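For readers without the rest of the series: ioremap_encrypted() is not
a mainline helper; it is presumably introduced by a companion patch in
this series. A minimal sketch of the idea, assuming __ioremap_caller()
gains an "encrypted" flag in that patch, would look something like:

/*
 * Sketch only, not the actual patch: map a physical range with the SME
 * encryption mask (C-bit) kept set, so that reads through the mapping
 * are transparently decrypted by the hardware.
 */
void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
{
        /* The trailing 'true' is the assumed new "encrypted" flag. */
        return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
                                __builtin_return_address(0), true);
}

copy_oldmem_page_encrypted() above relies on exactly this property: the
old kernel's encrypted pages must be read through a C-bit mapping to
come out decrypted.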
> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
> index 24e0920..e365fc4 100644
> --- a/arch/x86/mm/ioremap.c
> +++ b/arch/x86/mm/ioremap.c
> @@ -24,6 +24,7 @@
>  #include <asm/pgalloc.h>
>  #include <asm/pat.h>
>  #include <asm/setup.h>
> +#include <linux/crash_dump.h>
>
>  #include "physaddr.h"
>
> @@ -696,6 +697,9 @@ pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
>          if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
>                  encrypted_prot = false;
>
> +        if (sme_active() && is_kdump_kernel())
> +                encrypted_prot = false;
> +
>          return encrypted_prot ? pgprot_encrypted(prot)
>                                : pgprot_decrypted(prot);
>  }
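For reference, the pgprot_encrypted()/pgprot_decrypted() helpers used
above, and the __sme_clr() used in the next hunk, are existing mainline
macros, roughly:

/* From include/linux/mem_encrypt.h (sme_me_mask is 0 when SME is off): */
#define __sme_set(x)            ((x) | sme_me_mask)   /* set the C-bit */
#define __sme_clr(x)            ((x) & ~sme_me_mask)  /* clear the C-bit */

/* From the x86 pgtable headers: */
#define pgprot_encrypted(prot)  __pgprot(__sme_set(pgprot_val(prot)))
#define pgprot_decrypted(prot)  __pgprot(__sme_clr(pgprot_val(prot)))

So the new branch in early_memremap_pgprot_adjust() simply forces early
remaps to be decrypted when a kdump kernel is running with SME active.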
> diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
> index 904c575..5e535a6 100644
> --- a/drivers/iommu/amd_iommu_init.c
> +++ b/drivers/iommu/amd_iommu_init.c
> @@ -889,11 +889,23 @@ static bool copy_device_table(void)
>          }
>
>          old_devtb_phys = entry & PAGE_MASK;
> +        /*
> +         * When SME is enabled in the first kernel, old_devtb_phys includes
> +         * the memory encryption mask (sme_me_mask); we must remove the mask
> +         * to obtain the true physical address in kdump mode.
> +         */
> +        if (mem_encrypt_active() && is_kdump_kernel())
> +                old_devtb_phys = __sme_clr(old_devtb_phys);
>          if (old_devtb_phys >= 0x100000000ULL) {
>                  pr_err("The address of old device table is above 4G, not trustworthy!\n");
>                  return false;
>          }
> -        old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
> +        if (mem_encrypt_active() && is_kdump_kernel())
> +                old_devtb = (void *)ioremap_encrypted(old_devtb_phys,
> +                                                      dev_table_size);
> +        else
> +                old_devtb = memremap(old_devtb_phys,
> +                                     dev_table_size, MEMREMAP_WB);
>          if (!old_devtb)
>                  return false;
>
> diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
> index a45f0af..4d0c884 100644
> --- a/fs/proc/vmcore.c
> +++ b/fs/proc/vmcore.c
> @@ -25,6 +25,8 @@
>  #include <linux/uaccess.h>
>  #include <asm/io.h>
>  #include "internal.h"
> +#include <linux/mem_encrypt.h>
> +#include <asm/pgtable.h>
>
>  /* List representing chunks of contiguous memory areas and their offsets in
>   * vmcore file.
> @@ -86,7 +88,8 @@ static int pfn_is_ram(unsigned long pfn)
>
>  /* Reads a page from the oldmem device from given offset. */
>  static ssize_t read_from_oldmem(char *buf, size_t count,
> -                                u64 *ppos, int userbuf)
> +                                u64 *ppos, int userbuf,
> +                                bool encrypted)
>  {
>          unsigned long pfn, offset;
>          size_t nr_bytes;
> @@ -108,8 +111,13 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
>                  if (pfn_is_ram(pfn) == 0)
>                          memset(buf, 0, nr_bytes);
>                  else {
> -                        tmp = copy_oldmem_page(pfn, buf, nr_bytes,
> +                        if (encrypted)
> +                                tmp = copy_oldmem_page_encrypted(pfn, buf,
> +                                        nr_bytes, offset, userbuf);
> +                        else
> +                                tmp = copy_oldmem_page(pfn, buf, nr_bytes,
>                                                  offset, userbuf);
> +
>                          if (tmp < 0)
>                                  return tmp;
>                  }
> @@ -143,7 +151,7 @@ void __weak elfcorehdr_free(unsigned long long addr)
>   */
>  ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
>  {
> -        return read_from_oldmem(buf, count, ppos, 0);
> +        return read_from_oldmem(buf, count, ppos, 0, sev_active());
>  }
>
>  /*
> @@ -151,7 +159,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
>   */
>  ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
>  {
> -        return read_from_oldmem(buf, count, ppos, 0);
> +        return read_from_oldmem(buf, count, ppos, 0, sme_active());
>  }
>
>  /*
> @@ -161,6 +169,7 @@ int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
>                                    unsigned long from, unsigned long pfn,
>                                    unsigned long size, pgprot_t prot)
>  {
> +        prot = pgprot_encrypted(prot);
>          return remap_pfn_range(vma, from, pfn, size, prot);
>  }
>
> @@ -235,7 +244,8 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
>                                          m->offset + m->size - *fpos,
>                                          buflen);
>                          start = m->paddr + *fpos - m->offset;
> -                        tmp = read_from_oldmem(buffer, tsz, &start, userbuf);
> +                        tmp = read_from_oldmem(buffer, tsz, &start, userbuf,
> +                                               mem_encrypt_active());
>                          if (tmp < 0)
>                                  return tmp;
>                          buflen -= tsz;
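A note on the three predicates used in the vmcore.c hunk above: they
are existing helpers, not added by this patch. On x86 they boil down
to roughly the following:

/* Roughly, from arch/x86/mm/mem_encrypt.c: */
bool sme_active(void)                   /* host-side SME in use */
{
        return sme_me_mask && !sev_enabled;
}

bool sev_active(void)                   /* running as an SEV guest */
{
        return sme_me_mask && sev_enabled;
}

/* Roughly, from include/linux/mem_encrypt.h: */
static inline bool mem_encrypt_active(void)     /* either of the above */
{
        return sme_me_mask;
}

Note that elfcorehdr_read() keys off sev_active() while
elfcorehdr_read_notes() keys off sme_active(), so the two paths behave
differently depending on whether the kernel runs as an SEV guest or on
an SME host.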
> diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
> index f7ac2aa..28b0a7c 100644
> --- a/include/linux/crash_dump.h
> +++ b/include/linux/crash_dump.h
> @@ -25,6 +25,11 @@ extern int remap_oldmem_pfn_range(struct vm_area_struct *vma,
>
>  extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
>                                                  unsigned long, int);
> +extern ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf,
> +                                           size_t csize, unsigned long offset,
> +                                           int userbuf);
> +#define copy_oldmem_page_encrypted copy_oldmem_page_encrypted
> +
>  void vmcore_cleanup(void);
>
>  /* Architecture code defines this if there are other possible ELF
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index 20fef1a..3c22a9b 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -471,6 +471,16 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
>                  }
>          }
>
> +        if (pages) {
> +                unsigned int count, i;
> +
> +                pages->mapping = NULL;
> +                set_page_private(pages, order);
> +                count = 1 << order;
> +                for (i = 0; i < count; i++)
> +                        SetPageReserved(pages + i);
> +                arch_kexec_post_alloc_pages(page_address(pages), 1 << order, 0);
> +        }
>          return pages;
>  }
>
> @@ -865,6 +875,7 @@ static int kimage_load_crash_segment(struct kimage *image,
>                  result = -ENOMEM;
>                  goto out;
>          }
> +        arch_kexec_post_alloc_pages(page_address(page), 1, 0);
>          ptr = kmap(page);
>          ptr += maddr & ~PAGE_MASK;
>          mchunk = min_t(size_t, mbytes,
> @@ -882,6 +893,7 @@ static int kimage_load_crash_segment(struct kimage *image,
>                  result = copy_from_user(ptr, buf, uchunk);
>                  kexec_flush_icache_page(page);
>                  kunmap(page);
> +                arch_kexec_pre_free_pages(page_address(page), 1);
>                  if (result) {
>                          result = -EFAULT;
>                          goto out;
> --
> 2.9.5
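For context on the two hooks added to kexec_core.c above:
arch_kexec_post_alloc_pages() and arch_kexec_pre_free_pages() already
exist for x86 SME, with no-op generic fallbacks; roughly:

/* Roughly, from arch/x86/include/asm/kexec.h; gfp is unused on x86: */
static inline int arch_kexec_post_alloc_pages(void *vaddr,
                                              unsigned int pages, gfp_t gfp)
{
        /* Clear the C-bit: the next kernel will first access these
         * pages unencrypted. */
        return set_memory_decrypted((unsigned long)vaddr, pages);
}

static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
{
        /* Restore the C-bit before the pages are freed back. */
        set_memory_encrypted((unsigned long)vaddr, pages);
}

With this, crash control pages and loaded crash segments go through the
same decrypt-on-alloc/re-encrypt-on-free cycle that ordinary kexec
pages already get via kimage_alloc_pages().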