Hi all,

Here is my POC mail:
https://www.spinics.net/lists/kernel/msg2571811.html

Since there was no reply, I made this RFC patch.
I ran it in a QEMU guest, where it can get and print the mem_affinity,
but I have no physical machine available to test on right now.

If there is something wrong, please let me know.
If someone has a better method to handle the movable memory,
please tell me.

Thanks,
Chao Fan

On Fri, Aug 18, 2017 at 04:58:20PM +0800, Chao Fan wrote:
>KASLR should choose a memory region of an immovable node to extract the kernel.
>So parse the ACPI SRAT table and store the memory regions of movable nodes,
>which KASLR should avoid.
>
>Signed-off-by: Chao Fan <[email protected]>
>---
> arch/x86/boot/compressed/kaslr.c | 231 +++++++++++++++++++++++++++++++++++++++
> arch/x86/boot/compressed/misc.h  |  27 +++++
> 2 files changed, 258 insertions(+)
>
>diff --git a/arch/x86/boot/compressed/kaslr.c 
>b/arch/x86/boot/compressed/kaslr.c
>index 7de23bb279ce..3b8c111b8a84 100644
>--- a/arch/x86/boot/compressed/kaslr.c
>+++ b/arch/x86/boot/compressed/kaslr.c
>@@ -45,6 +45,11 @@
> #define STATIC
> #include <linux/decompress/mm.h>
> 
>+#include <linux/efi.h>
>+#include <linux/acpi.h>
>+#include <linux/numa.h>
>+#include <asm/efi.h>
>+
> extern unsigned long get_cmd_line_ptr(void);
> 
> /* Simplified build-specific string for starting entropy. */
>@@ -94,6 +99,18 @@ static bool memmap_too_large;
> /* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
> unsigned long long mem_limit = ULLONG_MAX;
> 
>+/* Maximum number of ACPI tables */
>+#define ACPI_MAX_TABLES               128
>+
>+/* Store the movable memory */
>+static struct {
>+      u64 start;
>+      u64 end;
>+} movable_mem[MAX_NUMNODES*2];
>+
>+/* Number of movable memory affinity entries stored in movable_mem[] */
>+static int num_movable_ma;
>+
> 
> enum mem_avoid_index {
>       MEM_AVOID_ZO_RANGE = 0,
>@@ -257,6 +274,180 @@ static int handle_mem_memmap(void)
>       return 0;
> }
> 
>+static void handle_movable_node(void)
>+{
>+      struct acpi_table_desc table_descs[ACPI_MAX_TABLES];
>+      struct acpi_table_header *table_header;
>+      struct acpi_srat_mem_affinity *ma;
>+      struct acpi_subtable_header *asth;
>+      acpi_physical_address root_table;
>+      acpi_physical_address acpi_table;
>+      acpi_physical_address rsdp_addr;
>+      struct acpi_table_header *th;
>+      efi_system_table_t *systab;
>+      unsigned long table_size;
>+      unsigned long table_end;
>+      bool use_rsdt = false;
>+      bool acpi_20 = false;
>+      bool efi_64 = false;
>+      void *config_tables;
>+      int size, total_size;
>+      u32 table_entry_size;
>+      struct efi_info *e;
>+      u8 *table_entry;
>+      u32 table_count;
>+      char *args;
>+      char *sig;
>+      u32 len;
>+      int i, j;
>+
>+      args = (char *)get_cmd_line_ptr();
>+      if (!strstr(args, "movable_node"))
>+              return;
>+
>+      e = &boot_params->efi_info;
>+      sig = (char *)&e->efi_loader_signature;
>+
>+      if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4))
>+              efi_64 = true;
>+      else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4))
>+              efi_64 = false;
>+      else {
>+              debug_putstr("Wrong efi loader signature.\n");
>+              return;
>+      }
>+
>+      // Get systab from boot params
>+#ifdef CONFIG_X86_32
>+      if (e->efi_systab_hi || e->efi_memmap_hi) {
>+              debug_putstr("Table located above 4GB, disabling EFI.\n");
>+              return;
>+      }
>+      systab = (efi_system_table_t *)e->efi_systab;
>+#else
>+      systab = (efi_system_table_t *)(e->efi_systab |
>+                      ((__u64)e->efi_systab_hi<<32));
>+#endif
>+
>+      // Get efi tables from systab
>+      size = efi_64 ? sizeof(efi_config_table_64_t) :
>+                      sizeof(efi_config_table_32_t);
>+      total_size = systab->nr_tables * size;
>+
>+      for (i = 0; i < systab->nr_tables; i++) {
>+              efi_guid_t guid;
>+              unsigned long table;
>+
>+              config_tables = (void *)(systab->tables + size * i);
>+              if (efi_64) {
>+                      efi_config_table_64_t *tmp_table;
>+
>+                      tmp_table = (efi_config_table_64_t *)config_tables;
>+                      guid = tmp_table->guid;
>+                      table = tmp_table->table;
>+#ifndef CONFIG_64BIT
>+                      if (table >> 32) {
>+                              debug_putstr
>+                              ("Table located above 4G, disabling EFI.\n");
>+                              return -EINVAL;
>+                      }
>+#endif
>+              } else {
>+                      efi_config_table_32_t *tmp_table;
>+
>+                      tmp_table = (efi_config_table_32_t *)config_tables;
>+                      guid = tmp_table->guid;
>+                      table = tmp_table->table;
>+              }
>+
>+              // Get rsdp from efi tables
>+              if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)) && !acpi_20) {
>+                      rsdp_addr = (acpi_physical_address)table;
>+                      acpi_20 = false;
>+              } else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID))) {
>+                      rsdp_addr = (acpi_physical_address)table;
>+                      acpi_20 = true;
>+              }
>+      }
>+
>+      // Get rsdt or xsdt from rsdp
>+      if (strstr(args, "acpi=rsdt"))
>+              use_rsdt = true;
>+
>+      if (!(use_rsdt) && (acpi_20) &&
>+              ((((struct acpi_table_rsdp *)rsdp_addr)->revision) > 1)) {
>+              root_table = ((struct acpi_table_rsdp *)
>+                              rsdp_addr)->xsdt_physical_address;
>+              table_entry_size = ACPI_XSDT_ENTRY_SIZE;
>+      } else {
>+              root_table = ((struct acpi_table_rsdp *)
>+                              rsdp_addr)->rsdt_physical_address;
>+              table_entry_size = ACPI_RSDT_ENTRY_SIZE;
>+      }
>+
>+      // Get acpi root table from rsdt or xsdt
>+      th = (struct acpi_table_header *)root_table;
>+      len = th->length;
>+      table_count = (u32)((len - sizeof(struct acpi_table_header)) /
>+                              table_entry_size);
>+      table_entry = ACPI_ADD_PTR(u8, th, sizeof(struct acpi_table_header));
>+
>+      for (i = 0; i < table_count; i++) {
>+              u64 address64;
>+
>+              memset(&table_descs[i], 0, sizeof(struct acpi_table_desc));
>+              if (table_entry_size == ACPI_RSDT_ENTRY_SIZE)
>+                      acpi_table = ((acpi_physical_address)
>+                                      (*ACPI_CAST_PTR(u32, table_entry)));
>+              else {
>+                      ACPI_MOVE_64_TO_64(&address64, table_entry);
>+                      acpi_table = (acpi_physical_address) address64;
>+              }
>+
>+              if (acpi_table) {
>+                      table_descs[i].address = acpi_table;
>+                      table_descs[i].length =
>+                              sizeof(struct acpi_table_header);
>+                      table_descs[i].pointer =
>+                              (struct acpi_table_header *)acpi_table;
>+                      for (j = 0; j < 4; j++)
>+                              table_descs[i].signature.ascii[j] =
>+                                      ((struct acpi_table_header *)
>+                                       acpi_table)->signature[j];
>+              }
>+
>+              if (!strncmp(table_descs[i].signature.ascii, "SRAT", 4)) {
>+                      table_header = table_descs[i].pointer;
>+                      break;
>+              }
>+
>+              table_entry += table_entry_size;
>+      }
>+
>+      // Get acpi srat mem affinity from acpi root table
>+      table_size = sizeof(struct acpi_table_srat);
>+      table_end = (unsigned long)table_header + table_header->length;
>+      asth = (struct acpi_subtable_header *)
>+              ((unsigned long)table_header + table_size);
>+      j = 0;
>+
>+      while (((unsigned long)asth) +
>+                      sizeof(struct acpi_subtable_header) < table_end) {
>+              if (asth->type == 1) {
>+                      ma = (struct acpi_srat_mem_affinity *)asth;
>+                      if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
>+                              movable_mem[j].start = ma->base_address;
>+                              movable_mem[j].end = ma->base_address +
>+                                                   ma->length - 1;
>+                              j++;
>+                      }
>+              }
>+              asth = (struct acpi_subtable_header *)
>+                      ((unsigned long)asth + asth->length);
>+      }
>+      num_movable_ma = j;
>+}
>+
> /*
>  * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
>  * The mem_avoid array is used to store the ranges that need to be avoided
>@@ -380,6 +571,11 @@ static void mem_avoid_init(unsigned long input, unsigned 
>long input_size,
>       /* Mark the memmap regions we need to avoid */
>       handle_mem_memmap();
> 
>+#ifdef CONFIG_EFI
>+      /* Record the hot-pluggable memory regions we need to avoid */
>+      handle_movable_node();
>+#endif
>+
> #ifdef CONFIG_X86_VERBOSE_BOOTUP
>       /* Make sure video RAM can be used. */
>       add_identity_map(0, PMD_SIZE);
>@@ -481,6 +677,36 @@ static unsigned long slots_fetch_random(void)
>       return 0;
> }
> 
>+static int check_movable_memory(struct mem_vector *entry)
>+{
>+      int i;
>+      unsigned long long start;
>+      unsigned long long end;
>+
>+      start = entry->start;
>+      end = entry->start + entry->size - 1;
>+
>+      if (num_movable_ma == 0)
>+              return 0;
>+
>+      for (i = 0; i < num_movable_ma; i++) {
>+              if ((start >= movable_mem[i].start) &&
>+                  (start <= movable_mem[i].end))
>+                      return 1;
>+
>+              if ((end >= movable_mem[i].start) &&
>+                  (end <= movable_mem[i].end))
>+                      return 1;
>+
>+              if (start > movable_mem[i].end)
>+                      continue;
>+
>+              if (end < movable_mem[i].start)
>+                      break;
>+      }
>+      return 0;
>+}
>+
> static void process_mem_region(struct mem_vector *entry,
>                              unsigned long minimum,
>                              unsigned long image_size)
>@@ -502,6 +728,11 @@ static void process_mem_region(struct mem_vector *entry,
>       end = min(entry->size + entry->start, mem_limit);
>       if (entry->start >= end)
>               return;
>+
>+      /* Ignore the memory region of movable_node */
>+      if (check_movable_memory(entry))
>+              return;
>+
>       cur_entry.start = entry->start;
>       cur_entry.size = end - entry->start;
> 
>diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
>index 766a5211f827..5f514959b2f1 100644
>--- a/arch/x86/boot/compressed/misc.h
>+++ b/arch/x86/boot/compressed/misc.h
>@@ -109,3 +109,30 @@ static inline void console_init(void)
> #endif
> 
> #endif
>+
>+#ifdef ACPI_BIG_ENDIAN
>+#define ACPI_MOVE_64_TO_64(d, s) \
>+{((u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[7]; \
>+((u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[6]; \
>+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[5]; \
>+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[4]; \
>+((u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[3]; \
>+((u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[2]; \
>+((u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[1]; \
>+((u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[0]; }
>+#else
>+#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
>+#define ACPI_MOVE_64_TO_64(d, s) \
>+{*(u64 *)(void *)(d) = *(u64 *)(void *)(s)}
>+#else
>+#define ACPI_MOVE_64_TO_64(d, s) \
>+{((u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[0]; \
>+((u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[1]; \
>+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[2]; \
>+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[3]; \
>+((u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[4]; \
>+((u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[5]; \
>+((u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[6]; \
>+((u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[7]; }
>+#endif
>+#endif
>-- 
>2.13.4
>


Reply via email to