For machines whose firmware is not a legacy BIOS (such as EFI or
LinuxBIOS), and for kexec, the kernel's 16-bit real-mode setup code,
which relies on the legacy BIOS, cannot be used, so a 32-bit boot
protocol needs to be defined.

This patchset defines a 32-bit boot protocol for i386 and x86_64. A
linked-list-based mechanism for passing boot parameters is also added
to improve extensibility.

The patchset has been tested against 2.6.23-rc4-mm1 kernel on x86_64.

This patchset is based on the proposal of Peter Anvin.

Known Issues:

1. Where is it safe to place the linked list of setup_data?

Because the length of the linked list of setup_data is variable, it
cannot be copied into the kernel's BSS segment the way the "zero
page" is. We must find a safe place for it, where it will not be
overwritten by the kernel during boot. The i386 kernel overwrites
some pages after _end. The x86_64 kernel overwrites some pages from
0x1000 on.

The EFI64 runtime service is the first user of the 32-bit boot protocol
and of the boot parameter passing mechanism. To demonstrate their usage,
the EFI64 runtime service patch is appended to this mail.

Best Regards,
Huang Ying

---

 Documentation/i386/boot.txt            |   15 
 Documentation/x86_64/boot-options.txt  |   12 
 arch/x86_64/Kconfig                    |   11 
 arch/x86_64/kernel/Makefile            |    1 
 arch/x86_64/kernel/efi.c               |  597 +++++++++++++++++++++++++++++++++
 arch/x86_64/kernel/efi_callwrap.S      |   69 +++
 arch/x86_64/kernel/reboot.c            |   20 -
 arch/x86_64/kernel/setup.c             |   14 
 arch/x86_64/kernel/time.c              |   48 +-
 include/asm-i386/bootparam.h           |   10 
 include/asm-x86_64/efi.h               |   18 
 include/asm-x86_64/eficallwrap.h       |   33 +
 include/asm-x86_64/emergency-restart.h |    9 
 include/asm-x86_64/fixmap.h            |    3 
 include/asm-x86_64/time.h              |    7 
 15 files changed, 842 insertions(+), 25 deletions(-)

Index: linux-2.6.23-rc4/include/asm-x86_64/eficallwrap.h
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.23-rc4/include/asm-x86_64/eficallwrap.h   2007-09-17 
15:03:47.000000000 +0800
@@ -0,0 +1,33 @@
+/*
+ *  Copyright (C) 2007 Intel Corp
+ *     Bibo Mao <[EMAIL PROTECTED]>
+ *     Huang Ying <[EMAIL PROTECTED]>
+ *
+ *  Function calling ABI conversion from SYSV to Windows for x86_64
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *
+ */
+
+#ifndef __ASM_X86_64_EFICALLWRAP_H
+#define __ASM_X86_64_EFICALLWRAP_H
+
+/*
+ * lin2winN() calls the function at @fp with N arguments, converting from
+ * the SysV calling convention to the Windows one (implemented in
+ * efi_callwrap.S), and returns the value the callee left in %rax.
+ */
+extern efi_status_t lin2win0(void *fp);
+extern efi_status_t lin2win1(void *fp, u64 arg1);
+extern efi_status_t lin2win2(void *fp, u64 arg1, u64 arg2);
+extern efi_status_t lin2win3(void *fp, u64 arg1, u64 arg2, u64 arg3);
+extern efi_status_t lin2win4(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4);
+extern efi_status_t lin2win5(void *fp, u64 arg1, u64 arg2, u64 arg3,
+                            u64 arg4, u64 arg5);
+extern efi_status_t lin2win6(void *fp, u64 arg1, u64 arg2, u64 arg3,
+                            u64 arg4, u64 arg5, u64 arg6);
+
+#endif
Index: linux-2.6.23-rc4/arch/x86_64/kernel/efi.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.23-rc4/arch/x86_64/kernel/efi.c   2007-09-17 15:03:47.000000000 
+0800
@@ -0,0 +1,597 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 1.0
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <[EMAIL PROTECTED]>
+ * Copyright (C) 1999-2002 Hewlett-Packard Co.
+ *     David Mosberger-Tang <[EMAIL PROTECTED]>
+ *     Stephane Eranian <[EMAIL PROTECTED]>
+ * Copyright (C) 2005-2008 Intel Co.
+ *     Fenghua Yu <[EMAIL PROTECTED]>
+ *     Bibo Mao <[EMAIL PROTECTED]>
+ *     Chandramouli Narayanan <[EMAIL PROTECTED]>
+ *
+ * Code to convert EFI to E820 map has been implemented in elilo bootloader
+ * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table
+ * is setup appropriately for EFI runtime code.
+ * - mouli 06/14/2007.
+ *
+ * All EFI Runtime Services are not implemented yet as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version.  --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls.  --davidm
+ *
+ * Goutham Rao: <[EMAIL PROTECTED]>
+ *     Skip non-WB memory and ignore empty memory ranges.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/reboot.h>
+
+#include <asm/setup.h>
+#include <asm/bootparam.h>
+#include <asm/page.h>
+#include <asm/e820.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/proto.h>
+#include <asm/eficallwrap.h>
+#include <asm/efi.h>
+#include <asm/time.h>
+
+/* Set to 1 by parse_efi64_info(); cleared by the "noefi" parameter. */
+int efi_enabled;
+EXPORT_SYMBOL(efi_enabled);
+
+/* Runtime service entry points and configuration table addresses. */
+struct efi efi;
+EXPORT_SYMBOL(efi);
+
+/* EFI memory map handed over by the boot loader. */
+struct efi_memory_map memmap;
+
+/* Physical system table address and the entry points used before virtual mode. */
+struct efi efi_phys __initdata;
+/* Private copy of the firmware system table, filled in by efi_init(). */
+static efi_system_table_t efi_systab __initdata;
+/* IRQ flags saved across a physical mode call. */
+static unsigned long efi_flags __initdata;
+/* efi_lock protects efi physical mode call */
+static spinlock_t efi_lock __initdata = SPIN_LOCK_UNLOCKED;
+/* PGD entry for virtual address 0, saved across a physical mode call. */
+static pgd_t save_pgd __initdata;
+/* Set by "noefi_time": keep the default wallclock accessors. */
+static int noefi_time __initdata;
+
+/* "noefi" early parameter: clear efi_enabled. @arg is ignored. */
+static int __init setup_noefi(char *arg)
+{
+       efi_enabled = 0;
+       return 0;
+}
+early_param("noefi", setup_noefi);
+
+/*
+ * "noefi_time" early parameter: stop efi_init() from installing the EFI
+ * wallclock accessors. @arg is ignored.
+ */
+static int __init setup_noefi_time(char *arg)
+{
+       noefi_time = 1;
+       return 0;
+}
+early_param("noefi_time", setup_noefi_time);
+
+/* get_time runtime service, called through the ABI conversion wrapper. */
+static efi_status_t _efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+{
+       void *entry = (void *)efi.systab->runtime->get_time;
+
+       return lin2win2(entry, (u64)tm, (u64)tc);
+}
+
+/* set_time runtime service, called through the ABI conversion wrapper. */
+static efi_status_t _efi_set_time(efi_time_t *tm)
+{
+       void *entry = (void *)efi.systab->runtime->set_time;
+
+       return lin2win1(entry, (u64)tm);
+}
+
+/* get_wakeup_time runtime service, called through the ABI conversion wrapper. */
+static efi_status_t _efi_get_wakeup_time(efi_bool_t *enabled,
+                                        efi_bool_t *pending,
+                                        efi_time_t *tm)
+{
+       void *entry = (void *)efi.systab->runtime->get_wakeup_time;
+
+       return lin2win3(entry, (u64)enabled, (u64)pending, (u64)tm);
+}
+
+/* set_wakeup_time runtime service, called through the ABI conversion wrapper. */
+static efi_status_t _efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
+{
+       void *entry = (void *)efi.systab->runtime->set_wakeup_time;
+
+       return lin2win2(entry, (u64)enabled, (u64)tm);
+}
+
+/* get_variable runtime service, called through the ABI conversion wrapper. */
+static efi_status_t _efi_get_variable(efi_char16_t *name,
+                                     efi_guid_t *vendor,
+                                     u32 *attr,
+                                     unsigned long *data_size,
+                                     void *data)
+{
+       void *entry = (void *)efi.systab->runtime->get_variable;
+
+       return lin2win5(entry, (u64)name, (u64)vendor, (u64)attr,
+                       (u64)data_size, (u64)data);
+}
+
+/* get_next_variable runtime service, called through the ABI conversion wrapper. */
+static efi_status_t _efi_get_next_variable(unsigned long *name_size,
+                                          efi_char16_t *name,
+                                          efi_guid_t *vendor)
+{
+       void *entry = (void *)efi.systab->runtime->get_next_variable;
+
+       return lin2win3(entry, (u64)name_size, (u64)name, (u64)vendor);
+}
+
+/* set_variable runtime service, called through the ABI conversion wrapper. */
+static efi_status_t _efi_set_variable(
+                       efi_char16_t *name, efi_guid_t *vendor,
+                       u64 attr, u64 data_size, void *data)
+{
+       void *entry = (void *)efi.systab->runtime->set_variable;
+
+       return lin2win5(entry, (u64)name, (u64)vendor, (u64)attr,
+                       (u64)data_size, (u64)data);
+}
+
+/* get_next_high_mono_count runtime service, called through the ABI wrapper. */
+static efi_status_t _efi_get_next_high_mono_count(u32 *count)
+{
+       void *entry = (void *)efi.systab->runtime->get_next_high_mono_count;
+
+       return lin2win1(entry, (u64)count);
+}
+
+/* reset_system runtime service, called through the ABI conversion wrapper. */
+static efi_status_t _efi_reset_system(int reset_type,
+                                     efi_status_t status,
+                                     unsigned long data_size,
+                                     efi_char16_t *data)
+{
+       void *entry = (void *)efi.systab->runtime->reset_system;
+
+       return lin2win4(entry, (u64)reset_type, (u64)status,
+                       (u64)data_size, (u64)data);
+}
+
+/* set_virtual_address_map runtime service, called through the ABI wrapper. */
+static efi_status_t _efi_set_virtual_address_map(
+       unsigned long memory_map_size,
+       unsigned long descriptor_size,
+       u32 descriptor_version,
+       efi_memory_desc_t *virtual_map)
+{
+       void *entry = (void *)efi.systab->runtime->set_virtual_address_map;
+
+       return lin2win4(entry, (u64)memory_map_size, (u64)descriptor_size,
+                       (u64)descriptor_version, (u64)virtual_map);
+}
+
+/*
+ * Walk the kernel mapping (__va) of the physical range [start, end) and
+ * set or clear execute permission on each PTE found. The walk advances by
+ * PMD_SIZE over huge mappings and by PAGE_SIZE otherwise.
+ */
+static void __init early_mapping_set_exec(unsigned long start,
+                                         unsigned long end,
+                                         int executable)
+{
+       unsigned long addr;
+       pte_t new_pte;
+       pte_t *ptep;
+
+       for (addr = start; addr < end; ) {
+               ptep = lookup_address((unsigned long)__va(addr));
+               BUG_ON(!ptep);
+
+               if (executable)
+                       new_pte = pte_mkexec(*ptep);
+               else
+                       new_pte = __pte((pte_val(*ptep) | _PAGE_NX) &
+                                       __supported_pte_mask);
+               set_pte(ptep, new_pte);
+
+               if (pte_huge(*ptep))
+                       addr = (addr + PMD_SIZE) & PMD_MASK;
+               else
+                       addr = (addr + PAGE_SIZE) & PAGE_MASK;
+       }
+}
+
+/*
+ * Set (@executable != 0) or clear (@executable == 0) execute permission on
+ * the kernel mapping of every EFI_RUNTIME_SERVICES_CODE region listed in
+ * the EFI memory map.
+ */
+static void __init early_runtime_code_mapping_set_exec(int executable)
+{
+       efi_memory_desc_t *md;
+       void *p;
+
+       for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+               md = p;
+               if (md->type == EFI_RUNTIME_SERVICES_CODE) {
+                       unsigned long end;
+                       /* num_pages is scaled by EFI_PAGE_SHIFT elsewhere in this file */
+                       end = md->phys_addr +
+                               (md->num_pages << EFI_PAGE_SHIFT);
+                       early_mapping_set_exec(md->phys_addr, end, executable);
+               }
+       }
+}
+
+/*
+ * Prepare for a physical mode EFI call: take efi_lock, save and disable
+ * local interrupts, make the runtime code regions executable, and install
+ * the PGD entry that covers __va(0) into the PGD slot for virtual
+ * address 0. The previous entry is kept in save_pgd so that
+ * efi_call_phys_epilog() can put it back.
+ */
+static void __init efi_call_phys_prelog(void) __acquires(efi_lock)
+{
+       unsigned long vaddress;
+
+       /*
+        * Lock sequence is different from normal case because
+        * efi_flags is global
+        */
+       spin_lock(&efi_lock);
+       local_irq_save(efi_flags);
+       early_runtime_code_mapping_set_exec(1);
+       vaddress = (unsigned long)__va(0x0UL);
+       /* remember the entry for address 0 before overwriting it */
+       pgd_val(save_pgd) = pgd_val(*pgd_offset_k(0x0UL));
+       set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
+       global_flush_tlb();
+}
+
+/*
+ * Undo efi_call_phys_prelog() in reverse order.
+ */
+static void __init efi_call_phys_epilog(void) __releases(efi_lock)
+{
+       /*
+        * The original page table entry and the non-executable mapping of
+        * the runtime code are restored first; only then are the saved
+        * interrupt flags restored and the lock released.
+        */
+       set_pgd(pgd_offset_k(0x0UL), save_pgd);
+       early_runtime_code_mapping_set_exec(0);
+       global_flush_tlb();
+       local_irq_restore(efi_flags);
+       spin_unlock(&efi_lock);
+}
+
+/*
+ * Call the firmware's set_virtual_address_map entry point (as recorded in
+ * efi_phys) between the physical mode prelog and epilog.
+ */
+static efi_status_t __init phys_efi_set_virtual_address_map(
+       unsigned long memory_map_size,
+       unsigned long descriptor_size,
+       u32 descriptor_version,
+       efi_memory_desc_t *virtual_map)
+{
+       efi_status_t ret;
+       void *entry;
+
+       efi_call_phys_prelog();
+       entry = (void *)efi_phys.set_virtual_address_map;
+       ret = lin2win4(entry, (u64)memory_map_size, (u64)descriptor_size,
+                      (u64)descriptor_version, (u64)virtual_map);
+       efi_call_phys_epilog();
+
+       return ret;
+}
+
+/*
+ * Call the firmware's get_time entry point (as recorded in efi_phys)
+ * between the physical mode prelog and epilog.
+ */
+static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+                                            efi_time_cap_t *tc)
+{
+       efi_status_t ret;
+       void *entry;
+
+       efi_call_phys_prelog();
+       entry = (void *)efi_phys.get_time;
+       ret = lin2win2(entry, (u64)tm, (u64)tc);
+       efi_call_phys_epilog();
+
+       return ret;
+}
+
+/*
+ * Write the minutes and seconds of @nowtime to the EFI clock; all other
+ * fields keep the values just read back from the firmware.
+ * Returns 0 on success and -1 when the firmware read or write fails.
+ *
+ * The caller must have interrupts disabled and hold rtc_lock (time.c).
+ */
+int efi_set_rtc_mmss(unsigned long nowtime)
+{
+       efi_time_cap_t  caps;
+       efi_time_t      now;
+       int minutes, seconds;
+
+       if (efi.get_time(&now, &caps) != EFI_SUCCESS) {
+               printk(KERN_ERR "Oops: efitime: can't read time!\n");
+               return -1;
+       }
+
+       seconds = nowtime % 60;
+       minutes = nowtime / 60;
+       /*
+        * When the firmware minute and ours differ by an odd number of
+        * (rounded) half hours, shift by 30 minutes -- presumably to cope
+        * with clocks kept at a half-hour offset.
+        */
+       if (((abs(minutes - now.minute) + 15)/30) & 1)
+               minutes += 30;
+       minutes %= 60;
+
+       now.minute = minutes;
+       now.second = seconds;
+
+       if (efi.set_time(&now) != EFI_SUCCESS) {
+               printk(KERN_ERR "Oops: efitime: can't write time!\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Read the EFI clock and convert it with mktime().
+ * Returns 0 when the firmware call fails.
+ *
+ * To call this function, the irq must be disabled and rtc_lock in
+ * time.c must be held.
+ */
+unsigned long efi_get_time(void)
+{
+       efi_status_t status;
+       efi_time_t eft;
+       efi_time_cap_t cap;
+
+       status = efi.get_time(&eft, &cap);
+       if (status != EFI_SUCCESS) {
+               printk(KERN_ERR "Oops: efitime: can't read time!\n");
+               /* eft was not filled in; do not hand garbage to mktime() */
+               return 0;
+       }
+
+       return mktime(eft.year, eft.month, eft.day, eft.hour,
+                     eft.minute, eft.second);
+}
+
+/* Reserve the physical range holding the EFI memory map in bootmem. */
+void __init efi_reserve_bootmem(void)
+{
+       unsigned long map_start = (unsigned long)memmap.phys_map;
+
+       reserve_bootmem_generic(map_start, memmap.nr_map * memmap.desc_size);
+}
+
+/*
+ * Record the EFI information carried by a SETUP_EFI64_INFO node.
+ *
+ * @setup_data:    the node as currently mapped by the caller
+ * @pa_setup_data: physical address of the same node
+ *
+ * Stores the system table address and the location, entry count and
+ * descriptor layout of the memory map, then sets efi_enabled. A node with
+ * a zero descriptor size is rejected and leaves all state untouched.
+ */
+void __init parse_efi64_info(struct setup_data *setup_data,
+                            unsigned long pa_setup_data)
+{
+       struct efi64_info *efi64_info = (struct efi64_info *)setup_data->data;
+
+       /* The descriptor size is the divisor below; never trust it blindly. */
+       if (efi64_info->efi_memdesc_size == 0) {
+               printk(KERN_ERR "EFI: memory descriptor size is 0, "
+                      "ignoring EFI boot information\n");
+               return;
+       }
+
+       memset(&efi, 0, sizeof(efi));
+       memset(&efi_phys, 0, sizeof(efi_phys));
+
+       efi_phys.systab = (efi_system_table_t *)efi64_info->efi_systab;
+       /*
+        * Physical address of the map = physical address of the node plus
+        * the offset of efi_memmap[] within the node.
+        */
+       memmap.phys_map = (void *)pa_setup_data +
+               ((char *)efi64_info->efi_memmap - (char *)setup_data);
+       memmap.nr_map = efi64_info->efi_memmap_size /
+               efi64_info->efi_memdesc_size;
+       memmap.desc_version = efi64_info->efi_memdesc_version;
+       memmap.desc_size = efi64_info->efi_memdesc_size;
+
+       efi_enabled = 1;
+}
+
+/*
+ * Early EFI setup, called from setup.c when efi_enabled is set:
+ *  - copy the firmware system table into efi_systab and sanity-check it,
+ *  - print the firmware vendor and revision,
+ *  - record the addresses of the known configuration tables in 'efi',
+ *  - record the physical entry points of get_time and
+ *    set_virtual_address_map in 'efi_phys',
+ *  - compute the virtual bounds of the EFI memory map,
+ *  - select EFI reboot and, unless "noefi_time" was given, the EFI
+ *    wallclock accessors.
+ */
+void __init efi_init(void)
+{
+       efi_config_table_t *config_tables;
+       efi_runtime_services_t *runtime;
+       efi_char16_t *c16;
+       char vendor[100] = "unknown";
+       int i = 0;
+
+       efi.systab = early_ioremap((unsigned long)efi_phys.systab,
+                                  sizeof(efi_system_table_t));
+       memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
+       efi.systab = &efi_systab;
+       /*
+        * Verify the EFI Table
+        */
+       if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+               printk(KERN_ERR "Woah! EFI system table "
+                      "signature incorrect\n");
+       if ((efi.systab->hdr.revision >> 16) == 0)
+               printk(KERN_ERR "Warning: EFI system table version "
+                      "%d.%02d, expected 1.00 or greater\n",
+                      efi.systab->hdr.revision >> 16,
+                      efi.systab->hdr.revision & 0xffff);
+
+       /*
+        * Show what we know for posterity
+        */
+       c16 = early_ioremap(efi.systab->fw_vendor, 2);
+       if (!probe_kernel_address(c16, i)) {
+               /* Stop one short of the buffer so the NUL always fits. */
+               for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
+                       vendor[i] = *c16++;
+               vendor[i] = '\0';
+       }
+
+       printk(KERN_INFO "EFI v%u.%.02u by %s \n",
+              efi.systab->hdr.revision >> 16,
+              efi.systab->hdr.revision & 0xffff, vendor);
+
+       /*
+        * Let's see what config tables the firmware passed to us.
+        */
+       config_tables = early_ioremap(
+               efi.systab->tables,
+               efi.systab->nr_tables * sizeof(efi_config_table_t));
+       if (config_tables == NULL)
+               printk(KERN_ERR "Could not map EFI Configuration Table!\n");
+
+       printk(KERN_INFO);
+       /* Nothing to scan when the table could not be mapped. */
+       for (i = 0; config_tables && i < efi.systab->nr_tables; i++) {
+               if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) {
+                       efi.mps = config_tables[i].table;
+                       printk(" MPS=0x%lx ", config_tables[i].table);
+               } else if (!efi_guidcmp(config_tables[i].guid,
+                                       ACPI_20_TABLE_GUID)) {
+                       efi.acpi20 = config_tables[i].table;
+                       printk(" ACPI 2.0=0x%lx ", config_tables[i].table);
+               } else if (!efi_guidcmp(config_tables[i].guid,
+                                       ACPI_TABLE_GUID)) {
+                       efi.acpi = config_tables[i].table;
+                       printk(" ACPI=0x%lx ", config_tables[i].table);
+               } else if (!efi_guidcmp(config_tables[i].guid,
+                                       SMBIOS_TABLE_GUID)) {
+                       efi.smbios = config_tables[i].table;
+                       printk(" SMBIOS=0x%lx ", config_tables[i].table);
+               } else if (!efi_guidcmp(config_tables[i].guid,
+                                       HCDP_TABLE_GUID)) {
+                       efi.hcdp = config_tables[i].table;
+                       printk(" HCDP=0x%lx ", config_tables[i].table);
+               } else if (!efi_guidcmp(config_tables[i].guid,
+                                       UGA_IO_PROTOCOL_GUID)) {
+                       efi.uga = config_tables[i].table;
+                       printk(" UGA=0x%lx ", config_tables[i].table);
+               }
+       }
+       printk("\n");
+
+       /*
+        * Check out the runtime services table. We need to map
+        * the runtime services table so that we can grab the physical
+        * address of several of the EFI runtime functions, needed to
+        * set the firmware into virtual mode.
+        */
+       runtime = early_ioremap((unsigned long)efi.systab->runtime,
+                               sizeof(efi_runtime_services_t));
+       if (runtime != NULL) {
+               /*
+                * We will only need *early* access to the following
+                * two EFI runtime services before set_virtual_address_map
+                * is invoked.
+                */
+               efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
+               efi_phys.set_virtual_address_map =
+                       (efi_set_virtual_address_map_t *)
+                       runtime->set_virtual_address_map;
+               /*
+                * Make efi_get_time can be called before entering
+                * virtual mode.
+                */
+               efi.get_time = phys_efi_get_time;
+       } else
+               printk(KERN_ERR "Could not map the EFI runtime service "
+                      "table!\n");
+
+       /* Map the EFI memory map */
+       memmap.map = __va((unsigned long)memmap.phys_map);
+       memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+       if (memmap.desc_size != sizeof(efi_memory_desc_t))
+               printk(KERN_WARNING "Kernel-defined memdesc "
+                      "doesn't match the one from EFI!\n");
+
+       /* Setup for EFI runtime service */
+       reboot_type = BOOT_EFI;
+
+       if (!noefi_time) {
+               get_wallclock = efi_get_time;
+               set_wallclock = efi_set_rtc_mmss;
+       }
+}
+
+/*
+ * Give every EFI_RUNTIME_SERVICES_CODE region PAGE_KERNEL_EXEC protection
+ * at its virtual address, then flush the TLB.
+ */
+static void __init runtime_code_page_mkexec(void)
+{
+       efi_memory_desc_t *desc;
+       void *cur = memmap.map;
+
+       while (cur < memmap.map_end) {
+               desc = cur;
+               cur += memmap.desc_size;
+               if (desc->type != EFI_RUNTIME_SERVICES_CODE)
+                       continue;
+               change_page_attr_addr(desc->virt_addr, desc->num_pages,
+                                     PAGE_KERNEL_EXEC);
+       }
+       global_flush_tlb();
+}
+
+/*
+ * Map the physical range [offset, offset + size) with set_fixmap_nocache()
+ * into the fixmap slots between FIX_EFI_IO_MAP_FIRST_PAGE and
+ * FIX_EFI_IO_MAP_LAST_PAGE. Slots are handed out consecutively and are
+ * never given back. Returns the address of the first mapped page, or NULL
+ * when the request would exceed MAX_EFI_IO_PAGES in total.
+ *
+ * NOTE(review): the offset within the first page is not added to the
+ * returned address, so callers are expected to pass page-aligned ranges.
+ */
+static void __iomem * __init efi_ioremap(unsigned long offset,
+                                        unsigned long size)
+{
+       static unsigned pages_mapped;
+       unsigned long last_addr;
+       unsigned i, pages;
+
+       last_addr = offset + size - 1;
+       offset &= PAGE_MASK;
+       /*
+        * Round up from one past the last byte: rounding last_addr itself
+        * drops a page when the last byte is the first byte of a page.
+        */
+       pages = (PAGE_ALIGN(last_addr + 1) - offset) >> PAGE_SHIFT;
+       if (pages_mapped + pages > MAX_EFI_IO_PAGES)
+               return NULL;
+
+       for (i = 0; i < pages; i++) {
+               set_fixmap_nocache(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
+                                  offset);
+               offset += PAGE_SIZE;
+               pages_mapped++;
+       }
+
+       /* pages_mapped - pages is the slot used for the first page above */
+       return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE -
+                                            (pages_mapped - pages));
+}
+
+/*
+ * This function will switch the EFI runtime services to virtual mode.
+ * Essentially, look through the EFI memmap and map every region that
+ * has the runtime attribute bit set in its memory descriptor and update
+ * that memory descriptor with the virtual address obtained from ioremap().
+ * This enables the runtime services to be called without having to
+ * thunk back into physical mode for every invocation.
+ */
+void __init efi_enter_virtual_mode(void)
+{
+       efi_memory_desc_t *md;
+       efi_status_t status;
+       unsigned long end;
+       void *p;
+
+       efi.systab = NULL;
+       /*
+        * Assign a virtual address to every runtime region and, on the way,
+        * find the region containing the system table so that efi.systab
+        * can be pointed at its virtual address.
+        */
+       for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+               md = p;
+               if (!(md->attribute & EFI_MEMORY_RUNTIME))
+                       continue;
+               /*
+                * Write-back regions use the kernel mapping; UC/WC regions
+                * get an uncached fixmap mapping.
+                * NOTE(review): virt_addr is left as found when neither
+                * attribute is set, and a NULL return from efi_ioremap()
+                * is not checked -- confirm this cannot happen.
+                */
+               if (md->attribute & EFI_MEMORY_WB)
+                       md->virt_addr = (unsigned long)__va(md->phys_addr);
+               else if (md->attribute & (EFI_MEMORY_UC | EFI_MEMORY_WC))
+                       md->virt_addr = (unsigned long)
+                               efi_ioremap(md->phys_addr,
+                                           md->num_pages << EFI_PAGE_SHIFT);
+               end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+               if ((md->phys_addr <= (unsigned long)efi_phys.systab) &&
+                   ((unsigned long)efi_phys.systab < end))
+                       efi.systab = (efi_system_table_t *)
+                               (md->virt_addr - md->phys_addr +
+                                (unsigned long)efi_phys.systab);
+       }
+
+       /* The system table must live inside one of the runtime regions. */
+       BUG_ON(!efi.systab);
+
+       /* Hand the updated map to the firmware through a physical mode call. */
+       status = phys_efi_set_virtual_address_map(
+               memmap.desc_size * memmap.nr_map,
+               memmap.desc_size,
+               memmap.desc_version,
+               memmap.phys_map);
+
+       if (status != EFI_SUCCESS) {
+               printk(KERN_ALERT "You are screwed! "
+                      "Unable to switch EFI into virtual mode "
+                      "(status=%lx)\n", status);
+               panic("EFI call to SetVirtualAddressMap() failed!");
+       }
+
+       /*
+        * Now that EFI is in virtual mode, install the wrapper functions
+        * as the runtime service entry points in 'efi'. Each wrapper reads
+        * the firmware's function address from efi.systab->runtime and
+        * calls it through the lin2win ABI conversion helpers.
+        */
+       efi.get_time = (efi_get_time_t *)_efi_get_time;
+       efi.set_time = (efi_set_time_t *)_efi_set_time;
+       efi.get_wakeup_time = (efi_get_wakeup_time_t *)_efi_get_wakeup_time;
+       efi.set_wakeup_time = (efi_set_wakeup_time_t *)_efi_set_wakeup_time;
+       efi.get_variable = (efi_get_variable_t *)_efi_get_variable;
+       efi.get_next_variable = (efi_get_next_variable_t *)
+               _efi_get_next_variable;
+       efi.set_variable = (efi_set_variable_t *)_efi_set_variable;
+       efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *)
+               _efi_get_next_high_mono_count;
+       efi.reset_system = (efi_reset_system_t *)_efi_reset_system;
+       efi.set_virtual_address_map = (efi_set_virtual_address_map_t *)
+               _efi_set_virtual_address_map;
+
+       /* Runtime code must be executable at its virtual addresses. */
+       runtime_code_page_mkexec();
+}
+
+/*
+ * Convenience functions to obtain memory types and attributes
+ */
+/*
+ * Return the type of the EFI memory descriptor whose range contains
+ * @phys_addr, or 0 when no descriptor covers it.
+ */
+u32 efi_mem_type(unsigned long phys_addr)
+{
+       void *cur;
+
+       for (cur = memmap.map; cur < memmap.map_end; cur += memmap.desc_size) {
+               efi_memory_desc_t *desc = cur;
+               u64 region_end = desc->phys_addr +
+                       (desc->num_pages << EFI_PAGE_SHIFT);
+
+               if (phys_addr < desc->phys_addr || phys_addr >= region_end)
+                       continue;
+               return desc->type;
+       }
+       return 0;
+}
+
+/*
+ * Return the attribute bits of the EFI memory descriptor whose range
+ * contains @phys_addr, or 0 when no descriptor covers it.
+ */
+u64 efi_mem_attributes(unsigned long phys_addr)
+{
+       void *cur;
+
+       for (cur = memmap.map; cur < memmap.map_end; cur += memmap.desc_size) {
+               efi_memory_desc_t *desc = cur;
+               u64 region_end = desc->phys_addr +
+                       (desc->num_pages << EFI_PAGE_SHIFT);
+
+               if (phys_addr < desc->phys_addr || phys_addr >= region_end)
+                       continue;
+               return desc->attribute;
+       }
+       return 0;
+}
Index: linux-2.6.23-rc4/arch/x86_64/kernel/efi_callwrap.S
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.23-rc4/arch/x86_64/kernel/efi_callwrap.S  2007-09-17 
15:03:47.000000000 +0800
@@ -0,0 +1,69 @@
+/*
+ * linux/arch/x86_64/kernel/efi_callwrap.S -- Function calling ABI
+ *     conversion from SYSV to Windows for x86_64
+ *
+ * Copyright (C) 2007 Intel Corp
+ *     Bibo Mao <[EMAIL PROTECTED]>
+ *     Huang Ying <[EMAIL PROTECTED]>
+ */
+
+#include <linux/linkage.h>
+
+/*
+ * lin2winN(fp, arg1, ..., argN): call fp with N arguments.
+ *
+ * Incoming (SysV): fp in %rdi, arguments in %rsi, %rdx, %rcx, %r8, %r9,
+ * then on the stack above the return address.
+ * Outgoing (Windows): arguments in %rcx, %rdx, %r8, %r9, then on the stack
+ * above a 32-byte area the caller must reserve for the callee.
+ *
+ * arg2 lives in %rdx in both conventions and is never moved. The other
+ * moves are ordered so that no register is overwritten before its old
+ * value has been copied. %rax is not touched after the call, so the
+ * callee's return value is passed straight back.
+ */
+ENTRY(lin2win0)
+       subq $40, %rsp          /* 32-byte callee area + 8 to keep %rsp 16-byte aligned */
+       call *%rdi
+       addq $40, %rsp
+       ret
+
+ENTRY(lin2win1)
+       subq $40, %rsp
+       mov  %rsi, %rcx         /* arg1 */
+       call *%rdi
+       addq $40, %rsp
+       ret
+
+ENTRY(lin2win2)
+       subq $40, %rsp
+       mov  %rsi, %rcx         /* arg1; arg2 is already in %rdx */
+       call *%rdi
+       addq $40, %rsp
+       ret
+
+ENTRY(lin2win3)
+       subq $40, %rsp
+       mov  %rcx, %r8          /* arg3, before %rcx is reused */
+       mov  %rsi, %rcx         /* arg1 */
+       call *%rdi
+       addq $40, %rsp
+       ret
+
+ENTRY(lin2win4)
+       subq $40, %rsp
+       mov %r8, %r9            /* arg4 */
+       mov %rcx, %r8           /* arg3 */
+       mov %rsi, %rcx          /* arg1 */
+       call *%rdi
+       addq $40, %rsp
+       ret
+
+ENTRY(lin2win5)
+       subq $40, %rsp
+       mov %r9, 32(%rsp)       /* arg5: first stack slot above the callee area */
+       mov %r8, %r9            /* arg4 */
+       mov %rcx, %r8           /* arg3 */
+       mov %rsi, %rcx          /* arg1 */
+       call *%rdi
+       addq $40, %rsp
+       ret
+
+ENTRY(lin2win6)
+       subq $56, %rsp          /* callee area + two stack arguments + alignment */
+       mov 56+8(%rsp), %rax    /* arg6: caller's first stack argument, above the return address */
+       mov %r9, 32(%rsp)       /* arg5 */
+       mov %rax, 40(%rsp)      /* arg6 */
+       mov %r8, %r9            /* arg4 */
+       mov %rcx, %r8           /* arg3 */
+       mov %rsi, %rcx          /* arg1 */
+       call *%rdi
+       addq $56, %rsp
+       ret
Index: linux-2.6.23-rc4/arch/x86_64/kernel/setup.c
===================================================================
--- linux-2.6.23-rc4.orig/arch/x86_64/kernel/setup.c    2007-09-17 
15:02:33.000000000 +0800
+++ linux-2.6.23-rc4/arch/x86_64/kernel/setup.c 2007-09-17 15:03:47.000000000 
+0800
@@ -44,6 +44,7 @@
 #include <linux/dmi.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
+#include <linux/efi.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -63,6 +64,7 @@
 #include <asm/numa.h>
 #include <asm/sections.h>
 #include <asm/dmi.h>
+#include <asm/efi.h>
 
 /*
  * Machine setup..
@@ -230,6 +232,9 @@
        while (pa_setup_data) {
                setup_data = early_ioremap(pa_setup_data, PAGE_SIZE);
                switch (setup_data->type) {
+               case SETUP_EFI64_INFO:
+                       parse_efi64_info(setup_data, pa_setup_data);
+                       break;
                default:
                        break;
                }
@@ -292,6 +297,8 @@
        discover_ebda();
 
        init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
+       if (efi_enabled)
+               efi_init();
 
        dmi_scan_machine();
 
@@ -384,6 +391,10 @@
         */
        acpi_reserve_bootmem();
 #endif
+
+       if (efi_enabled)
+               efi_reserve_bootmem();
+
        /*
         * Find and reserve possible boot-time SMP configuration:
         */
@@ -459,7 +470,8 @@
 
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
-       conswitchp = &vga_con;
+       if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+               conswitchp = &vga_con;
 #elif defined(CONFIG_DUMMY_CONSOLE)
        conswitchp = &dummy_con;
 #endif
Index: linux-2.6.23-rc4/include/asm-x86_64/fixmap.h
===================================================================
--- linux-2.6.23-rc4.orig/include/asm-x86_64/fixmap.h   2007-09-17 
15:02:33.000000000 +0800
+++ linux-2.6.23-rc4/include/asm-x86_64/fixmap.h        2007-09-17 
15:03:47.000000000 +0800
@@ -15,6 +15,7 @@
 #include <asm/apicdef.h>
 #include <asm/page.h>
 #include <asm/vsyscall.h>
+#include <asm/efi.h>
 
 /*
  * Here we define all the compile-time 'special' virtual
@@ -41,6 +42,8 @@
        FIX_APIC_BASE,  /* local (CPU) APIC) -- required for SMP or not */
        FIX_IO_APIC_BASE_0,
        FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+       FIX_EFI_IO_MAP_LAST_PAGE,
+       FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1,
        __end_of_fixed_addresses
 };
 
Index: linux-2.6.23-rc4/arch/x86_64/Kconfig
===================================================================
--- linux-2.6.23-rc4.orig/arch/x86_64/Kconfig   2007-09-17 15:02:33.000000000 
+0800
+++ linux-2.6.23-rc4/arch/x86_64/Kconfig        2007-09-17 15:03:47.000000000 
+0800
@@ -280,6 +280,17 @@
        depends on SMP && !MK8
        default y
 
+config EFI
+       bool "EFI runtime service support (EXPERIMENTAL)"
+       ---help---
+         This enables the kernel to use any EFI runtime services that are
+         available (such as the EFI variable services).
+         This option is only useful on systems that have EFI firmware
+         and will result in a kernel image that is ~8k larger. However,
+         even with this option, the resultant kernel should continue to
+         boot on existing non-EFI platforms. For more information on
+         how to set up [U]EFI64 system, see Documentation/x86_64/uefi.txt.
+
 config MATH_EMULATION
        bool
 
Index: linux-2.6.23-rc4/arch/x86_64/kernel/Makefile
===================================================================
--- linux-2.6.23-rc4.orig/arch/x86_64/kernel/Makefile   2007-09-17 
15:02:33.000000000 +0800
+++ linux-2.6.23-rc4/arch/x86_64/kernel/Makefile        2007-09-17 
15:03:47.000000000 +0800
@@ -39,6 +39,7 @@
 obj-$(CONFIG_K8_NB)            += k8.o
 obj-$(CONFIG_AUDIT)            += audit.o
 obj-$(CONFIG_STACK_UNWIND)     += unwind.o
+obj-$(CONFIG_EFI)              += efi.o efi_callwrap.o
 
 obj-$(CONFIG_MODULES)          += module.o
 obj-$(CONFIG_PCI)              += early-quirks.o
Index: linux-2.6.23-rc4/include/asm-i386/bootparam.h
===================================================================
--- linux-2.6.23-rc4.orig/include/asm-i386/bootparam.h  2007-09-17 
15:02:33.000000000 +0800
+++ linux-2.6.23-rc4/include/asm-i386/bootparam.h       2007-09-17 
15:03:47.000000000 +0800
@@ -11,6 +11,7 @@
 
 /* setup data types */
 #define SETUP_NONE                     0
+#define SETUP_EFI64_INFO               1
 
 /* extensible setup data list node */
 struct setup_data {
@@ -101,4 +102,13 @@
        u8  _pad9[276];                                 /* 0xeec */
 } __attribute__((packed));
 
+struct efi64_info {
+       u64 efi_systab;
+       u32 efi_memmap_size;
+       u32 efi_memdesc_size;
+       u32 efi_memdesc_version;
+       u32 _pad1;
+       u8  efi_memmap[0];
+} __attribute__((packed));
+
 #endif /* _ASM_BOOTPARAM_H */
Index: linux-2.6.23-rc4/include/asm-x86_64/efi.h
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.23-rc4/include/asm-x86_64/efi.h   2007-09-17 15:03:47.000000000 +0800
@@ -0,0 +1,18 @@
+#ifndef __ASM_X86_64_EFI_H
+#define __ASM_X86_64_EFI_H
+
+#include <asm/bootparam.h>
+
+#define MAX_EFI_IO_PAGES       100
+
+extern void efi_reserve_bootmem(void);
+#ifdef CONFIG_EFI
+extern void parse_efi64_info(struct setup_data *setup_data,
+                            unsigned long pa_setup_data);
+#else
+static inline void parse_efi64_info(struct setup_data *setup_data,
+                                   unsigned long pa_setup_data)
+{ }
+#endif
+
+#endif
Index: linux-2.6.23-rc4/arch/x86_64/kernel/reboot.c
===================================================================
--- linux-2.6.23-rc4.orig/arch/x86_64/kernel/reboot.c   2007-09-17 15:02:32.000000000 +0800
+++ linux-2.6.23-rc4/arch/x86_64/kernel/reboot.c        2007-09-17 15:03:47.000000000 +0800
@@ -9,6 +9,7 @@
 #include <linux/pm.h>
 #include <linux/kdebug.h>
 #include <linux/sched.h>
+#include <linux/efi.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/delay.h>
@@ -27,20 +28,17 @@
 EXPORT_SYMBOL(pm_power_off);
 
 static long no_idt[3];
-static enum { 
-       BOOT_TRIPLE = 't',
-       BOOT_KBD = 'k',
-       BOOT_ACPI = 'a'
-} reboot_type = BOOT_KBD;
+enum reboot_type reboot_type = BOOT_KBD;
 static int reboot_mode = 0;
 int reboot_force;
 
-/* reboot=t[riple] | k[bd] [, [w]arm | [c]old]
+/* reboot=t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old]
    warm   Don't set the cold reboot flag
    cold   Set the cold reboot flag
    triple Force a triple fault (init)
    kbd    Use the keyboard controller. cold reset (default)
    acpi   Use the RESET_REG in the FADT
+   efi    Use efi reset_system runtime service
    force  Avoid anything that could hang.
  */ 
 static int __init reboot_setup(char *str)
@@ -59,6 +57,7 @@
                case 'a':
                case 'b':
                case 'k':
+               case 'e':
                        reboot_type = *str;
                        break;
                case 'f':
@@ -151,7 +150,14 @@
                        acpi_reboot();
                        reboot_type = BOOT_KBD;
                        break;
-               }      
+
+               case BOOT_EFI:
+                       if (efi_enabled)
+                               efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD,
+                                                EFI_SUCCESS, 0, NULL);
+                       reboot_type = BOOT_KBD;
+                       break;
+               }
        }      
 }
 
Index: linux-2.6.23-rc4/arch/x86_64/kernel/time.c
===================================================================
--- linux-2.6.23-rc4.orig/arch/x86_64/kernel/time.c     2007-09-17 15:02:32.000000000 +0800
+++ linux-2.6.23-rc4/arch/x86_64/kernel/time.c  2007-09-17 15:03:47.000000000 +0800
@@ -27,6 +27,7 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/kallsyms.h>
+#include <linux/efi.h>
 #include <linux/acpi.h>
 #include <linux/clockchips.h>
 
@@ -47,12 +48,19 @@
 #include <asm/mpspec.h>
 #include <asm/nmi.h>
 #include <asm/vgtod.h>
+#include <asm/time.h>
 
 DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL(rtc_lock);
 
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 
+static int set_rtc_mmss(unsigned long nowtime);
+static unsigned long read_cmos_clock(void);
+
+unsigned long (*get_wallclock)(void) = read_cmos_clock;
+int (*set_wallclock)(unsigned long nowtime) = set_rtc_mmss;
+
 unsigned long profile_pc(struct pt_regs *regs)
 {
        unsigned long pc = instruction_pointer(regs);
@@ -86,13 +94,6 @@
        unsigned char control, freq_select;
 
 /*
- * IRQs are disabled when we're called from the timer interrupt,
- * no need for spin_lock_irqsave()
- */
-
-       spin_lock(&rtc_lock);
-
-/*
  * Tell the clock it's being set and stop it.
  */
 
@@ -140,14 +141,23 @@
        CMOS_WRITE(control, RTC_CONTROL);
        CMOS_WRITE(freq_select, RTC_FREQ_SELECT);
 
-       spin_unlock(&rtc_lock);
-
        return retval;
 }
 
 int update_persistent_clock(struct timespec now)
 {
-       return set_rtc_mmss(now.tv_sec);
+       int retval;
+
+/*
+ * IRQs are disabled when we're called from the timer interrupt,
+ * no need for spin_lock_irqsave()
+ */
+
+       spin_lock(&rtc_lock);
+       retval = set_wallclock(now.tv_sec);
+       spin_unlock(&rtc_lock);
+
+       return retval;
 }
 
 static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
@@ -157,14 +167,11 @@
        return IRQ_HANDLED;
 }
 
-unsigned long read_persistent_clock(void)
+static unsigned long read_cmos_clock(void)
 {
        unsigned int year, mon, day, hour, min, sec;
-       unsigned long flags;
        unsigned century = 0;
 
-       spin_lock_irqsave(&rtc_lock, flags);
-
        do {
                sec = CMOS_READ(RTC_SECONDS);
                min = CMOS_READ(RTC_MINUTES);
@@ -179,8 +186,6 @@
 #endif
        } while (sec != CMOS_READ(RTC_SECONDS));
 
-       spin_unlock_irqrestore(&rtc_lock, flags);
-
        /*
         * We know that x86-64 always uses BCD format, no need to check the
         * config register.
@@ -208,6 +213,17 @@
        return mktime(year, mon, day, hour, min, sec);
 }
 
+unsigned long read_persistent_clock(void)
+{
+       unsigned long flags, retval;
+
+       spin_lock_irqsave(&rtc_lock, flags);
+       retval = get_wallclock();
+       spin_unlock_irqrestore(&rtc_lock, flags);
+
+       return retval;
+}
+
 /* calibrate_cpu is used on systems with fixed rate TSCs to determine
  * processor frequency */
 #define TICK_COUNT 100000000
Index: linux-2.6.23-rc4/include/asm-x86_64/emergency-restart.h
===================================================================
--- linux-2.6.23-rc4.orig/include/asm-x86_64/emergency-restart.h        2007-09-17 15:02:32.000000000 +0800
+++ linux-2.6.23-rc4/include/asm-x86_64/emergency-restart.h     2007-09-17 15:03:47.000000000 +0800
@@ -1,6 +1,15 @@
 #ifndef _ASM_EMERGENCY_RESTART_H
 #define _ASM_EMERGENCY_RESTART_H
 
+enum reboot_type {
+       BOOT_TRIPLE = 't',
+       BOOT_KBD = 'k',
+       BOOT_ACPI = 'a',
+       BOOT_EFI = 'e'
+};
+
+extern enum reboot_type reboot_type;
+
 extern void machine_emergency_restart(void);
 
 #endif /* _ASM_EMERGENCY_RESTART_H */
Index: linux-2.6.23-rc4/include/asm-x86_64/time.h
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.23-rc4/include/asm-x86_64/time.h  2007-09-17 15:03:47.000000000 +0800
@@ -0,0 +1,7 @@
+#ifndef __ASM_X86_64_TIME_H
+#define __ASM_X86_64_TIME_H
+
+extern unsigned long (*get_wallclock)(void);
+extern int (*set_wallclock)(unsigned long nowtime);
+
+#endif
Index: linux-2.6.23-rc4/Documentation/x86_64/boot-options.txt
===================================================================
--- linux-2.6.23-rc4.orig/Documentation/x86_64/boot-options.txt 2007-09-17 15:02:31.000000000 +0800
+++ linux-2.6.23-rc4/Documentation/x86_64/boot-options.txt      2007-09-17 15:03:47.000000000 +0800
@@ -110,7 +110,7 @@
 
 Rebooting
 
-   reboot=b[ios] | t[riple] | k[bd] | a[cpi] [, [w]arm | [c]old]
+   reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old]
    bios          Use the CPU reboot vector for warm reset
    warm   Don't set the cold reboot flag
    cold   Set the cold reboot flag
@@ -119,6 +119,9 @@
    acpi   Use the ACPI RESET_REG in the FADT. If ACPI is not configured or the
           ACPI reset does not work, the reboot path attempts the reset using
           the keyboard controller.
+   efi    Use the EFI reset_system runtime service. If EFI is not configured or
+          the EFI reset does not work, the reboot path attempts the reset using
+          the keyboard controller.
 
    Using warm reset will be much faster especially on big memory
    systems because the BIOS will not go through the memory check.
@@ -303,4 +306,11 @@
                newfallback: use new unwinder but fall back to old if it gets
                        stuck (default)
 
+EFI
+
+  noefi                Disable EFI support
+
+  noefi_time   Disable the EFI time runtime service and program the CMOS
+               hardware directly instead
+
 Miscellaneous
Index: linux-2.6.23-rc4/Documentation/i386/boot.txt
===================================================================
--- linux-2.6.23-rc4.orig/Documentation/i386/boot.txt   2007-09-17 15:03:46.000000000 +0800
+++ linux-2.6.23-rc4/Documentation/i386/boot.txt        2007-09-17 15:03:47.000000000 +0800
@@ -793,6 +793,21 @@
 
 **** SETUP DATA TYPES
 
+Type name:     UEFI 64 information
+Protocol:      2.07+
+
+  UEFI 64 firmware information, including the UEFI system table and memory
+  map information. The definition is as follows:
+
+  struct efi64_info {
+          u64 efi_systab;              /* EFI system table pointer */
+          u32 efi_memmap_size;         /* EFI memory descriptor map size */
+          u32 efi_memdesc_size;                /* EFI memory descriptor size */
+          u32 efi_memdesc_version;     /* EFI memory descriptor version */
+          u32 _pad1;
+          u8  efi_memmap[0];           /* EFI memory descriptor map */
+  } __attribute__((packed));
+
 
 **** 32-bit BOOT PROTOCOL
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to