date:20071206

[PATCH 4/4 -mm] kexec based hibernation -v7 : kimgcore

2007-12-06 Thread Huang, Ying

This patch adds a file in the proc file system to access the loaded
kexec_image, which may contain the memory image of the kexeced
system. This can be used by kexec based hibernation to create a file
image of the hibernating kernel, so that a kernel boot process is not
needed for each hibernation.

Signed-off-by: Huang Ying <[EMAIL PROTECTED]>

---
 fs/proc/Makefile  |1 
 fs/proc/kimgcore.c|  204 ++
 fs/proc/proc_misc.c   |5 +
 include/linux/kexec.h |7 +
 kernel/kexec.c|5 -
 5 files changed, 217 insertions(+), 5 deletions(-)

--- /dev/null
+++ b/fs/proc/kimgcore.c
@@ -0,0 +1,204 @@
+/*
+ * fs/proc/kimgcore.c - Interface for accessing the loaded
+ * kexec_image, which may contains the memory image of kexeced system.
+ * Heavily borrowed from fs/proc/kcore.c
+ *
+ * Copyright (C) 2007, Intel Corp.
+ *  Huang Ying <[EMAIL PROTECTED]>
+ *
+ * This file is released under the GPLv2
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct proc_dir_entry *proc_root_kimgcore;
+
+static u32 kimgcore_size;
+
+static char *elfcorebuf;
+static size_t elfcorebuf_sz;
+
+static void *buf_page;
+
+static ssize_t kimage_copy_to_user(struct kimage *image, char __user *buf,
+ unsigned long offset, size_t count)
+{  /*
+    * Copy up to @count bytes to the user buffer @buf, starting @offset
+    * bytes into the byte stream formed by the IND_SOURCE pages of @image,
+    * taken in for_each_kimage_entry() order.
+    *
+    * Locals: off is the stream offset of the source page currently being
+    * visited, offinp is the offset inside that page, and trunk is the
+    * number of bytes copied out of that page.
+    *
+    * Returns -EFAULT if copy_to_user() fails; otherwise returns the
+    * number of bytes that were NOT copied (0 when the whole request was
+    * satisfied), so the caller derives progress as count minus the
+    * return value.
+    *
+    * NOTE(review): buf_page is a file-scope bounce buffer whose
+    * allocation is not visible in this part of the patch - confirm it
+    * is at least PAGE_SIZE bytes and allocated before the first read.
+    */
+   kimage_entry_t *ptr, entry;
+   unsigned long off = 0, offinp, trunk;
+   struct page *page;
+   void *vaddr;
+
+   for_each_kimage_entry(image, ptr, entry) {
+   if (!(entry & IND_SOURCE)) /* only IND_SOURCE entries are counted and copied */
+   continue;
+   if (off + PAGE_SIZE > offset) { /* this page extends past the requested offset */
+   offinp = offset - off;
+   if (count > PAGE_SIZE - offinp) /* clamp the copy to the end of this page */
+   trunk = PAGE_SIZE - offinp;
+   else
+   trunk = count;
+   page = pfn_to_page(entry >> PAGE_SHIFT);
+   if (PageHighMem(page)) { /* highmem: kmap the page and bounce through buf_page */
+   vaddr = kmap(page);
+   memcpy(buf_page, vaddr+offinp, trunk);
+   kunmap(page);
+   vaddr = buf_page;
+   } else
+   vaddr = __va(entry & PAGE_MASK) + offinp;
+   if (copy_to_user(buf, vaddr, trunk))
+   return -EFAULT;
+   buf += trunk;
+   offset += trunk; /* the next page is then entered at offinp == 0 */
+   count -= trunk;
+   if (!count)
+   break;
+   }
+   off += PAGE_SIZE; /* advance the stream position by one source page */
+   }
+   return count; /* bytes left uncopied; non-zero if the entries ran out first */
+}
+
+static ssize_t read_kimgcore(struct file *file, char __user *buffer,
+size_t buflen, loff_t *fpos)
+{  /*
+    * Read handler: serve @buflen bytes at position *@fpos. The first
+    * elfcorebuf_sz bytes of the file come from elfcorebuf; everything
+    * after that is fetched from kexec_image through
+    * kimage_copy_to_user(), using *fpos - elfcorebuf_sz as the offset.
+    *
+    * Returns the number of bytes copied, 0 when @buflen is 0 or *@fpos is
+    * at or beyond kimgcore_size, -EFAULT if copying the header part
+    * fails, or the negative value returned by kimage_copy_to_user().
+    *
+    * NOTE(review): when the header part was copied and the image part
+    * then fails, the error is returned although *fpos has already been
+    * advanced by the header bytes - confirm that this is intended.
+    */
+   size_t acc = 0;
+   size_t tsz;
+   ssize_t ssz;
+
+   if (buflen == 0 || *fpos >= kimgcore_size)
+   return 0;
+
+   /* trim buflen to not go beyond EOF */
+   if (buflen > kimgcore_size - *fpos)
+   buflen = kimgcore_size - *fpos;
+   /* Read ELF core header */
+   if (*fpos < elfcorebuf_sz) {
+   tsz = elfcorebuf_sz - *fpos; /* header bytes remaining from *fpos */
+   if (buflen < tsz)
+   tsz = buflen;
+   if (copy_to_user(buffer, elfcorebuf + *fpos, tsz))
+   return -EFAULT;
+   buflen -= tsz;
+   *fpos += tsz;
+   buffer += tsz;
+   acc += tsz;
+
+   /* leave now if filled buffer already */
+   if (buflen == 0)
+   return acc;
+   }
+
+   ssz = kimage_copy_to_user(kexec_image, buffer,
+ *fpos - elfcorebuf_sz, buflen);
+   if (ssz < 0)
+   return ssz;
+
+   *fpos += (buflen - ssz); /* ssz is the count of bytes left uncopied */
+   acc += (buflen - ssz);
+
+   return acc;
+}
+
+static int init_kimgcore(void)
+{
+   Elf64_Ehdr *ehdr;
+   Elf64_Phdr *phdr;
+   struct kexec_segment *seg;
+   Elf64_Off off;
+   unsigned long i;
+
+   elfcorebuf_sz = sizeof(Elf64_Ehdr) +
+   kexec_image->nr_segments * sizeof(Elf64_Phdr);
+   elfcorebuf = kzalloc(elfcorebuf_sz, GFP_KERNEL);
+   if (!elfcorebuf)
+   return -ENOMEM;
+   ehdr = (Elf64_Ehdr *)elfcorebuf;
+   memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+   ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+   ehdr->e_ident[EI_DATA]  = ELFDATA2LSB;
+   ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+   ehdr->e_ident[EI_OSABI] = ELFOSABI_NONE;
+   memset(ehdr->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+   ehdr->e_type = ET_CORE;
+   ehdr->e_machine = ELF_ARCH;
+   ehdr->e_version = EV_CURRENT;
+   ehdr->e_entry = kexec_image->start;
+   ehdr->e_phoff = sizeof(Elf64_Ehdr);
+   ehdr->e_shoff = 0;
+   ehdr->e_flags = 0;
+   ehdr->e_ehsize =

[PATCH 3/4 -mm] kexec based hibernation -v7 : kexec hibernate/resume

2007-12-06 Thread Huang, Ying

This patch implements kexec based hibernate/resume. This is based on
the facility provided by kexec_jump. The state save/restore code of
ordinary kexec_jump is overridden by hibernate/resume specific
code. The ACPI methods are called in the specified environment to conform
to the ACPI specification. A new reboot command is added to go to ACPI S4
state from user space.

Signed-off-by: Huang Ying <[EMAIL PROTECTED]>

---
 include/linux/kexec.h   |4 
 include/linux/reboot.h  |1 
 include/linux/suspend.h |1 
 kernel/power/disk.c |  244 +++-
 kernel/sys.c|5 
 5 files changed, 251 insertions(+), 4 deletions(-)

--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "power.h"
 
@@ -365,13 +366,13 @@ int hibernation_platform_enter(void)
 }
 
 /**
- * power_down - Shut the machine down for hibernation.
+ * hibernate_power_down - Shut the machine down for hibernation.
  *
  * Use the platform driver, if configured so; otherwise try
  * to power off or reboot.
  */
 
-static void power_down(void)
+void hibernate_power_down(void)
 {
switch (hibernation_mode) {
case HIBERNATION_TEST:
@@ -461,7 +462,7 @@ int hibernate(void)
error = swsusp_write(flags);
swsusp_free();
if (!error)
-   power_down();
+   hibernate_power_down();
} else {
pr_debug("PM: Image restored successfully.\n");
swsusp_free();
@@ -478,6 +479,243 @@ int hibernate(void)
return error;
 }
 
+#ifdef CONFIG_KEXEC
+static int kexec_snapshot(struct notifier_block *nb,
+ unsigned long cmd, void *arg)
+{  /*
+    * Notifier-style callback (takes a notifier_block, returns NOTIFY_*
+    * codes) that quiesces the system for
+    * KJUMP_CMD_HIBERNATE_WRITE_IMAGE; any other @cmd yields NOTIFY_DONE
+    * without side effects. @nb and @arg are not used.
+    *
+    * Sequence: pm_prepare_console(), PM_HIBERNATION_PREPARE notifiers,
+    * freeze_processes(), platform_start(), suspend_console(),
+    * device_suspend(PMSG_FREEZE), platform_pre_snapshot(),
+    * disable_nonboot_cpus(), local_irq_disable(),
+    * device_power_down(PMSG_FREEZE).
+    *
+    * On success NOTIFY_STOP is returned and none of the above is undone
+    * here. On any failure, or when one of the hibernation test points
+    * fires (error is then -EAGAIN), the steps already taken are unwound
+    * through the labels at the bottom and notifier_from_errno(error) is
+    * returned.
+    *
+    * NOTE(review): the Resume_devices label calls platform_finish()
+    * even when it is reached from the TEST_DEVICES check or from a
+    * failing platform_pre_snapshot() - confirm that calling
+    * platform_finish() in those cases is intended.
+    */
+   int error;
+   int platform_mode = (hibernation_mode == HIBERNATION_PLATFORM);
+
+   if (cmd != KJUMP_CMD_HIBERNATE_WRITE_IMAGE)
+   return NOTIFY_DONE;
+
+   pm_prepare_console();
+
+   error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+   if (error)
+   goto Exit;
+
+   error = freeze_processes();
+   if (error) {
+   error = -EBUSY; /* whatever freeze_processes() returned is reported as -EBUSY */
+   goto Exit;
+   }
+
+   if (hibernation_test(TEST_FREEZER) ||
+   hibernation_testmode(HIBERNATION_TESTPROC)) {
+   error = -EAGAIN;
+   goto Resume_process;
+   }
+
+   error = platform_start(platform_mode);
+   if (error)
+   goto Resume_process;
+
+   suspend_console();
+   error = device_suspend(PMSG_FREEZE);
+   if (error)
+   goto Resume_console;
+
+   if (hibernation_test(TEST_DEVICES)) {
+   error = -EAGAIN;
+   goto Resume_devices;
+   }
+
+   error = platform_pre_snapshot(platform_mode);
+   if (error)
+   goto Resume_devices;
+
+   if (hibernation_test(TEST_PLATFORM)) {
+   error = -EAGAIN;
+   goto Resume_devices;
+   }
+
+   error = disable_nonboot_cpus();
+   if (error)
+   goto Resume_devices;
+
+   if (hibernation_test(TEST_CPUS) ||
+   hibernation_testmode(HIBERNATION_TEST)) {
+   error = -EAGAIN;
+   goto Enable_cpus;
+   }
+
+   local_irq_disable();
+   /* At this point, device_suspend() has been called, but *not*
+* device_power_down(). We *must* device_power_down() now.
+* Otherwise, drivers for some devices (e.g. interrupt
+* controllers) become desynchronized with the actual state of
+* the hardware at resume time, and evil weirdness ensues.
+*/
+   error = device_power_down(PMSG_FREEZE);
+   if (error)
+   goto Enable_irqs;
+
+   if (hibernation_test(TEST_CORE)) {
+   error = -EAGAIN;
+   goto Power_up;
+   }
+
+   return NOTIFY_STOP; /* success: interrupts stay disabled, nothing is unwound */
+
+ Power_up:
+   device_power_up();
+ Enable_irqs:
+   local_irq_enable();
+ Enable_cpus:
+   enable_nonboot_cpus();
+ Resume_devices:
+   platform_finish(platform_mode);
+   device_resume();
+ Resume_console:
+   resume_console();
+ Resume_process:
+   thaw_processes();
+ Exit:
+   pm_notifier_call_chain(PM_POST_HIBERNATION);
+   pm_restore_console();
+   return notifier_from_errno(error);
+}
+
+static int kexec_prepare_write_image(struct notifier_block *nb,
+unsigned long cmd, void *arg)
+{  /*
+    * Notifier-style callback for KJUMP_CMD_HIBERNATE_WRITE_IMAGE; any
+    * other @cmd yields NOTIFY_DONE. @nb and @arg are not used.
+    *
+    * It issues the same calls, in the same order, as the unwind path of
+    * kexec_snapshot() from Power_up: down to thaw_processes(), followed
+    * by pm_restore_console(). Unlike that unwind path it does not run
+    * the PM_POST_HIBERNATION notifier chain, and no return value of
+    * the called functions is checked. Always returns NOTIFY_STOP for
+    * the handled command.
+    */
+   int platform_mode = (hibernation_mode == HIBERNATION_PLATFORM);
+
+   if (cmd != KJUMP_CMD_HIBERNATE_WRITE_IMAGE)
+   return NOTIFY_DONE;
+
+   device_power_up();
+   local_irq_enable();
+   enable_nonboot_cpus();
+   platform_finish(platform_mode);
+   device_resume();
+   resume_console();
+   thaw_processes();
+   pm_restore_console();
+   return NOTIFY_STOP;
+}
+
+static int kexec_prepare_resume(struct

[PATCH 1/4 -mm] kexec based hibernation -v7 : kexec jump

2007-12-06 Thread Huang, Ying

This patch implements the functionality of jumping between the kexeced
kernel and the original kernel.

To support jumping between two kernels, before jumping to (executing)
the new kernel and jumping back to the original kernel, the devices
are put into quiescent state, and the state of devices and CPU is
saved. After jumping back from kexeced kernel and jumping to the new
kernel, the state of devices and CPU are restored accordingly. The
devices/CPU state save/restore code of software suspend is called to
implement corresponding function.

To support jumping without reserving memory, one shadow backup page
(source page) is allocated for each page used by the new (kexeced) kernel
(destination page). When kexec_load is done, the image of the new kernel is
loaded into the source pages, and before executing, the destination pages
and the source pages are swapped, so the contents of the destination pages
are backed up. Before jumping to the new (kexeced) kernel and after
jumping back to the original kernel, the destination pages and the
source pages are swapped too.

A jump back protocol for kexec is defined and documented. It is an
extension to ordinary function calling protocol. So, the facility
provided by this patch can be used to call ordinary C function in real
mode.

A set of flags for sys_kexec_load is added to control which states are
saved/restored before/after the real mode code executes. For example, you
can specify that the device state and FPU state are saved/restored
before/after the real mode code executes.

The state save/restore code (excluding the CPU state) can be overridden
based on the "command" parameter of kexec jump, because more states
need to be saved/restored when hibernating/resuming.

Signed-off-by: Huang Ying <[EMAIL PROTECTED]>

---
 Documentation/i386/jump_back_protocol.txt |  103 ++
 arch/powerpc/kernel/machine_kexec.c   |2 
 arch/ppc/kernel/machine_kexec.c   |2 
 arch/sh/kernel/machine_kexec.c|2 
 arch/x86/kernel/machine_kexec_32.c|   88 +---
 arch/x86/kernel/machine_kexec_64.c|2 
 arch/x86/kernel/relocate_kernel_32.S  |  214 +++---
 include/asm-x86/kexec_32.h|   39 -
 include/linux/kexec.h |   40 +
 kernel/kexec.c|  188 ++
 kernel/power/Kconfig  |2 
 kernel/sys.c  |   35 +++-
 12 files changed, 648 insertions(+), 69 deletions(-)

--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
 static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -83,10 +84,14 @@ static void load_segments(void)
  * reboot code buffer to allow us to avoid allocations
  * later.
  *
- * Currently nothing.
+ * Turn off NX bit for control page.
  */
 int machine_kexec_prepare(struct kimage *image)
 {
+   if (nx_enabled) {
+   change_page_attr(image->control_code_page, 1, PAGE_KERNEL_EXEC);
+   global_flush_tlb();
+   }
return 0;
 }
 
@@ -96,25 +101,59 @@ int machine_kexec_prepare(struct kimage 
  */
 void machine_kexec_cleanup(struct kimage *image)
 {
+   if (nx_enabled) {
+   change_page_attr(image->control_code_page, 1, PAGE_KERNEL);
+   global_flush_tlb();
+   }
+}
+
+void machine_kexec(struct kimage *image)
+{
+   machine_kexec_call(image, NULL, 0);
 }
 
 /*
  * Do not allocate memory (or fail in any way) in machine_kexec().
  * We are past the point of no return, committed to rebooting now.
  */
-NORET_TYPE void machine_kexec(struct kimage *image)
+int machine_kexec_vcall(struct kimage *image, unsigned long *ret,
+unsigned int argc, va_list args)
 {
unsigned long page_list[PAGES_NR];
void *control_page;
+   asmlinkage NORET_TYPE void
+   (*relocate_kernel_ptr)(unsigned long indirection_page,
+  unsigned long control_page,
+  unsigned long start_address,
+  unsigned int has_pae) ATTRIB_NORET;
 
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
 
control_page = page_address(image->control_code_page);
-   memcpy(control_page, relocate_kernel, PAGE_SIZE);
+   memcpy(control_page, relocate_page, PAGE_SIZE/2);
+   KCALL_MAGIC(control_page) = 0;
 
+   if (image->preserve_cpu) {
+   unsigned int i;
+   KCALL_MAGIC(control_page) = KCALL_MAGIC_NUMBER;
+   KCALL_ARGC(control_page) = argc;
+   for (i = 0; i < argc; i++)
+   KCALL_ARGS(control_page)[i] = \
+   va_arg(args, unsigned long);
+
+   if (kexec_call_save_cpu(control_page)) {
+

[PATCH 0/4 -mm] kexec based hibernation -v7

2007-12-06 Thread Huang, Ying

khibernation - kexec based hibernation

Kexec based hibernation has some potential advantages over u/swsusp and
TuxOnIce (suspend2). Some of them are as follows:

1. The hibernation image size can exceed half of memory size
easily. This is possible with TuxOnIce, but impossible with
u/swsusp.

2. It is possible to eliminate freezer from kexec based hibernation
implementation, after corresponding changes to device drivers are
done.

3. Based on kexec/kdump implementation, the kernel code needed is
less.

Now, only the i386 architecture is supported. The patchset is based on
Linux kernel 2.6.24-rc4-mm1, and has been tested on IBM T42 with ACPI
on and off.

The following user-space tools are needed to implement hibernation and
resume.

1. kexec-tools needs to be patched to support khibernation. The patches
and the precompiled kexec can be downloaded from the following URL:
source:
http://khibernation.sourceforge.net/download/release_v7/kexec-tools/kexec-tools-src_git_kh7.tar.bz2
patches:
http://khibernation.sourceforge.net/download/release_v7/kexec-tools/kexec-tools-patches_git_kh7.tar.bz2
binary:
http://khibernation.sourceforge.net/download/release_v7/kexec-tools/kexec_git_kh7

2. makedumpfile with patches is used as the memory image saving tool; it
can exclude free pages from the hibernation image file. The patches and
the precompiled makedumpfile can be downloaded from the following
URL:
source:
http://khibernation.sourceforge.net/download/release_v7/makedumpfile/makedumpfile-src_cvs_kh7.tar.bz2
patches:
http://khibernation.sourceforge.net/download/release_v7/makedumpfile/makedumpfile-patches_cvs_kh7.tar.bz2
binary:
http://khibernation.sourceforge.net/download/release_v7/makedumpfile/makedumpfile_cvs_kh7

3. A very simple memory image restoring tool named "krestore" is
implemented. It can be downloaded from the following URL:
source:
http://khibernation.sourceforge.net/download/release_v7/krestore/krestore-src_cvs_kh7.tar.bz2
binary:
http://khibernation.sourceforge.net/download/release_v7/krestore/krestore_cvs_kh7

4. A very simple jumping back image (a hibernation image is a kind of
jumping back image) parameter dump tool named "kjump_back_param" is
implemented. It can be downloaded from the following URL:
source:
http://khibernation.sourceforge.net/download/release_v7/kjump_back_param/kjump_back_param-src_cvs_kh7.tar.bz2
binary:
http://khibernation.sourceforge.net/download/release_v7/kjump_back_param/kjump_back_param_cvs_kh7

An initramfs image is used as the root file system of
hibernating/resuming kernel. So, all user space tools above are needed
in initramfs. An initramfs image built with "BuildRoot" can be
downloaded from the following URL:
initramfs image:
http://khibernation.sourceforge.net/download/release_v7/initramfs/rootfs_cvs_kh7.gz

Usage:

1. Compile and install patched kernel with following options selected:

CONFIG_X86_32=y
CONFIG_RELOCATABLE=y # not needed strictly, but it is more convenient with it
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y # only needed by kexeced kernel to save/restore memory image
CONFIG_PM=y
CONFIG_HIBERNATION=y

2. Build an initramfs image containing all needed user space tools. Or,
download the pre-built initramfs image.

3. Prepare a raw partition (without file system, called hibernating
partition in following text) used to store memory image of
hibernated kernel. Caution!!! The contents of the partition will be
overwritten during hibernating.

4. Build a memory image of the hibernating kernel. This memory image needs
to be built only once unless the hardware configuration is changed.

4.1. Boot kernel compiled for normal usage (kernel A).

4.2. Load kernel compiled for hibernating/resuming usage (kernel B)
with kexec, the same kernel as that of 4 can be used if
CONFIG_RELOCATABLE=y and CONFIG_CRASH_DUMP=y are selected.

The --elf64-core-headers option should be specified on the command line
of kexec, because only 64bit ELF is supported by the krestore
tool. "go_to_hibernate" should be specified on the kernel
command line to trigger the hibernation automatically. The
major and minor device numbers of the hibernating partition should
be specified on the kernel command line through "khdev=<major>:<minor>".

For example, the shell command line can be as follows:

kexec --load-jump-back /boot/bzImage --mem-min=0x10
--mem-max=0xff --elf64-core-headers --append="khdev=3:7
go_to_hibernate"

4.3. Jump to the hibernating kernel (kernel B) with following shell
command line:

kexec -e

4.4. After normal boot process, the hibernation kernel will jump
back to hibernated kernel automatically. Then, the memory
image of hibernating kernel can be built with following shell
command line.

1 2 3 4 5 6 7 8 9 >

1 - 100 of 854 matches

Mail list logo